Adding upstream version 5.10.209.upstream/5.10.209 upstream

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-27 10:05:51 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-27 10:05:51 +0000
commit: 5d1646d90e1f2cceb9f0828f4b28318cd0ec7744 (patch)
tree: a94efe259b9009378be6d90eb30d2b019d95c194 /include/rdma
parent: Initial commit. (diff)
download: linux-430c2fc249ea5c0536abd21c23382884005c9093.tar.xz
linux-430c2fc249ea5c0536abd21c23382884005c9093.zip
40 files changed, 13906 insertions, 0 deletions
diff --git a/include/rdma/ib.h b/include/rdma/ib.h
new file mode 100644
index 000000000..83139b9ce
--- /dev/null
+++ b/include/rdma/ib.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2010 Intel Corporation.  All rights reserved.
+ */
+
+#ifndef _RDMA_IB_H
+#define _RDMA_IB_H
+
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/cred.h>
+#include <linux/uaccess.h>
+#include <linux/fs.h>
+
+struct ib_addr {
+	union {
+		__u8		uib_addr8[16];
+		__be16		uib_addr16[8];
+		__be32		uib_addr32[4];
+		__be64		uib_addr64[2];
+	} ib_u;
+#define sib_addr8		ib_u.uib_addr8
+#define sib_addr16		ib_u.uib_addr16
+#define sib_addr32		ib_u.uib_addr32
+#define sib_addr64		ib_u.uib_addr64
+#define sib_raw			ib_u.uib_addr8
+#define sib_subnet_prefix	ib_u.uib_addr64[0]
+#define sib_interface_id	ib_u.uib_addr64[1]
+};
+
+static inline bool ib_addr_any(const struct ib_addr *a)
+{
+	return ((a->sib_addr64[0] | a->sib_addr64[1]) == 0);
+}
+
+static inline bool ib_addr_loopback(const struct ib_addr *a)
+{
+	return ((a->sib_addr32[0] | a->sib_addr32[1] |
+		 a->sib_addr32[2] | (a->sib_addr32[3] ^ htonl(1))) == 0);
+}
+
+static inline void ib_addr_set(struct ib_addr *addr,
+			       __be32 w1, __be32 w2, __be32 w3, __be32 w4)
+{
+	addr->sib_addr32[0] = w1;
+	addr->sib_addr32[1] = w2;
+	addr->sib_addr32[2] = w3;
+	addr->sib_addr32[3] = w4;
+}
+
+static inline int ib_addr_cmp(const struct ib_addr *a1, const struct ib_addr *a2)
+{
+	return memcmp(a1, a2, sizeof(struct ib_addr));
+}
+
+struct sockaddr_ib {
+	unsigned short int	sib_family;	/* AF_IB */
+	__be16			sib_pkey;
+	__be32			sib_flowinfo;
+	struct ib_addr		sib_addr;
+	__be64			sib_sid;
+	__be64			sib_sid_mask;
+	__u64			sib_scope_id;
+};
+
+/*
+ * The IB interfaces that use write() as bi-directional ioctl() are
+ * fundamentally unsafe, since there are lots of ways to trigger "write()"
+ * calls from various contexts with elevated privileges. That includes the
+ * traditional suid executable error message writes, but also various kernel
+ * interfaces that can write to file descriptors.
+ *
+ * This function provides protection for the legacy API by restricting the
+ * calling context.
+ */
+static inline bool ib_safe_file_access(struct file *filp)
+{
+	return filp->f_cred == current_cred() && !uaccess_kernel();
+}
+
+#endif /* _RDMA_IB_H */
diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h
new file mode 100644
index 000000000..8c5c9582c
--- /dev/null
+++ b/include/rdma/ib_addr.h
@@ -0,0 +1,246 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
+ * Copyright (c) 2005 Intel Corporation.  All rights reserved.
+ */
+
+#ifndef IB_ADDR_H
+#define IB_ADDR_H
+
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/if_arp.h>
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/socket.h>
+#include <linux/if_vlan.h>
+#include <net/ipv6.h>
+#include <net/if_inet6.h>
+#include <net/ip.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_pack.h>
+#include <net/net_namespace.h>
+
+/**
+ * struct rdma_dev_addr - Contains resolved RDMA hardware addresses
+ * @src_dev_addr:	Source MAC address.
+ * @dst_dev_addr:	Destination MAC address.
+ * @broadcast:		Broadcast address of the device.
+ * @dev_type:		The interface hardware type of the device.
+ * @bound_dev_if:	An optional device interface index.
+ * @transport:		The transport type used.
+ * @net:		Network namespace containing the bound_dev_if net_dev.
+ * @sgid_attr:		GID attribute to use for identified SGID
+ */
+struct rdma_dev_addr {
+	unsigned char src_dev_addr[MAX_ADDR_LEN];
+	unsigned char dst_dev_addr[MAX_ADDR_LEN];
+	unsigned char broadcast[MAX_ADDR_LEN];
+	unsigned short dev_type;
+	int bound_dev_if;
+	enum rdma_transport_type transport;
+	struct net *net;
+	const struct ib_gid_attr *sgid_attr;
+	enum rdma_network_type network;
+	int hoplimit;
+};
+
+/**
+ * rdma_translate_ip - Translate a local IP address to an RDMA hardware
+ *   address.
+ *
+ * The dev_addr->net field must be initialized.
+ */
+int rdma_translate_ip(const struct sockaddr *addr,
+		      struct rdma_dev_addr *dev_addr);
+
+/**
+ * rdma_resolve_ip - Resolve source and destination IP addresses to
+ *   RDMA hardware addresses.
+ * @src_addr: An optional source address to use in the resolution.  If a
+ *   source address is not provided, a usable address will be returned via
+ *   the callback.
+ * @dst_addr: The destination address to resolve.
+ * @addr: A reference to a data location that will receive the resolved
+ *   addresses.  The data location must remain valid until the callback has
+ *   been invoked. The net field of the addr struct must be valid.
+ * @timeout_ms: Amount of time to wait for the address resolution to complete.
+ * @callback: Call invoked once address resolution has completed, timed out,
+ *   or been canceled.  A status of 0 indicates success.
+ * @resolve_by_gid_attr:	Resolve the ip based on the GID attribute from
+ *				rdma_dev_addr.
+ * @context: User-specified context associated with the call.
+ */
+int rdma_resolve_ip(struct sockaddr *src_addr, const struct sockaddr *dst_addr,
+		    struct rdma_dev_addr *addr, unsigned long timeout_ms,
+		    void (*callback)(int status, struct sockaddr *src_addr,
+				     struct rdma_dev_addr *addr, void *context),
+		    bool resolve_by_gid_attr, void *context);
+
+void rdma_addr_cancel(struct rdma_dev_addr *addr);
+
+int rdma_addr_size(const struct sockaddr *addr);
+int rdma_addr_size_in6(struct sockaddr_in6 *addr);
+int rdma_addr_size_kss(struct __kernel_sockaddr_storage *addr);
+
+static inline u16 ib_addr_get_pkey(struct rdma_dev_addr *dev_addr)
+{
+	return ((u16)dev_addr->broadcast[8] << 8) | (u16)dev_addr->broadcast[9];
+}
+
+static inline void ib_addr_set_pkey(struct rdma_dev_addr *dev_addr, u16 pkey)
+{
+	dev_addr->broadcast[8] = pkey >> 8;
+	dev_addr->broadcast[9] = (unsigned char) pkey;
+}
+
+static inline void ib_addr_get_mgid(struct rdma_dev_addr *dev_addr,
+				    union ib_gid *gid)
+{
+	memcpy(gid, dev_addr->broadcast + 4, sizeof *gid);
+}
+
+static inline int rdma_addr_gid_offset(struct rdma_dev_addr *dev_addr)
+{
+	return dev_addr->dev_type == ARPHRD_INFINIBAND ? 4 : 0;
+}
+
+static inline u16 rdma_vlan_dev_vlan_id(const struct net_device *dev)
+{
+	return is_vlan_dev(dev) ? vlan_dev_vlan_id(dev) : 0xffff;
+}
+
+static inline int rdma_ip2gid(struct sockaddr *addr, union ib_gid *gid)
+{
+	switch (addr->sa_family) {
+	case AF_INET:
+		ipv6_addr_set_v4mapped(((struct sockaddr_in *)
+					addr)->sin_addr.s_addr,
+				       (struct in6_addr *)gid);
+		break;
+	case AF_INET6:
+		*(struct in6_addr *)&gid->raw =
+			((struct sockaddr_in6 *)addr)->sin6_addr;
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/* Important - sockaddr should be a union of sockaddr_in and sockaddr_in6 */
+static inline void rdma_gid2ip(struct sockaddr *out, const union ib_gid *gid)
+{
+	if (ipv6_addr_v4mapped((struct in6_addr *)gid)) {
+		struct sockaddr_in *out_in = (struct sockaddr_in *)out;
+		memset(out_in, 0, sizeof(*out_in));
+		out_in->sin_family = AF_INET;
+		memcpy(&out_in->sin_addr.s_addr, gid->raw + 12, 4);
+	} else {
+		struct sockaddr_in6 *out_in = (struct sockaddr_in6 *)out;
+		memset(out_in, 0, sizeof(*out_in));
+		out_in->sin6_family = AF_INET6;
+		memcpy(&out_in->sin6_addr.s6_addr, gid->raw, 16);
+	}
+}
+
+/*
+ * rdma_get/set_sgid/dgid() APIs are applicable to IB, and iWarp.
+ * They are not applicable to RoCE.
+ * RoCE GIDs are derived from the IP addresses.
+ */
+static inline void rdma_addr_get_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid)
+{
+	memcpy(gid, dev_addr->src_dev_addr + rdma_addr_gid_offset(dev_addr),
+	       sizeof(*gid));
+}
+
+static inline void rdma_addr_set_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid)
+{
+	memcpy(dev_addr->src_dev_addr + rdma_addr_gid_offset(dev_addr), gid, sizeof *gid);
+}
+
+static inline void rdma_addr_get_dgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid)
+{
+	memcpy(gid, dev_addr->dst_dev_addr + rdma_addr_gid_offset(dev_addr), sizeof *gid);
+}
+
+static inline void rdma_addr_set_dgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid)
+{
+	memcpy(dev_addr->dst_dev_addr + rdma_addr_gid_offset(dev_addr), gid, sizeof *gid);
+}
+
+static inline enum ib_mtu iboe_get_mtu(int mtu)
+{
+	/*
+	 * Reduce IB headers from effective IBoE MTU.
+	 */
+	mtu = mtu - (IB_GRH_BYTES + IB_UDP_BYTES + IB_BTH_BYTES +
+		     IB_EXT_XRC_BYTES + IB_EXT_ATOMICETH_BYTES +
+		     IB_ICRC_BYTES);
+
+	if (mtu >= ib_mtu_enum_to_int(IB_MTU_4096))
+		return IB_MTU_4096;
+	else if (mtu >= ib_mtu_enum_to_int(IB_MTU_2048))
+		return IB_MTU_2048;
+	else if (mtu >= ib_mtu_enum_to_int(IB_MTU_1024))
+		return IB_MTU_1024;
+	else if (mtu >= ib_mtu_enum_to_int(IB_MTU_512))
+		return IB_MTU_512;
+	else if (mtu >= ib_mtu_enum_to_int(IB_MTU_256))
+		return IB_MTU_256;
+	else
+		return 0;
+}
+
+static inline int rdma_link_local_addr(struct in6_addr *addr)
+{
+	if (addr->s6_addr32[0] == htonl(0xfe800000) &&
+	    addr->s6_addr32[1] == 0)
+		return 1;
+
+	return 0;
+}
+
+static inline void rdma_get_ll_mac(struct in6_addr *addr, u8 *mac)
+{
+	memcpy(mac, &addr->s6_addr[8], 3);
+	memcpy(mac + 3, &addr->s6_addr[13], 3);
+	mac[0] ^= 2;
+}
+
+static inline int rdma_is_multicast_addr(struct in6_addr *addr)
+{
+	__be32 ipv4_addr;
+
+	if (addr->s6_addr[0] == 0xff)
+		return 1;
+
+	ipv4_addr = addr->s6_addr32[3];
+	return (ipv6_addr_v4mapped(addr) && ipv4_is_multicast(ipv4_addr));
+}
+
+static inline void rdma_get_mcast_mac(struct in6_addr *addr, u8 *mac)
+{
+	int i;
+
+	mac[0] = 0x33;
+	mac[1] = 0x33;
+	for (i = 2; i < 6; ++i)
+		mac[i] = addr->s6_addr[i + 10];
+}
+
+static inline u16 rdma_get_vlan_id(union ib_gid *dgid)
+{
+	u16 vid;
+
+	vid = dgid->raw[11] << 8 | dgid->raw[12];
+	return vid < 0x1000 ? vid : 0xffff;
+}
+
+static inline struct net_device *rdma_vlan_dev_real_dev(const struct net_device *dev)
+{
+	return is_vlan_dev(dev) ? vlan_dev_real_dev(dev) : NULL;
+}
+
+#endif /* IB_ADDR_H */
diff --git a/include/rdma/ib_cache.h b/include/rdma/ib_cache.h
new file mode 100644
index 000000000..bae29f50a
--- /dev/null
+++ b/include/rdma/ib_cache.h
@@ -0,0 +1,117 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2004 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Intel Corporation. All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ */
+
+#ifndef _IB_CACHE_H
+#define _IB_CACHE_H
+
+#include <rdma/ib_verbs.h>
+
+int rdma_query_gid(struct ib_device *device, u8 port_num, int index,
+		   union ib_gid *gid);
+void *rdma_read_gid_hw_context(const struct ib_gid_attr *attr);
+const struct ib_gid_attr *rdma_find_gid(struct ib_device *device,
+					const union ib_gid *gid,
+					enum ib_gid_type gid_type,
+					struct net_device *ndev);
+const struct ib_gid_attr *rdma_find_gid_by_port(struct ib_device *ib_dev,
+						const union ib_gid *gid,
+						enum ib_gid_type gid_type,
+						u8 port,
+						struct net_device *ndev);
+const struct ib_gid_attr *rdma_find_gid_by_filter(
+	struct ib_device *device, const union ib_gid *gid, u8 port_num,
+	bool (*filter)(const union ib_gid *gid, const struct ib_gid_attr *,
+		       void *),
+	void *context);
+
+int rdma_read_gid_l2_fields(const struct ib_gid_attr *attr,
+			    u16 *vlan_id, u8 *smac);
+struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr);
+
+/**
+ * ib_get_cached_pkey - Returns a cached PKey table entry
+ * @device: The device to query.
+ * @port_num: The port number of the device to query.
+ * @index: The index into the cached PKey table to query.
+ * @pkey: The PKey value found at the specified index.
+ *
+ * ib_get_cached_pkey() fetches the specified PKey table entry stored in
+ * the local software cache.
+ */
+int ib_get_cached_pkey(struct ib_device    *device_handle,
+		       u8                   port_num,
+		       int                  index,
+		       u16                 *pkey);
+
+/**
+ * ib_find_cached_pkey - Returns the PKey table index where a specified
+ *   PKey value occurs.
+ * @device: The device to query.
+ * @port_num: The port number of the device to search for the PKey.
+ * @pkey: The PKey value to search for.
+ * @index: The index into the cached PKey table where the PKey was found.
+ *
+ * ib_find_cached_pkey() searches the specified PKey table in
+ * the local software cache.
+ */
+int ib_find_cached_pkey(struct ib_device    *device,
+			u8                   port_num,
+			u16                  pkey,
+			u16                 *index);
+
+/**
+ * ib_find_exact_cached_pkey - Returns the PKey table index where a specified
+ *   PKey value occurs. Comparison uses the FULL 16 bits (incl membership bit)
+ * @device: The device to query.
+ * @port_num: The port number of the device to search for the PKey.
+ * @pkey: The PKey value to search for.
+ * @index: The index into the cached PKey table where the PKey was found.
+ *
+ * ib_find_exact_cached_pkey() searches the specified PKey table in
+ * the local software cache.
+ */
+int ib_find_exact_cached_pkey(struct ib_device    *device,
+			      u8                   port_num,
+			      u16                  pkey,
+			      u16                 *index);
+
+/**
+ * ib_get_cached_lmc - Returns a cached lmc table entry
+ * @device: The device to query.
+ * @port_num: The port number of the device to query.
+ * @lmc: The lmc value for the specified port for that device.
+ *
+ * ib_get_cached_lmc() fetches the specified lmc table entry stored in
+ * the local software cache.
+ */
+int ib_get_cached_lmc(struct ib_device *device,
+		      u8                port_num,
+		      u8                *lmc);
+
+/**
+ * ib_get_cached_port_state - Returns a cached port state table entry
+ * @device: The device to query.
+ * @port_num: The port number of the device to query.
+ * @port_state: port_state for the specified port for that device.
+ *
+ * ib_get_cached_port_state() fetches the specified port_state table entry stored in
+ * the local software cache.
+ */
+int ib_get_cached_port_state(struct ib_device *device,
+			      u8                port_num,
+			      enum ib_port_state *port_active);
+
+bool rdma_is_zero_gid(const union ib_gid *gid);
+const struct ib_gid_attr *rdma_get_gid_attr(struct ib_device *device,
+					    u8 port_num, int index);
+void rdma_put_gid_attr(const struct ib_gid_attr *attr);
+void rdma_hold_gid_attr(const struct ib_gid_attr *attr);
+ssize_t rdma_query_gid_table(struct ib_device *device,
+			     struct ib_uverbs_gid_entry *entries,
+			     size_t max_entries);
+
+#endif /* _IB_CACHE_H */
diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h
new file mode 100644
index 000000000..e23eb357b
--- /dev/null
+++ b/include/rdma/ib_cm.h
@@ -0,0 +1,572 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2004, 2005 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
+ * Copyright (c) 2004 Voltaire Corporation.  All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2019, Mellanox Technologies inc.  All rights reserved.
+ */
+
+#ifndef IB_CM_H
+#define IB_CM_H
+
+#include <rdma/ib_mad.h>
+#include <rdma/ib_sa.h>
+#include <rdma/rdma_cm.h>
+
+enum ib_cm_state {
+	IB_CM_IDLE,
+	IB_CM_LISTEN,
+	IB_CM_REQ_SENT,
+	IB_CM_REQ_RCVD,
+	IB_CM_MRA_REQ_SENT,
+	IB_CM_MRA_REQ_RCVD,
+	IB_CM_REP_SENT,
+	IB_CM_REP_RCVD,
+	IB_CM_MRA_REP_SENT,
+	IB_CM_MRA_REP_RCVD,
+	IB_CM_ESTABLISHED,
+	IB_CM_DREQ_SENT,
+	IB_CM_DREQ_RCVD,
+	IB_CM_TIMEWAIT,
+	IB_CM_SIDR_REQ_SENT,
+	IB_CM_SIDR_REQ_RCVD
+};
+
+enum ib_cm_lap_state {
+	IB_CM_LAP_UNINIT,
+	IB_CM_LAP_IDLE,
+	IB_CM_LAP_SENT,
+	IB_CM_LAP_RCVD,
+	IB_CM_MRA_LAP_SENT,
+	IB_CM_MRA_LAP_RCVD,
+};
+
+enum ib_cm_event_type {
+	IB_CM_REQ_ERROR,
+	IB_CM_REQ_RECEIVED,
+	IB_CM_REP_ERROR,
+	IB_CM_REP_RECEIVED,
+	IB_CM_RTU_RECEIVED,
+	IB_CM_USER_ESTABLISHED,
+	IB_CM_DREQ_ERROR,
+	IB_CM_DREQ_RECEIVED,
+	IB_CM_DREP_RECEIVED,
+	IB_CM_TIMEWAIT_EXIT,
+	IB_CM_MRA_RECEIVED,
+	IB_CM_REJ_RECEIVED,
+	IB_CM_LAP_ERROR,
+	IB_CM_LAP_RECEIVED,
+	IB_CM_APR_RECEIVED,
+	IB_CM_SIDR_REQ_ERROR,
+	IB_CM_SIDR_REQ_RECEIVED,
+	IB_CM_SIDR_REP_RECEIVED
+};
+
+enum ib_cm_data_size {
+	IB_CM_REQ_PRIVATE_DATA_SIZE	 = 92,
+	IB_CM_MRA_PRIVATE_DATA_SIZE	 = 222,
+	IB_CM_REJ_PRIVATE_DATA_SIZE	 = 148,
+	IB_CM_REP_PRIVATE_DATA_SIZE	 = 196,
+	IB_CM_RTU_PRIVATE_DATA_SIZE	 = 224,
+	IB_CM_DREQ_PRIVATE_DATA_SIZE	 = 220,
+	IB_CM_DREP_PRIVATE_DATA_SIZE	 = 224,
+	IB_CM_REJ_ARI_LENGTH		 = 72,
+	IB_CM_LAP_PRIVATE_DATA_SIZE	 = 168,
+	IB_CM_APR_PRIVATE_DATA_SIZE	 = 148,
+	IB_CM_APR_INFO_LENGTH		 = 72,
+	IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE = 216,
+	IB_CM_SIDR_REP_PRIVATE_DATA_SIZE = 136,
+	IB_CM_SIDR_REP_INFO_LENGTH	 = 72,
+};
+
+struct ib_cm_id;
+
+struct ib_cm_req_event_param {
+	struct ib_cm_id		*listen_id;
+
+	/* P_Key that was used by the GMP's BTH header */
+	u16			bth_pkey;
+
+	u8			port;
+
+	struct sa_path_rec	*primary_path;
+	struct sa_path_rec	*alternate_path;
+
+	/*
+	 * SGID attribute of the primary path. Currently only
+	 * useful for RoCE. Alternate path GID attributes
+	 * are not yet supported.
+	 */
+	const struct ib_gid_attr *ppath_sgid_attr;
+
+	__be64			remote_ca_guid;
+	u32			remote_qkey;
+	u32			remote_qpn;
+	enum ib_qp_type		qp_type;
+
+	u32			starting_psn;
+	u8			responder_resources;
+	u8			initiator_depth;
+	unsigned int		local_cm_response_timeout:5;
+	unsigned int		flow_control:1;
+	unsigned int		remote_cm_response_timeout:5;
+	unsigned int		retry_count:3;
+	unsigned int		rnr_retry_count:3;
+	unsigned int		srq:1;
+	struct rdma_ucm_ece	ece;
+};
+
+struct ib_cm_rep_event_param {
+	__be64			remote_ca_guid;
+	u32			remote_qkey;
+	u32			remote_qpn;
+	u32			starting_psn;
+	u8			responder_resources;
+	u8			initiator_depth;
+	unsigned int		target_ack_delay:5;
+	unsigned int		failover_accepted:2;
+	unsigned int		flow_control:1;
+	unsigned int		rnr_retry_count:3;
+	unsigned int		srq:1;
+	struct rdma_ucm_ece	ece;
+};
+
+enum ib_cm_rej_reason {
+	IB_CM_REJ_NO_QP				= 1,
+	IB_CM_REJ_NO_EEC			= 2,
+	IB_CM_REJ_NO_RESOURCES			= 3,
+	IB_CM_REJ_TIMEOUT			= 4,
+	IB_CM_REJ_UNSUPPORTED			= 5,
+	IB_CM_REJ_INVALID_COMM_ID		= 6,
+	IB_CM_REJ_INVALID_COMM_INSTANCE		= 7,
+	IB_CM_REJ_INVALID_SERVICE_ID		= 8,
+	IB_CM_REJ_INVALID_TRANSPORT_TYPE	= 9,
+	IB_CM_REJ_STALE_CONN			= 10,
+	IB_CM_REJ_RDC_NOT_EXIST			= 11,
+	IB_CM_REJ_INVALID_GID			= 12,
+	IB_CM_REJ_INVALID_LID			= 13,
+	IB_CM_REJ_INVALID_SL			= 14,
+	IB_CM_REJ_INVALID_TRAFFIC_CLASS		= 15,
+	IB_CM_REJ_INVALID_HOP_LIMIT		= 16,
+	IB_CM_REJ_INVALID_PACKET_RATE		= 17,
+	IB_CM_REJ_INVALID_ALT_GID		= 18,
+	IB_CM_REJ_INVALID_ALT_LID		= 19,
+	IB_CM_REJ_INVALID_ALT_SL		= 20,
+	IB_CM_REJ_INVALID_ALT_TRAFFIC_CLASS	= 21,
+	IB_CM_REJ_INVALID_ALT_HOP_LIMIT		= 22,
+	IB_CM_REJ_INVALID_ALT_PACKET_RATE	= 23,
+	IB_CM_REJ_PORT_CM_REDIRECT		= 24,
+	IB_CM_REJ_PORT_REDIRECT			= 25,
+	IB_CM_REJ_INVALID_MTU			= 26,
+	IB_CM_REJ_INSUFFICIENT_RESP_RESOURCES	= 27,
+	IB_CM_REJ_CONSUMER_DEFINED		= 28,
+	IB_CM_REJ_INVALID_RNR_RETRY		= 29,
+	IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID	= 30,
+	IB_CM_REJ_INVALID_CLASS_VERSION		= 31,
+	IB_CM_REJ_INVALID_FLOW_LABEL		= 32,
+	IB_CM_REJ_INVALID_ALT_FLOW_LABEL	= 33,
+	IB_CM_REJ_VENDOR_OPTION_NOT_SUPPORTED	= 35,
+};
+
+struct ib_cm_rej_event_param {
+	enum ib_cm_rej_reason	reason;
+	void			*ari;
+	u8			ari_length;
+};
+
+struct ib_cm_mra_event_param {
+	u8	service_timeout;
+};
+
+struct ib_cm_lap_event_param {
+	struct sa_path_rec	*alternate_path;
+};
+
+enum ib_cm_apr_status {
+	IB_CM_APR_SUCCESS,
+	IB_CM_APR_INVALID_COMM_ID,
+	IB_CM_APR_UNSUPPORTED,
+	IB_CM_APR_REJECT,
+	IB_CM_APR_REDIRECT,
+	IB_CM_APR_IS_CURRENT,
+	IB_CM_APR_INVALID_QPN_EECN,
+	IB_CM_APR_INVALID_LID,
+	IB_CM_APR_INVALID_GID,
+	IB_CM_APR_INVALID_FLOW_LABEL,
+	IB_CM_APR_INVALID_TCLASS,
+	IB_CM_APR_INVALID_HOP_LIMIT,
+	IB_CM_APR_INVALID_PACKET_RATE,
+	IB_CM_APR_INVALID_SL
+};
+
+struct ib_cm_apr_event_param {
+	enum ib_cm_apr_status	ap_status;
+	void			*apr_info;
+	u8			info_len;
+};
+
+struct ib_cm_sidr_req_event_param {
+	struct ib_cm_id		*listen_id;
+	__be64			service_id;
+
+	/*
+	 * SGID attribute of the request. Currently only
+	 * useful for RoCE.
+	 */
+	const struct ib_gid_attr *sgid_attr;
+	/* P_Key that was used by the GMP's BTH header */
+	u16			bth_pkey;
+	u8			port;
+	u16			pkey;
+};
+
+enum ib_cm_sidr_status {
+	IB_SIDR_SUCCESS,
+	IB_SIDR_UNSUPPORTED,
+	IB_SIDR_REJECT,
+	IB_SIDR_NO_QP,
+	IB_SIDR_REDIRECT,
+	IB_SIDR_UNSUPPORTED_VERSION
+};
+
+struct ib_cm_sidr_rep_event_param {
+	enum ib_cm_sidr_status	status;
+	u32			qkey;
+	u32			qpn;
+	void			*info;
+	const struct ib_gid_attr *sgid_attr;
+	u8			info_len;
+};
+
+struct ib_cm_event {
+	enum ib_cm_event_type	event;
+	union {
+		struct ib_cm_req_event_param	req_rcvd;
+		struct ib_cm_rep_event_param	rep_rcvd;
+		/* No data for RTU received events. */
+		struct ib_cm_rej_event_param	rej_rcvd;
+		struct ib_cm_mra_event_param	mra_rcvd;
+		struct ib_cm_lap_event_param	lap_rcvd;
+		struct ib_cm_apr_event_param	apr_rcvd;
+		/* No data for DREQ/DREP received events. */
+		struct ib_cm_sidr_req_event_param sidr_req_rcvd;
+		struct ib_cm_sidr_rep_event_param sidr_rep_rcvd;
+		enum ib_wc_status		send_status;
+	} param;
+
+	void			*private_data;
+};
+
+#define CM_REQ_ATTR_ID		cpu_to_be16(0x0010)
+#define CM_MRA_ATTR_ID		cpu_to_be16(0x0011)
+#define CM_REJ_ATTR_ID		cpu_to_be16(0x0012)
+#define CM_REP_ATTR_ID		cpu_to_be16(0x0013)
+#define CM_RTU_ATTR_ID		cpu_to_be16(0x0014)
+#define CM_DREQ_ATTR_ID		cpu_to_be16(0x0015)
+#define CM_DREP_ATTR_ID		cpu_to_be16(0x0016)
+#define CM_SIDR_REQ_ATTR_ID	cpu_to_be16(0x0017)
+#define CM_SIDR_REP_ATTR_ID	cpu_to_be16(0x0018)
+#define CM_LAP_ATTR_ID		cpu_to_be16(0x0019)
+#define CM_APR_ATTR_ID		cpu_to_be16(0x001A)
+
+/**
+ * ib_cm_handler - User-defined callback to process communication events.
+ * @cm_id: Communication identifier associated with the reported event.
+ * @event: Information about the communication event.
+ *
+ * IB_CM_REQ_RECEIVED and IB_CM_SIDR_REQ_RECEIVED communication events
+ * generated as a result of listen requests result in the allocation of a
+ * new @cm_id.  The new @cm_id is returned to the user through this callback.
+ * Clients are responsible for destroying the new @cm_id.  For peer-to-peer
+ * IB_CM_REQ_RECEIVED and all other events, the returned @cm_id corresponds
+ * to a user's existing communication identifier.
+ *
+ * Users may not call ib_destroy_cm_id while in the context of this callback;
+ * however, returning a non-zero value instructs the communication manager to
+ * destroy the @cm_id after the callback completes.
+ */
+typedef int (*ib_cm_handler)(struct ib_cm_id *cm_id,
+			     const struct ib_cm_event *event);
+
+struct ib_cm_id {
+	ib_cm_handler		cm_handler;
+	void			*context;
+	struct ib_device	*device;
+	__be64			service_id;
+	__be64			service_mask;
+	enum ib_cm_state	state;		/* internal CM/debug use */
+	enum ib_cm_lap_state	lap_state;	/* internal CM/debug use */
+	__be32			local_id;
+	__be32			remote_id;
+	u32			remote_cm_qpn;  /* 1 unless redirected */
+};
+
+/**
+ * ib_create_cm_id - Allocate a communication identifier.
+ * @device: Device associated with the cm_id.  All related communication will
+ * be associated with the specified device.
+ * @cm_handler: Callback invoked to notify the user of CM events.
+ * @context: User specified context associated with the communication
+ *   identifier.
+ *
+ * Communication identifiers are used to track connection states, service
+ * ID resolution requests, and listen requests.
+ */
+struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
+				 ib_cm_handler cm_handler,
+				 void *context);
+
+/**
+ * ib_destroy_cm_id - Destroy a connection identifier.
+ * @cm_id: Connection identifier to destroy.
+ *
+ * This call blocks until the connection identifier is destroyed.
+ */
+void ib_destroy_cm_id(struct ib_cm_id *cm_id);
+
+#define IB_SERVICE_ID_AGN_MASK	cpu_to_be64(0xFF00000000000000ULL)
+#define IB_CM_ASSIGN_SERVICE_ID	cpu_to_be64(0x0200000000000000ULL)
+#define IB_CMA_SERVICE_ID	cpu_to_be64(0x0000000001000000ULL)
+#define IB_CMA_SERVICE_ID_MASK	cpu_to_be64(0xFFFFFFFFFF000000ULL)
+#define IB_SDP_SERVICE_ID	cpu_to_be64(0x0000000000010000ULL)
+#define IB_SDP_SERVICE_ID_MASK	cpu_to_be64(0xFFFFFFFFFFFF0000ULL)
+
+/**
+ * ib_cm_listen - Initiates listening on the specified service ID for
+ *   connection and service ID resolution requests.
+ * @cm_id: Connection identifier associated with the listen request.
+ * @service_id: Service identifier matched against incoming connection
+ *   and service ID resolution requests.  The service ID should be specified
+ *   network-byte order.  If set to IB_CM_ASSIGN_SERVICE_ID, the CM will
+ *   assign a service ID to the caller.
+ * @service_mask: Mask applied to service ID used to listen across a
+ *   range of service IDs.  If set to 0, the service ID is matched
+ *   exactly.  This parameter is ignored if %service_id is set to
+ *   IB_CM_ASSIGN_SERVICE_ID.
+ */
+int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id,
+		 __be64 service_mask);
+
+struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device,
+				     ib_cm_handler cm_handler,
+				     __be64 service_id);
+
+struct ib_cm_req_param {
+	struct sa_path_rec	*primary_path;
+	struct sa_path_rec	*alternate_path;
+	const struct ib_gid_attr *ppath_sgid_attr;
+	__be64			service_id;
+	u32			qp_num;
+	enum ib_qp_type		qp_type;
+	u32			starting_psn;
+	const void		*private_data;
+	u8			private_data_len;
+	u8			responder_resources;
+	u8			initiator_depth;
+	u8			remote_cm_response_timeout;
+	u8			flow_control;
+	u8			local_cm_response_timeout;
+	u8			retry_count;
+	u8			rnr_retry_count;
+	u8			max_cm_retries;
+	u8			srq;
+	struct rdma_ucm_ece	ece;
+};
+
+/**
+ * ib_send_cm_req - Sends a connection request to the remote node.
+ * @cm_id: Connection identifier that will be associated with the
+ *   connection request.
+ * @param: Connection request information needed to establish the
+ *   connection.
+ */
+int ib_send_cm_req(struct ib_cm_id *cm_id,
+		   struct ib_cm_req_param *param);
+
+struct ib_cm_rep_param {
+	u32		qp_num;
+	u32		starting_psn;
+	const void	*private_data;
+	u8		private_data_len;
+	u8		responder_resources;
+	u8		initiator_depth;
+	u8		failover_accepted;
+	u8		flow_control;
+	u8		rnr_retry_count;
+	u8		srq;
+	struct rdma_ucm_ece ece;
+};
+
+/**
+ * ib_send_cm_rep - Sends a connection reply in response to a connection
+ *   request.
+ * @cm_id: Connection identifier that will be associated with the
+ *   connection request.
+ * @param: Connection reply information needed to establish the
+ *   connection.
+ */
+int ib_send_cm_rep(struct ib_cm_id *cm_id,
+		   struct ib_cm_rep_param *param);
+
+/**
+ * ib_send_cm_rtu - Sends a connection ready to use message in response
+ *   to a connection reply message.
+ * @cm_id: Connection identifier associated with the connection request.
+ * @private_data: Optional user-defined private data sent with the
+ *   ready to use message.
+ * @private_data_len: Size of the private data buffer, in bytes.
+ */
+int ib_send_cm_rtu(struct ib_cm_id *cm_id,
+		   const void *private_data,
+		   u8 private_data_len);
+
+/**
+ * ib_send_cm_dreq - Sends a disconnection request for an existing
+ *   connection.
+ * @cm_id: Connection identifier associated with the connection being
+ *   released.
+ * @private_data: Optional user-defined private data sent with the
+ *   disconnection request message.
+ * @private_data_len: Size of the private data buffer, in bytes.
+ */
+int ib_send_cm_dreq(struct ib_cm_id *cm_id,
+		    const void *private_data,
+		    u8 private_data_len);
+
+/**
+ * ib_send_cm_drep - Sends a disconnection reply to a disconnection request.
+ * @cm_id: Connection identifier associated with the connection being
+ *   released.
+ * @private_data: Optional user-defined private data sent with the
+ *   disconnection reply message.
+ * @private_data_len: Size of the private data buffer, in bytes.
+ *
+ * If the cm_id is in the correct state, the CM will transition the connection
+ * to the timewait state, even if an error occurs sending the DREP message.
+ */
+int ib_send_cm_drep(struct ib_cm_id *cm_id,
+		    const void *private_data,
+		    u8 private_data_len);
+
+/**
+ * ib_cm_notify - Notifies the CM of an event reported to the consumer.
+ * @cm_id: Connection identifier to transition to established.
+ * @event: Type of event.
+ *
+ * This routine should be invoked by users to notify the CM of relevant
+ * communication events.  Events that should be reported to the CM and
+ * when to report them are:
+ *
+ * IB_EVENT_COMM_EST - Used when a message is received on a connected
+ *    QP before an RTU has been received.
+ * IB_EVENT_PATH_MIG - Notifies the CM that the connection has failed over
+ *   to the alternate path.
+ */
+int ib_cm_notify(struct ib_cm_id *cm_id, enum ib_event_type event);
+
+/**
+ * ib_send_cm_rej - Sends a connection rejection message to the
+ *   remote node.
+ * @cm_id: Connection identifier associated with the connection being
+ *   rejected.
+ * @reason: Reason for the connection request rejection.
+ * @ari: Optional additional rejection information.
+ * @ari_length: Size of the additional rejection information, in bytes.
+ * @private_data: Optional user-defined private data sent with the
+ *   rejection message.
+ * @private_data_len: Size of the private data buffer, in bytes.
+ */
+int ib_send_cm_rej(struct ib_cm_id *cm_id,
+		   enum ib_cm_rej_reason reason,
+		   void *ari,
+		   u8 ari_length,
+		   const void *private_data,
+		   u8 private_data_len);
+
+#define IB_CM_MRA_FLAG_DELAY 0x80  /* Send MRA only after a duplicate msg */
+
+/**
+ * ib_send_cm_mra - Sends a message receipt acknowledgement to a connection
+ *   message.
+ * @cm_id: Connection identifier associated with the connection message.
+ * @service_timeout: The lower 5-bits specify the maximum time required for
+ *   the sender to reply to the connection message.  The upper 3-bits
+ *   specify additional control flags.
+ * @private_data: Optional user-defined private data sent with the
+ *   message receipt acknowledgement.
+ * @private_data_len: Size of the private data buffer, in bytes.
+ */
+int ib_send_cm_mra(struct ib_cm_id *cm_id,
+		   u8 service_timeout,
+		   const void *private_data,
+		   u8 private_data_len);
+
+/**
+ * ib_cm_init_qp_attr - Initializes the QP attributes for use in transitioning
+ *   to a specified QP state.
+ * @cm_id: Communication identifier associated with the QP attributes to
+ *   initialize.
+ * @qp_attr: On input, specifies the desired QP state.  On output, the
+ *   mandatory and desired optional attributes will be set in order to
+ *   modify the QP to the specified state.
+ * @qp_attr_mask: The QP attribute mask that may be used to transition the
+ *   QP to the specified state.
+ *
+ * Users must set the @qp_attr->qp_state to the desired QP state.  This call
+ * will set all required attributes for the given transition, along with
+ * known optional attributes.  Users may override the attributes returned from
+ * this call before calling ib_modify_qp.
+ */
+int ib_cm_init_qp_attr(struct ib_cm_id *cm_id,
+		       struct ib_qp_attr *qp_attr,
+		       int *qp_attr_mask);
+
+struct ib_cm_sidr_req_param {
+	struct sa_path_rec	*path;
+	const struct ib_gid_attr *sgid_attr;
+	__be64			service_id;
+	unsigned long		timeout_ms;
+	const void		*private_data;
+	u8			private_data_len;
+	u8			max_cm_retries;
+};
+
+/**
+ * ib_send_cm_sidr_req - Sends a service ID resolution request to the
+ *   remote node.
+ * @cm_id: Communication identifier that will be associated with the
+ *   service ID resolution request.
+ * @param: Service ID resolution request information.
+ */
+int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
+			struct ib_cm_sidr_req_param *param);
+
+struct ib_cm_sidr_rep_param {
+	u32			qp_num;
+	u32			qkey;
+	enum ib_cm_sidr_status	status;
+	const void		*info;
+	u8			info_length;
+	const void		*private_data;
+	u8			private_data_len;
+	struct rdma_ucm_ece	ece;
+};
+
+/**
+ * ib_send_cm_sidr_rep - Sends a service ID resolution reply to the
+ *   remote node.
+ * @cm_id: Communication identifier associated with the received service ID
+ *   resolution request.
+ * @param: Service ID resolution reply information.
+ */
+int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id,
+			struct ib_cm_sidr_rep_param *param);
+
+/**
+ * ibcm_reject_msg - return a pointer to a reject message string.
+ * @reason: Value returned in the REJECT event status field.
+ */
+const char *__attribute_const__ ibcm_reject_msg(int reason);
+
+#endif /* IB_CM_H */
diff --git a/include/rdma/ib_hdrs.h b/include/rdma/ib_hdrs.h
new file mode 100644
index 000000000..57c1ac881
--- /dev/null
+++ b/include/rdma/ib_hdrs.h
@@ -0,0 +1,311 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+/*
+ * Copyright(c) 2016 - 2018 Intel Corporation.
+ */
+
+#ifndef IB_HDRS_H
+#define IB_HDRS_H
+
+#include <linux/types.h>
+#include <asm/unaligned.h>
+#include <rdma/ib_verbs.h>
+
+#define IB_SEQ_NAK	(3 << 29)
+
+/* AETH NAK opcode values */
+#define IB_RNR_NAK                      0x20
+#define IB_NAK_PSN_ERROR                0x60
+#define IB_NAK_INVALID_REQUEST          0x61
+#define IB_NAK_REMOTE_ACCESS_ERROR      0x62
+#define IB_NAK_REMOTE_OPERATIONAL_ERROR 0x63
+#define IB_NAK_INVALID_RD_REQUEST       0x64
+
+#define IB_BTH_REQ_ACK		BIT(31)
+#define IB_BTH_SOLICITED	BIT(23)
+#define IB_BTH_MIG_REQ		BIT(22)
+
+#define IB_GRH_VERSION		6
+#define IB_GRH_VERSION_MASK	0xF
+#define IB_GRH_VERSION_SHIFT	28
+#define IB_GRH_TCLASS_MASK	0xFF
+#define IB_GRH_TCLASS_SHIFT	20
+#define IB_GRH_FLOW_MASK	0xFFFFF
+#define IB_GRH_FLOW_SHIFT	0
+#define IB_GRH_NEXT_HDR		0x1B
+#define IB_FECN_SHIFT 31
+#define IB_FECN_MASK 1
+#define IB_FECN_SMASK BIT(IB_FECN_SHIFT)
+#define IB_BECN_SHIFT 30
+#define IB_BECN_MASK 1
+#define IB_BECN_SMASK BIT(IB_BECN_SHIFT)
+
+#define IB_AETH_CREDIT_SHIFT	24
+#define IB_AETH_CREDIT_MASK	0x1F
+#define IB_AETH_CREDIT_INVAL	0x1F
+#define IB_AETH_NAK_SHIFT	29
+#define IB_MSN_MASK		0xFFFFFF
+
+struct ib_reth {
+	__be64 vaddr;        /* potentially unaligned */
+	__be32 rkey;
+	__be32 length;
+} __packed;
+
+struct ib_atomic_eth {
+	__be64 vaddr;        /* potentially unaligned */
+	__be32 rkey;
+	__be64 swap_data;    /* potentially unaligned */
+	__be64 compare_data; /* potentially unaligned */
+} __packed;
+
+#include <rdma/tid_rdma_defs.h>
+
+union ib_ehdrs {
+	struct {
+		__be32 deth[2];
+		__be32 imm_data;
+	} ud;
+	struct {
+		struct ib_reth reth;
+		__be32 imm_data;
+	} rc;
+	struct {
+		__be32 aeth;
+		__be64 atomic_ack_eth; /* potentially unaligned */
+	} __packed at;
+	__be32 imm_data;
+	__be32 aeth;
+	__be32 ieth;
+	struct ib_atomic_eth atomic_eth;
+	/* TID RDMA headers */
+	union {
+		struct tid_rdma_read_req r_req;
+		struct tid_rdma_read_resp r_rsp;
+		struct tid_rdma_write_req w_req;
+		struct tid_rdma_write_resp w_rsp;
+		struct tid_rdma_write_data w_data;
+		struct tid_rdma_resync resync;
+		struct tid_rdma_ack ack;
+	} tid_rdma;
+}  __packed;
+
+struct ib_other_headers {
+	__be32 bth[3];
+	union ib_ehdrs u;
+} __packed;
+
+struct ib_header {
+	__be16 lrh[4];
+	union {
+		struct {
+			struct ib_grh grh;
+			struct ib_other_headers oth;
+		} l;
+		struct ib_other_headers oth;
+	} u;
+} __packed;
+
+/* accessors for unaligned __be64 items */
+
+static inline u64 ib_u64_get(__be64 *p)
+{
+	return get_unaligned_be64(p);
+}
+
+static inline void ib_u64_put(u64 val, __be64 *p)
+{
+	put_unaligned_be64(val, p);
+}
+
+static inline u64 get_ib_reth_vaddr(struct ib_reth *reth)
+{
+	return ib_u64_get(&reth->vaddr);
+}
+
+static inline void put_ib_reth_vaddr(u64 val, struct ib_reth *reth)
+{
+	ib_u64_put(val, &reth->vaddr);
+}
+
+static inline u64 get_ib_ateth_vaddr(struct ib_atomic_eth *ateth)
+{
+	return ib_u64_get(&ateth->vaddr);
+}
+
+static inline void put_ib_ateth_vaddr(u64 val, struct ib_atomic_eth *ateth)
+{
+	ib_u64_put(val, &ateth->vaddr);
+}
+
+static inline u64 get_ib_ateth_swap(struct ib_atomic_eth *ateth)
+{
+	return ib_u64_get(&ateth->swap_data);
+}
+
+static inline void put_ib_ateth_swap(u64 val, struct ib_atomic_eth *ateth)
+{
+	ib_u64_put(val, &ateth->swap_data);
+}
+
+static inline u64 get_ib_ateth_compare(struct ib_atomic_eth *ateth)
+{
+	return ib_u64_get(&ateth->compare_data);
+}
+
+static inline void put_ib_ateth_compare(u64 val, struct ib_atomic_eth *ateth)
+{
+	ib_u64_put(val, &ateth->compare_data);
+}
+
+/*
+ * 9B/IB Packet Format
+ */
+#define IB_LNH_MASK		3
+#define IB_SC_MASK		0xf
+#define IB_SC_SHIFT		12
+#define IB_SC5_MASK		0x10
+#define IB_SL_MASK		0xf
+#define IB_SL_SHIFT		4
+#define IB_SL_SHIFT		4
+#define IB_LVER_MASK	0xf
+#define IB_LVER_SHIFT	8
+
+static inline u8 ib_get_lnh(struct ib_header *hdr)
+{
+	return (be16_to_cpu(hdr->lrh[0]) & IB_LNH_MASK);
+}
+
+static inline u8 ib_get_sc(struct ib_header *hdr)
+{
+	return ((be16_to_cpu(hdr->lrh[0]) >> IB_SC_SHIFT) & IB_SC_MASK);
+}
+
+static inline bool ib_is_sc5(u16 sc5)
+{
+	return !!(sc5 & IB_SC5_MASK);
+}
+
+static inline u8 ib_get_sl(struct ib_header *hdr)
+{
+	return ((be16_to_cpu(hdr->lrh[0]) >> IB_SL_SHIFT) & IB_SL_MASK);
+}
+
+static inline u16 ib_get_dlid(struct ib_header *hdr)
+{
+	return (be16_to_cpu(hdr->lrh[1]));
+}
+
+static inline u16 ib_get_slid(struct ib_header *hdr)
+{
+	return (be16_to_cpu(hdr->lrh[3]));
+}
+
+static inline u8 ib_get_lver(struct ib_header *hdr)
+{
+	return (u8)((be16_to_cpu(hdr->lrh[0]) >> IB_LVER_SHIFT) &
+		   IB_LVER_MASK);
+}
+
+static inline u16 ib_get_len(struct ib_header *hdr)
+{
+	return (u16)(be16_to_cpu(hdr->lrh[2]));
+}
+
+static inline u32 ib_get_qkey(struct ib_other_headers *ohdr)
+{
+	return be32_to_cpu(ohdr->u.ud.deth[0]);
+}
+
+static inline u32 ib_get_sqpn(struct ib_other_headers *ohdr)
+{
+	return ((be32_to_cpu(ohdr->u.ud.deth[1])) & IB_QPN_MASK);
+}
+
+/*
+ * BTH
+ */
+#define IB_BTH_OPCODE_MASK	0xff
+#define IB_BTH_OPCODE_SHIFT	24
+#define IB_BTH_PAD_MASK	3
+#define IB_BTH_PKEY_MASK	0xffff
+#define IB_BTH_PAD_SHIFT	20
+#define IB_BTH_A_MASK		1
+#define IB_BTH_A_SHIFT		31
+#define IB_BTH_M_MASK		1
+#define IB_BTH_M_SHIFT		22
+#define IB_BTH_SE_MASK		1
+#define IB_BTH_SE_SHIFT	23
+#define IB_BTH_TVER_MASK	0xf
+#define IB_BTH_TVER_SHIFT	16
+
+static inline u8 ib_bth_get_pad(struct ib_other_headers *ohdr)
+{
+	return ((be32_to_cpu(ohdr->bth[0]) >> IB_BTH_PAD_SHIFT) &
+		   IB_BTH_PAD_MASK);
+}
+
+static inline u16 ib_bth_get_pkey(struct ib_other_headers *ohdr)
+{
+	return (be32_to_cpu(ohdr->bth[0]) & IB_BTH_PKEY_MASK);
+}
+
+static inline u8 ib_bth_get_opcode(struct ib_other_headers *ohdr)
+{
+	return ((be32_to_cpu(ohdr->bth[0]) >> IB_BTH_OPCODE_SHIFT) &
+		   IB_BTH_OPCODE_MASK);
+}
+
+static inline u8 ib_bth_get_ackreq(struct ib_other_headers *ohdr)
+{
+	return (u8)((be32_to_cpu(ohdr->bth[2]) >> IB_BTH_A_SHIFT) &
+		   IB_BTH_A_MASK);
+}
+
+static inline u8 ib_bth_get_migreq(struct ib_other_headers *ohdr)
+{
+	return (u8)((be32_to_cpu(ohdr->bth[0]) >> IB_BTH_M_SHIFT) &
+		    IB_BTH_M_MASK);
+}
+
+static inline u8 ib_bth_get_se(struct ib_other_headers *ohdr)
+{
+	return (u8)((be32_to_cpu(ohdr->bth[0]) >> IB_BTH_SE_SHIFT) &
+		    IB_BTH_SE_MASK);
+}
+
+static inline u32 ib_bth_get_psn(struct ib_other_headers *ohdr)
+{
+	return (u32)(be32_to_cpu(ohdr->bth[2]));
+}
+
+static inline u32 ib_bth_get_qpn(struct ib_other_headers *ohdr)
+{
+	return (u32)((be32_to_cpu(ohdr->bth[1])) & IB_QPN_MASK);
+}
+
+static inline bool ib_bth_get_becn(struct ib_other_headers *ohdr)
+{
+	return (ohdr->bth[1]) & cpu_to_be32(IB_BECN_SMASK);
+}
+
+static inline bool ib_bth_get_fecn(struct ib_other_headers *ohdr)
+{
+	return (ohdr->bth[1]) & cpu_to_be32(IB_FECN_SMASK);
+}
+
+static inline u8 ib_bth_get_tver(struct ib_other_headers *ohdr)
+{
+	return (u8)((be32_to_cpu(ohdr->bth[0]) >> IB_BTH_TVER_SHIFT)  &
+		    IB_BTH_TVER_MASK);
+}
+
+static inline bool ib_bth_is_solicited(struct ib_other_headers *ohdr)
+{
+	return ohdr->bth[0] & cpu_to_be32(IB_BTH_SOLICITED);
+}
+
+static inline bool ib_bth_is_migration(struct ib_other_headers *ohdr)
+{
+	return ohdr->bth[0] & cpu_to_be32(IB_BTH_MIG_REQ);
+}
+#endif                          /* IB_HDRS_H */
diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h
new file mode 100644
index 000000000..8dfb1ddf3
--- /dev/null
+++ b/include/rdma/ib_mad.h
@@ -0,0 +1,819 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2004 Mellanox Technologies Ltd.  All rights reserved.
+ * Copyright (c) 2004 Infinicon Corporation.  All rights reserved.
+ * Copyright (c) 2004 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
+ * Copyright (c) 2004-2006 Voltaire Corporation.  All rights reserved.
+ */
+
+#ifndef IB_MAD_H
+#define IB_MAD_H
+
+#include <linux/list.h>
+
+#include <rdma/ib_verbs.h>
+#include <uapi/rdma/ib_user_mad.h>
+
+/* Management base versions */
+#define IB_MGMT_BASE_VERSION			1
+#define OPA_MGMT_BASE_VERSION			0x80
+
+#define OPA_SM_CLASS_VERSION			0x80
+
+/* Management classes */
+#define IB_MGMT_CLASS_SUBN_LID_ROUTED		0x01
+#define IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE	0x81
+#define IB_MGMT_CLASS_SUBN_ADM			0x03
+#define IB_MGMT_CLASS_PERF_MGMT			0x04
+#define IB_MGMT_CLASS_BM			0x05
+#define IB_MGMT_CLASS_DEVICE_MGMT		0x06
+#define IB_MGMT_CLASS_CM			0x07
+#define IB_MGMT_CLASS_SNMP			0x08
+#define IB_MGMT_CLASS_DEVICE_ADM		0x10
+#define IB_MGMT_CLASS_BOOT_MGMT			0x11
+#define IB_MGMT_CLASS_BIS			0x12
+#define IB_MGMT_CLASS_CONG_MGMT			0x21
+#define IB_MGMT_CLASS_VENDOR_RANGE2_START	0x30
+#define IB_MGMT_CLASS_VENDOR_RANGE2_END		0x4F
+
+#define	IB_OPENIB_OUI				(0x001405)
+
+/* Management methods */
+#define IB_MGMT_METHOD_GET			0x01
+#define IB_MGMT_METHOD_SET			0x02
+#define IB_MGMT_METHOD_GET_RESP			0x81
+#define IB_MGMT_METHOD_SEND			0x03
+#define IB_MGMT_METHOD_TRAP			0x05
+#define IB_MGMT_METHOD_REPORT			0x06
+#define IB_MGMT_METHOD_REPORT_RESP		0x86
+#define IB_MGMT_METHOD_TRAP_REPRESS		0x07
+
+#define IB_MGMT_METHOD_RESP			0x80
+#define IB_BM_ATTR_MOD_RESP			cpu_to_be32(1)
+
+#define IB_MGMT_MAX_METHODS			128
+
+/* MAD Status field bit masks */
+#define IB_MGMT_MAD_STATUS_SUCCESS			0x0000
+#define IB_MGMT_MAD_STATUS_BUSY				0x0001
+#define IB_MGMT_MAD_STATUS_REDIRECT_REQD		0x0002
+#define IB_MGMT_MAD_STATUS_BAD_VERSION			0x0004
+#define IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD		0x0008
+#define IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD_ATTRIB	0x000c
+#define IB_MGMT_MAD_STATUS_INVALID_ATTRIB_VALUE		0x001c
+
+/* RMPP information */
+#define IB_MGMT_RMPP_VERSION			1
+
+#define IB_MGMT_RMPP_TYPE_DATA			1
+#define IB_MGMT_RMPP_TYPE_ACK			2
+#define IB_MGMT_RMPP_TYPE_STOP			3
+#define IB_MGMT_RMPP_TYPE_ABORT			4
+
+#define IB_MGMT_RMPP_FLAG_ACTIVE		1
+#define IB_MGMT_RMPP_FLAG_FIRST			(1<<1)
+#define IB_MGMT_RMPP_FLAG_LAST			(1<<2)
+
+#define IB_MGMT_RMPP_NO_RESPTIME		0x1F
+
+#define	IB_MGMT_RMPP_STATUS_SUCCESS		0
+#define	IB_MGMT_RMPP_STATUS_RESX		1
+#define	IB_MGMT_RMPP_STATUS_ABORT_MIN		118
+#define	IB_MGMT_RMPP_STATUS_T2L			118
+#define	IB_MGMT_RMPP_STATUS_BAD_LEN		119
+#define	IB_MGMT_RMPP_STATUS_BAD_SEG		120
+#define	IB_MGMT_RMPP_STATUS_BADT		121
+#define	IB_MGMT_RMPP_STATUS_W2S			122
+#define	IB_MGMT_RMPP_STATUS_S2B			123
+#define	IB_MGMT_RMPP_STATUS_BAD_STATUS		124
+#define	IB_MGMT_RMPP_STATUS_UNV			125
+#define	IB_MGMT_RMPP_STATUS_TMR			126
+#define	IB_MGMT_RMPP_STATUS_UNSPEC		127
+#define	IB_MGMT_RMPP_STATUS_ABORT_MAX		127
+
+#define IB_QP0		0
+#define IB_QP1		cpu_to_be32(1)
+#define IB_QP1_QKEY	0x80010000
+#define IB_QP_SET_QKEY	0x80000000
+
+#define IB_DEFAULT_PKEY_PARTIAL 0x7FFF
+#define IB_DEFAULT_PKEY_FULL	0xFFFF
+
+/*
+ * Generic trap/notice types
+ */
+#define IB_NOTICE_TYPE_FATAL	0x80
+#define IB_NOTICE_TYPE_URGENT	0x81
+#define IB_NOTICE_TYPE_SECURITY	0x82
+#define IB_NOTICE_TYPE_SM	0x83
+#define IB_NOTICE_TYPE_INFO	0x84
+
+/*
+ * Generic trap/notice producers
+ */
+#define IB_NOTICE_PROD_CA		cpu_to_be16(1)
+#define IB_NOTICE_PROD_SWITCH		cpu_to_be16(2)
+#define IB_NOTICE_PROD_ROUTER		cpu_to_be16(3)
+#define IB_NOTICE_PROD_CLASS_MGR	cpu_to_be16(4)
+
+enum {
+	IB_MGMT_MAD_HDR = 24,
+	IB_MGMT_MAD_DATA = 232,
+	IB_MGMT_RMPP_HDR = 36,
+	IB_MGMT_RMPP_DATA = 220,
+	IB_MGMT_VENDOR_HDR = 40,
+	IB_MGMT_VENDOR_DATA = 216,
+	IB_MGMT_SA_HDR = 56,
+	IB_MGMT_SA_DATA = 200,
+	IB_MGMT_DEVICE_HDR = 64,
+	IB_MGMT_DEVICE_DATA = 192,
+	IB_MGMT_MAD_SIZE = IB_MGMT_MAD_HDR + IB_MGMT_MAD_DATA,
+	OPA_MGMT_MAD_DATA = 2024,
+	OPA_MGMT_RMPP_DATA = 2012,
+	OPA_MGMT_MAD_SIZE = IB_MGMT_MAD_HDR + OPA_MGMT_MAD_DATA,
+};
+
+struct ib_mad_hdr {
+	u8	base_version;
+	u8	mgmt_class;
+	u8	class_version;
+	u8	method;
+	__be16	status;
+	__be16	class_specific;
+	__be64	tid;
+	__be16	attr_id;
+	__be16	resv;
+	__be32	attr_mod;
+};
+
+struct ib_rmpp_hdr {
+	u8	rmpp_version;
+	u8	rmpp_type;
+	u8	rmpp_rtime_flags;
+	u8	rmpp_status;
+	__be32	seg_num;
+	__be32	paylen_newwin;
+};
+
+typedef u64 __bitwise ib_sa_comp_mask;
+
+#define IB_SA_COMP_MASK(n) ((__force ib_sa_comp_mask) cpu_to_be64(1ull << (n)))
+
+/*
+ * ib_sa_hdr and ib_sa_mad structures must be packed because they have
+ * 64-bit fields that are only 32-bit aligned. 64-bit architectures will
+ * lay them out wrong otherwise.  (And unfortunately they are sent on
+ * the wire so we can't change the layout)
+ */
+struct ib_sa_hdr {
+	__be64			sm_key;
+	__be16			attr_offset;
+	__be16			reserved;
+	ib_sa_comp_mask		comp_mask;
+} __packed;
+
+struct ib_mad {
+	struct ib_mad_hdr	mad_hdr;
+	u8			data[IB_MGMT_MAD_DATA];
+};
+
+struct opa_mad {
+	struct ib_mad_hdr	mad_hdr;
+	u8			data[OPA_MGMT_MAD_DATA];
+};
+
+struct ib_rmpp_mad {
+	struct ib_mad_hdr	mad_hdr;
+	struct ib_rmpp_hdr	rmpp_hdr;
+	u8			data[IB_MGMT_RMPP_DATA];
+};
+
+struct opa_rmpp_mad {
+	struct ib_mad_hdr	mad_hdr;
+	struct ib_rmpp_hdr	rmpp_hdr;
+	u8			data[OPA_MGMT_RMPP_DATA];
+};
+
+struct ib_sa_mad {
+	struct ib_mad_hdr	mad_hdr;
+	struct ib_rmpp_hdr	rmpp_hdr;
+	struct ib_sa_hdr	sa_hdr;
+	u8			data[IB_MGMT_SA_DATA];
+} __packed;
+
+struct ib_vendor_mad {
+	struct ib_mad_hdr	mad_hdr;
+	struct ib_rmpp_hdr	rmpp_hdr;
+	u8			reserved;
+	u8			oui[3];
+	u8			data[IB_MGMT_VENDOR_DATA];
+};
+
+#define IB_MGMT_CLASSPORTINFO_ATTR_ID	cpu_to_be16(0x0001)
+
+#define IB_CLASS_PORT_INFO_RESP_TIME_MASK	0x1F
+#define IB_CLASS_PORT_INFO_RESP_TIME_FIELD_SIZE 5
+
+struct ib_class_port_info {
+	u8			base_version;
+	u8			class_version;
+	__be16			capability_mask;
+	  /* 27 bits for cap_mask2, 5 bits for resp_time */
+	__be32			cap_mask2_resp_time;
+	u8			redirect_gid[16];
+	__be32			redirect_tcslfl;
+	__be16			redirect_lid;
+	__be16			redirect_pkey;
+	__be32			redirect_qp;
+	__be32			redirect_qkey;
+	u8			trap_gid[16];
+	__be32			trap_tcslfl;
+	__be16			trap_lid;
+	__be16			trap_pkey;
+	__be32			trap_hlqp;
+	__be32			trap_qkey;
+};
+
+/* PortInfo CapabilityMask */
+enum ib_port_capability_mask_bits {
+	IB_PORT_SM = 1 << 1,
+	IB_PORT_NOTICE_SUP = 1 << 2,
+	IB_PORT_TRAP_SUP = 1 << 3,
+	IB_PORT_OPT_IPD_SUP = 1 << 4,
+	IB_PORT_AUTO_MIGR_SUP = 1 << 5,
+	IB_PORT_SL_MAP_SUP = 1 << 6,
+	IB_PORT_MKEY_NVRAM = 1 << 7,
+	IB_PORT_PKEY_NVRAM = 1 << 8,
+	IB_PORT_LED_INFO_SUP = 1 << 9,
+	IB_PORT_SM_DISABLED = 1 << 10,
+	IB_PORT_SYS_IMAGE_GUID_SUP = 1 << 11,
+	IB_PORT_PKEY_SW_EXT_PORT_TRAP_SUP = 1 << 12,
+	IB_PORT_EXTENDED_SPEEDS_SUP = 1 << 14,
+	IB_PORT_CAP_MASK2_SUP = 1 << 15,
+	IB_PORT_CM_SUP = 1 << 16,
+	IB_PORT_SNMP_TUNNEL_SUP = 1 << 17,
+	IB_PORT_REINIT_SUP = 1 << 18,
+	IB_PORT_DEVICE_MGMT_SUP = 1 << 19,
+	IB_PORT_VENDOR_CLASS_SUP = 1 << 20,
+	IB_PORT_DR_NOTICE_SUP = 1 << 21,
+	IB_PORT_CAP_MASK_NOTICE_SUP = 1 << 22,
+	IB_PORT_BOOT_MGMT_SUP = 1 << 23,
+	IB_PORT_LINK_LATENCY_SUP = 1 << 24,
+	IB_PORT_CLIENT_REG_SUP = 1 << 25,
+	IB_PORT_OTHER_LOCAL_CHANGES_SUP = 1 << 26,
+	IB_PORT_LINK_SPEED_WIDTH_TABLE_SUP = 1 << 27,
+	IB_PORT_VENDOR_SPECIFIC_MADS_TABLE_SUP = 1 << 28,
+	IB_PORT_MCAST_PKEY_TRAP_SUPPRESSION_SUP = 1 << 29,
+	IB_PORT_MCAST_FDB_TOP_SUP = 1 << 30,
+	IB_PORT_HIERARCHY_INFO_SUP = 1ULL << 31,
+};
+
+enum ib_port_capability_mask2_bits {
+	IB_PORT_SET_NODE_DESC_SUP		= 1 << 0,
+	IB_PORT_EX_PORT_INFO_EX_SUP		= 1 << 1,
+	IB_PORT_VIRT_SUP			= 1 << 2,
+	IB_PORT_SWITCH_PORT_STATE_TABLE_SUP	= 1 << 3,
+	IB_PORT_LINK_WIDTH_2X_SUP		= 1 << 4,
+	IB_PORT_LINK_SPEED_HDR_SUP		= 1 << 5,
+};
+
+#define OPA_CLASS_PORT_INFO_PR_SUPPORT BIT(26)
+
+struct opa_class_port_info {
+	u8 base_version;
+	u8 class_version;
+	__be16 cap_mask;
+	__be32 cap_mask2_resp_time;
+
+	u8 redirect_gid[16];
+	__be32 redirect_tc_fl;
+	__be32 redirect_lid;
+	__be32 redirect_sl_qp;
+	__be32 redirect_qkey;
+
+	u8 trap_gid[16];
+	__be32 trap_tc_fl;
+	__be32 trap_lid;
+	__be32 trap_hl_qp;
+	__be32 trap_qkey;
+
+	__be16 trap_pkey;
+	__be16 redirect_pkey;
+
+	u8 trap_sl_rsvd;
+	u8 reserved[3];
+} __packed;
+
+/**
+ * ib_get_cpi_resp_time - Returns the resp_time value from
+ * cap_mask2_resp_time in ib_class_port_info.
+ * @cpi: A struct ib_class_port_info mad.
+ */
+static inline u8 ib_get_cpi_resp_time(struct ib_class_port_info *cpi)
+{
+	return (u8)(be32_to_cpu(cpi->cap_mask2_resp_time) &
+		    IB_CLASS_PORT_INFO_RESP_TIME_MASK);
+}
+
+/**
+ * ib_set_cpi_resptime - Sets the response time in an
+ * ib_class_port_info mad.
+ * @cpi: A struct ib_class_port_info.
+ * @rtime: The response time to set.
+ */
+static inline void ib_set_cpi_resp_time(struct ib_class_port_info *cpi,
+					u8 rtime)
+{
+	cpi->cap_mask2_resp_time =
+		(cpi->cap_mask2_resp_time &
+		 cpu_to_be32(~IB_CLASS_PORT_INFO_RESP_TIME_MASK)) |
+		cpu_to_be32(rtime & IB_CLASS_PORT_INFO_RESP_TIME_MASK);
+}
+
+/**
+ * ib_get_cpi_capmask2 - Returns the capmask2 value from
+ * cap_mask2_resp_time in ib_class_port_info.
+ * @cpi: A struct ib_class_port_info mad.
+ */
+static inline u32 ib_get_cpi_capmask2(struct ib_class_port_info *cpi)
+{
+	return (be32_to_cpu(cpi->cap_mask2_resp_time) >>
+		IB_CLASS_PORT_INFO_RESP_TIME_FIELD_SIZE);
+}
+
+/**
+ * ib_set_cpi_capmask2 - Sets the capmask2 in an
+ * ib_class_port_info mad.
+ * @cpi: A struct ib_class_port_info.
+ * @capmask2: The capmask2 to set.
+ */
+static inline void ib_set_cpi_capmask2(struct ib_class_port_info *cpi,
+				       u32 capmask2)
+{
+	cpi->cap_mask2_resp_time =
+		(cpi->cap_mask2_resp_time &
+		 cpu_to_be32(IB_CLASS_PORT_INFO_RESP_TIME_MASK)) |
+		cpu_to_be32(capmask2 <<
+			    IB_CLASS_PORT_INFO_RESP_TIME_FIELD_SIZE);
+}
+
+/**
+ * opa_get_cpi_capmask2 - Returns the capmask2 value from
+ * cap_mask2_resp_time in ib_class_port_info.
+ * @cpi: A struct opa_class_port_info mad.
+ */
+static inline u32 opa_get_cpi_capmask2(struct opa_class_port_info *cpi)
+{
+	return (be32_to_cpu(cpi->cap_mask2_resp_time) >>
+		IB_CLASS_PORT_INFO_RESP_TIME_FIELD_SIZE);
+}
+
+struct ib_mad_notice_attr {
+	u8 generic_type;
+	u8 prod_type_msb;
+	__be16 prod_type_lsb;
+	__be16 trap_num;
+	__be16 issuer_lid;
+	__be16 toggle_count;
+
+	union {
+		struct {
+			u8	details[54];
+		} raw_data;
+
+		struct {
+			__be16	reserved;
+			__be16	lid;		/* where violation happened */
+			u8	port_num;	/* where violation happened */
+		} __packed ntc_129_131;
+
+		struct {
+			__be16	reserved;
+			__be16	lid;		/* LID where change occurred */
+			u8	reserved2;
+			u8	local_changes;	/* low bit - local changes */
+			__be32	new_cap_mask;	/* new capability mask */
+			u8	reserved3;
+			u8	change_flags;	/* low 3 bits only */
+		} __packed ntc_144;
+
+		struct {
+			__be16	reserved;
+			__be16	lid;		/* lid where sys guid changed */
+			__be16	reserved2;
+			__be64	new_sys_guid;
+		} __packed ntc_145;
+
+		struct {
+			__be16	reserved;
+			__be16	lid;
+			__be16	dr_slid;
+			u8	method;
+			u8	reserved2;
+			__be16	attr_id;
+			__be32	attr_mod;
+			__be64	mkey;
+			u8	reserved3;
+			u8	dr_trunc_hop;
+			u8	dr_rtn_path[30];
+		} __packed ntc_256;
+
+		struct {
+			__be16		reserved;
+			__be16		lid1;
+			__be16		lid2;
+			__be32		key;
+			__be32		sl_qp1;	/* SL: high 4 bits */
+			__be32		qp2;	/* high 8 bits reserved */
+			union ib_gid	gid1;
+			union ib_gid	gid2;
+		} __packed ntc_257_258;
+
+	} details;
+};
+
+/**
+ * ib_mad_send_buf - MAD data buffer and work request for sends.
+ * @next: A pointer used to chain together MADs for posting.
+ * @mad: References an allocated MAD data buffer for MADs that do not have
+ *   RMPP active.  For MADs using RMPP, references the common and management
+ *   class specific headers.
+ * @mad_agent: MAD agent that allocated the buffer.
+ * @ah: The address handle to use when sending the MAD.
+ * @context: User-controlled context fields.
+ * @hdr_len: Indicates the size of the data header of the MAD.  This length
+ *   includes the common MAD, RMPP, and class specific headers.
+ * @data_len: Indicates the total size of user-transferred data.
+ * @seg_count: The number of RMPP segments allocated for this send.
+ * @seg_size: Size of the data in each RMPP segment.  This does not include
+ *   class specific headers.
+ * @seg_rmpp_size: Size of each RMPP segment including the class specific
+ *   headers.
+ * @timeout_ms: Time to wait for a response.
+ * @retries: Number of times to retry a request for a response.  For MADs
+ *   using RMPP, this applies per window.  On completion, returns the number
+ *   of retries needed to complete the transfer.
+ *
+ * Users are responsible for initializing the MAD buffer itself, with the
+ * exception of any RMPP header.  Additional segment buffer space allocated
+ * beyond data_len is padding.
+ */
+struct ib_mad_send_buf {
+	struct ib_mad_send_buf	*next;
+	void			*mad;
+	struct ib_mad_agent	*mad_agent;
+	struct ib_ah		*ah;
+	void			*context[2];
+	int			hdr_len;
+	int			data_len;
+	int			seg_count;
+	int			seg_size;
+	int			seg_rmpp_size;
+	int			timeout_ms;
+	int			retries;
+};
+
+/**
+ * ib_response_mad - Returns if the specified MAD has been generated in
+ *   response to a sent request or trap.
+ */
+int ib_response_mad(const struct ib_mad_hdr *hdr);
+
+/**
+ * ib_get_rmpp_resptime - Returns the RMPP response time.
+ * @rmpp_hdr: An RMPP header.
+ */
+static inline u8 ib_get_rmpp_resptime(struct ib_rmpp_hdr *rmpp_hdr)
+{
+	return rmpp_hdr->rmpp_rtime_flags >> 3;
+}
+
+/**
+ * ib_get_rmpp_flags - Returns the RMPP flags.
+ * @rmpp_hdr: An RMPP header.
+ */
+static inline u8 ib_get_rmpp_flags(struct ib_rmpp_hdr *rmpp_hdr)
+{
+	return rmpp_hdr->rmpp_rtime_flags & 0x7;
+}
+
+/**
+ * ib_set_rmpp_resptime - Sets the response time in an RMPP header.
+ * @rmpp_hdr: An RMPP header.
+ * @rtime: The response time to set.
+ */
+static inline void ib_set_rmpp_resptime(struct ib_rmpp_hdr *rmpp_hdr, u8 rtime)
+{
+	rmpp_hdr->rmpp_rtime_flags = ib_get_rmpp_flags(rmpp_hdr) | (rtime << 3);
+}
+
+/**
+ * ib_set_rmpp_flags - Sets the flags in an RMPP header.
+ * @rmpp_hdr: An RMPP header.
+ * @flags: The flags to set.
+ */
+static inline void ib_set_rmpp_flags(struct ib_rmpp_hdr *rmpp_hdr, u8 flags)
+{
+	rmpp_hdr->rmpp_rtime_flags = (rmpp_hdr->rmpp_rtime_flags & 0xF8) |
+				     (flags & 0x7);
+}
+
+struct ib_mad_agent;
+struct ib_mad_send_wc;
+struct ib_mad_recv_wc;
+
+/**
+ * ib_mad_send_handler - callback handler for a sent MAD.
+ * @mad_agent: MAD agent that sent the MAD.
+ * @mad_send_wc: Send work completion information on the sent MAD.
+ */
+typedef void (*ib_mad_send_handler)(struct ib_mad_agent *mad_agent,
+				    struct ib_mad_send_wc *mad_send_wc);
+
+/**
+ * ib_mad_recv_handler - callback handler for a received MAD.
+ * @mad_agent: MAD agent requesting the received MAD.
+ * @send_buf: Send buffer if found, else NULL
+ * @mad_recv_wc: Received work completion information on the received MAD.
+ *
+ * MADs received in response to a send request operation will be handed to
+ * the user before the send operation completes.  All data buffers given
+ * to registered agents through this routine are owned by the receiving
+ * client.
+ */
+typedef void (*ib_mad_recv_handler)(struct ib_mad_agent *mad_agent,
+				    struct ib_mad_send_buf *send_buf,
+				    struct ib_mad_recv_wc *mad_recv_wc);
+
+/**
+ * ib_mad_agent - Used to track MAD registration with the access layer.
+ * @device: Reference to device registration is on.
+ * @qp: Reference to QP used for sending and receiving MADs.
+ * @mr: Memory region for system memory usable for DMA.
+ * @recv_handler: Callback handler for a received MAD.
+ * @send_handler: Callback handler for a sent MAD.
+ * @context: User-specified context associated with this registration.
+ * @hi_tid: Access layer assigned transaction ID for this client.
+ *   Unsolicited MADs sent by this client will have the upper 32-bits
+ *   of their TID set to this value.
+ * @flags: registration flags
+ * @port_num: Port number on which QP is registered
+ * @rmpp_version: If set, indicates the RMPP version used by this agent.
+ */
+enum {
+	IB_MAD_USER_RMPP = IB_USER_MAD_USER_RMPP,
+};
+struct ib_mad_agent {
+	struct ib_device	*device;
+	struct ib_qp		*qp;
+	ib_mad_recv_handler	recv_handler;
+	ib_mad_send_handler	send_handler;
+	void			*context;
+	u32			hi_tid;
+	u32			flags;
+	void			*security;
+	struct list_head	mad_agent_sec_list;
+	u8			port_num;
+	u8			rmpp_version;
+	bool			smp_allowed;
+};
+
+/**
+ * ib_mad_send_wc - MAD send completion information.
+ * @send_buf: Send MAD data buffer associated with the send MAD request.
+ * @status: Completion status.
+ * @vendor_err: Optional vendor error information returned with a failed
+ *   request.
+ */
+struct ib_mad_send_wc {
+	struct ib_mad_send_buf	*send_buf;
+	enum ib_wc_status	status;
+	u32			vendor_err;
+};
+
+/**
+ * ib_mad_recv_buf - received MAD buffer information.
+ * @list: Reference to next data buffer for a received RMPP MAD.
+ * @grh: References a data buffer containing the global route header.
+ *   The data refereced by this buffer is only valid if the GRH is
+ *   valid.
+ * @mad: References the start of the received MAD.
+ */
+struct ib_mad_recv_buf {
+	struct list_head	list;
+	struct ib_grh		*grh;
+	union {
+		struct ib_mad	*mad;
+		struct opa_mad	*opa_mad;
+	};
+};
+
+/**
+ * ib_mad_recv_wc - received MAD information.
+ * @wc: Completion information for the received data.
+ * @recv_buf: Specifies the location of the received data buffer(s).
+ * @rmpp_list: Specifies a list of RMPP reassembled received MAD buffers.
+ * @mad_len: The length of the received MAD, without duplicated headers.
+ * @mad_seg_size: The size of individual MAD segments
+ *
+ * For received response, the wr_id contains a pointer to the ib_mad_send_buf
+ *   for the corresponding send request.
+ */
+struct ib_mad_recv_wc {
+	struct ib_wc		*wc;
+	struct ib_mad_recv_buf	recv_buf;
+	struct list_head	rmpp_list;
+	int			mad_len;
+	size_t			mad_seg_size;
+};
+
+/**
+ * ib_mad_reg_req - MAD registration request
+ * @mgmt_class: Indicates which management class of MADs should be receive
+ *   by the caller.  This field is only required if the user wishes to
+ *   receive unsolicited MADs, otherwise it should be 0.
+ * @mgmt_class_version: Indicates which version of MADs for the given
+ *   management class to receive.
+ * @oui: Indicates IEEE OUI when mgmt_class is a vendor class
+ *   in the range from 0x30 to 0x4f. Otherwise not used.
+ * @method_mask: The caller will receive unsolicited MADs for any method
+ *   where @method_mask = 1.
+ *
+ */
+struct ib_mad_reg_req {
+	u8	mgmt_class;
+	u8	mgmt_class_version;
+	u8	oui[3];
+	DECLARE_BITMAP(method_mask, IB_MGMT_MAX_METHODS);
+};
+
+/**
+ * ib_register_mad_agent - Register to send/receive MADs.
+ * @device: The device to register with.
+ * @port_num: The port on the specified device to use.
+ * @qp_type: Specifies which QP to access.  Must be either
+ *   IB_QPT_SMI or IB_QPT_GSI.
+ * @mad_reg_req: Specifies which unsolicited MADs should be received
+ *   by the caller.  This parameter may be NULL if the caller only
+ *   wishes to receive solicited responses.
+ * @rmpp_version: If set, indicates that the client will send
+ *   and receive MADs that contain the RMPP header for the given version.
+ *   If set to 0, indicates that RMPP is not used by this client.
+ * @send_handler: The completion callback routine invoked after a send
+ *   request has completed.
+ * @recv_handler: The completion callback routine invoked for a received
+ *   MAD.
+ * @context: User specified context associated with the registration.
+ * @registration_flags: Registration flags to set for this agent
+ */
+struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
+					   u8 port_num,
+					   enum ib_qp_type qp_type,
+					   struct ib_mad_reg_req *mad_reg_req,
+					   u8 rmpp_version,
+					   ib_mad_send_handler send_handler,
+					   ib_mad_recv_handler recv_handler,
+					   void *context,
+					   u32 registration_flags);
+/**
+ * ib_unregister_mad_agent - Unregisters a client from using MAD services.
+ * @mad_agent: Corresponding MAD registration request to deregister.
+ *
+ * After invoking this routine, MAD services are no longer usable by the
+ * client on the associated QP.
+ */
+void ib_unregister_mad_agent(struct ib_mad_agent *mad_agent);
+
+/**
+ * ib_post_send_mad - Posts MAD(s) to the send queue of the QP associated
+ *   with the registered client.
+ * @send_buf: Specifies the information needed to send the MAD(s).
+ * @bad_send_buf: Specifies the MAD on which an error was encountered.  This
+ *   parameter is optional if only a single MAD is posted.
+ *
+ * Sent MADs are not guaranteed to complete in the order that they were posted.
+ *
+ * If the MAD requires RMPP, the data buffer should contain a single copy
+ * of the common MAD, RMPP, and class specific headers, followed by the class
+ * defined data.  If the class defined data would not divide evenly into
+ * RMPP segments, then space must be allocated at the end of the referenced
+ * buffer for any required padding.  To indicate the amount of class defined
+ * data being transferred, the paylen_newwin field in the RMPP header should
+ * be set to the size of the class specific header plus the amount of class
+ * defined data being transferred.  The paylen_newwin field should be
+ * specified in network-byte order.
+ */
+int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
+		     struct ib_mad_send_buf **bad_send_buf);
+
+
+/**
+ * ib_free_recv_mad - Returns data buffers used to receive a MAD.
+ * @mad_recv_wc: Work completion information for a received MAD.
+ *
+ * Clients receiving MADs through their ib_mad_recv_handler must call this
+ * routine to return the work completion buffers to the access layer.
+ */
+void ib_free_recv_mad(struct ib_mad_recv_wc *mad_recv_wc);
+
+/**
+ * ib_cancel_mad - Cancels an outstanding send MAD operation.
+ * @mad_agent: Specifies the registration associated with sent MAD.
+ * @send_buf: Indicates the MAD to cancel.
+ *
+ * MADs will be returned to the user through the corresponding
+ * ib_mad_send_handler.
+ */
+void ib_cancel_mad(struct ib_mad_agent *mad_agent,
+		   struct ib_mad_send_buf *send_buf);
+
+/**
+ * ib_modify_mad - Modifies an outstanding send MAD operation.
+ * @mad_agent: Specifies the registration associated with sent MAD.
+ * @send_buf: Indicates the MAD to modify.
+ * @timeout_ms: New timeout value for sent MAD.
+ *
+ * This call will reset the timeout value for a sent MAD to the specified
+ * value.
+ */
+int ib_modify_mad(struct ib_mad_agent *mad_agent,
+		  struct ib_mad_send_buf *send_buf, u32 timeout_ms);
+
+/**
+ * ib_create_send_mad - Allocate and initialize a data buffer and work request
+ *   for sending a MAD.
+ * @mad_agent: Specifies the registered MAD service to associate with the MAD.
+ * @remote_qpn: Specifies the QPN of the receiving node.
+ * @pkey_index: Specifies which PKey the MAD will be sent using.  This field
+ *   is valid only if the remote_qpn is QP 1.
+ * @rmpp_active: Indicates if the send will enable RMPP.
+ * @hdr_len: Indicates the size of the data header of the MAD.  This length
+ *   should include the common MAD header, RMPP header, plus any class
+ *   specific header.
+ * @data_len: Indicates the size of any user-transferred data.  The call will
+ *   automatically adjust the allocated buffer size to account for any
+ *   additional padding that may be necessary.
+ * @gfp_mask: GFP mask used for the memory allocation.
+ * @base_version: Base Version of this MAD
+ *
+ * This routine allocates a MAD for sending.  The returned MAD send buffer
+ * will reference a data buffer usable for sending a MAD, along
+ * with an initialized work request structure.  Users may modify the returned
+ * MAD data buffer before posting the send.
+ *
+ * The returned MAD header, class specific headers, and any padding will be
+ * cleared.  Users are responsible for initializing the common MAD header,
+ * any class specific header, and MAD data area.
+ * If @rmpp_active is set, the RMPP header will be initialized for sending.
+ */
+struct ib_mad_send_buf *ib_create_send_mad(struct ib_mad_agent *mad_agent,
+					   u32 remote_qpn, u16 pkey_index,
+					   int rmpp_active,
+					   int hdr_len, int data_len,
+					   gfp_t gfp_mask,
+					   u8 base_version);
+
+/**
+ * ib_is_mad_class_rmpp - returns whether given management class
+ * supports RMPP.
+ * @mgmt_class: management class
+ *
+ * This routine returns whether the management class supports RMPP.
+ */
+int ib_is_mad_class_rmpp(u8 mgmt_class);
+
+/**
+ * ib_get_mad_data_offset - returns the data offset for a given
+ * management class.
+ * @mgmt_class: management class
+ *
+ * This routine returns the data offset in the MAD for the management
+ * class requested.
+ */
+int ib_get_mad_data_offset(u8 mgmt_class);
+
+/**
+ * ib_get_rmpp_segment - returns the data buffer for a given RMPP segment.
+ * @send_buf: Previously allocated send data buffer.
+ * @seg_num: number of segment to return
+ *
+ * This routine returns a pointer to the data buffer of an RMPP MAD.
+ * Users must provide synchronization to @send_buf around this call.
+ */
+void *ib_get_rmpp_segment(struct ib_mad_send_buf *send_buf, int seg_num);
+
+/**
+ * ib_free_send_mad - Returns data buffers used to send a MAD.
+ * @send_buf: Previously allocated send data buffer.
+ */
+void ib_free_send_mad(struct ib_mad_send_buf *send_buf);
+
+/**
+ * ib_mad_kernel_rmpp_agent - Returns if the agent is performing RMPP.
+ * @agent: the agent in question
+ * @return: true if agent is performing rmpp, false otherwise.
+ */
+int ib_mad_kernel_rmpp_agent(const struct ib_mad_agent *agent);
+
+#endif /* IB_MAD_H */
diff --git a/include/rdma/ib_marshall.h b/include/rdma/ib_marshall.h
new file mode 100644
index 000000000..1838869aa
--- /dev/null
+++ b/include/rdma/ib_marshall.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2005-2006 Intel Corporation.  All rights reserved.
+ */
+
+#ifndef IB_USER_MARSHALL_H
+#define IB_USER_MARSHALL_H
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_sa.h>
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_user_sa.h>
+
+void ib_copy_qp_attr_to_user(struct ib_device *device,
+			     struct ib_uverbs_qp_attr *dst,
+			     struct ib_qp_attr *src);
+
+void ib_copy_ah_attr_to_user(struct ib_device *device,
+			     struct ib_uverbs_ah_attr *dst,
+			     struct rdma_ah_attr *src);
+
+void ib_copy_path_rec_to_user(struct ib_user_path_rec *dst,
+			      struct sa_path_rec *src);
+
+void ib_copy_path_rec_from_user(struct sa_path_rec *dst,
+				struct ib_user_path_rec *src);
+
+#endif /* IB_USER_MARSHALL_H */
diff --git a/include/rdma/ib_pack.h b/include/rdma/ib_pack.h
new file mode 100644
index 000000000..a9162f25b
--- /dev/null
+++ b/include/rdma/ib_pack.h
@@ -0,0 +1,284 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
+ */
+
+#ifndef IB_PACK_H
+#define IB_PACK_H
+
+#include <rdma/ib_verbs.h>
+#include <uapi/linux/if_ether.h>
+
+enum {
+	IB_LRH_BYTES		= 8,
+	IB_ETH_BYTES		= 14,
+	IB_VLAN_BYTES		= 4,
+	IB_GRH_BYTES		= 40,
+	IB_IP4_BYTES		= 20,
+	IB_UDP_BYTES		= 8,
+	IB_BTH_BYTES		= 12,
+	IB_DETH_BYTES		= 8,
+	IB_EXT_ATOMICETH_BYTES	= 28,
+	IB_EXT_XRC_BYTES	= 4,
+	IB_ICRC_BYTES		= 4
+};
+
+struct ib_field {
+	size_t struct_offset_bytes;
+	size_t struct_size_bytes;
+	int    offset_words;
+	int    offset_bits;
+	int    size_bits;
+	char  *field_name;
+};
+
+#define RESERVED \
+	.field_name          = "reserved"
+
+/*
+ * This macro cleans up the definitions of constants for BTH opcodes.
+ * It is used to define constants such as IB_OPCODE_UD_SEND_ONLY,
+ * which becomes IB_OPCODE_UD + IB_OPCODE_SEND_ONLY, and this gives
+ * the correct value.
+ *
+ * In short, user code should use the constants defined using the
+ * macro rather than worrying about adding together other constants.
+*/
+#define IB_OPCODE(transport, op) \
+	IB_OPCODE_ ## transport ## _ ## op = \
+		IB_OPCODE_ ## transport + IB_OPCODE_ ## op
+
+enum {
+	/* transport types -- just used to define real constants */
+	IB_OPCODE_RC                                = 0x00,
+	IB_OPCODE_UC                                = 0x20,
+	IB_OPCODE_RD                                = 0x40,
+	IB_OPCODE_UD                                = 0x60,
+	/* per IBTA 1.3 vol 1 Table 38, A10.3.2 */
+	IB_OPCODE_CNP                               = 0x80,
+	/* Manufacturer specific */
+	IB_OPCODE_MSP                               = 0xe0,
+
+	/* operations -- just used to define real constants */
+	IB_OPCODE_SEND_FIRST                        = 0x00,
+	IB_OPCODE_SEND_MIDDLE                       = 0x01,
+	IB_OPCODE_SEND_LAST                         = 0x02,
+	IB_OPCODE_SEND_LAST_WITH_IMMEDIATE          = 0x03,
+	IB_OPCODE_SEND_ONLY                         = 0x04,
+	IB_OPCODE_SEND_ONLY_WITH_IMMEDIATE          = 0x05,
+	IB_OPCODE_RDMA_WRITE_FIRST                  = 0x06,
+	IB_OPCODE_RDMA_WRITE_MIDDLE                 = 0x07,
+	IB_OPCODE_RDMA_WRITE_LAST                   = 0x08,
+	IB_OPCODE_RDMA_WRITE_LAST_WITH_IMMEDIATE    = 0x09,
+	IB_OPCODE_RDMA_WRITE_ONLY                   = 0x0a,
+	IB_OPCODE_RDMA_WRITE_ONLY_WITH_IMMEDIATE    = 0x0b,
+	IB_OPCODE_RDMA_READ_REQUEST                 = 0x0c,
+	IB_OPCODE_RDMA_READ_RESPONSE_FIRST          = 0x0d,
+	IB_OPCODE_RDMA_READ_RESPONSE_MIDDLE         = 0x0e,
+	IB_OPCODE_RDMA_READ_RESPONSE_LAST           = 0x0f,
+	IB_OPCODE_RDMA_READ_RESPONSE_ONLY           = 0x10,
+	IB_OPCODE_ACKNOWLEDGE                       = 0x11,
+	IB_OPCODE_ATOMIC_ACKNOWLEDGE                = 0x12,
+	IB_OPCODE_COMPARE_SWAP                      = 0x13,
+	IB_OPCODE_FETCH_ADD                         = 0x14,
+	/* opcode 0x15 is reserved */
+	IB_OPCODE_SEND_LAST_WITH_INVALIDATE         = 0x16,
+	IB_OPCODE_SEND_ONLY_WITH_INVALIDATE         = 0x17,
+
+	/* real constants follow -- see comment about above IB_OPCODE()
+	   macro for more details */
+
+	/* RC */
+	IB_OPCODE(RC, SEND_FIRST),
+	IB_OPCODE(RC, SEND_MIDDLE),
+	IB_OPCODE(RC, SEND_LAST),
+	IB_OPCODE(RC, SEND_LAST_WITH_IMMEDIATE),
+	IB_OPCODE(RC, SEND_ONLY),
+	IB_OPCODE(RC, SEND_ONLY_WITH_IMMEDIATE),
+	IB_OPCODE(RC, RDMA_WRITE_FIRST),
+	IB_OPCODE(RC, RDMA_WRITE_MIDDLE),
+	IB_OPCODE(RC, RDMA_WRITE_LAST),
+	IB_OPCODE(RC, RDMA_WRITE_LAST_WITH_IMMEDIATE),
+	IB_OPCODE(RC, RDMA_WRITE_ONLY),
+	IB_OPCODE(RC, RDMA_WRITE_ONLY_WITH_IMMEDIATE),
+	IB_OPCODE(RC, RDMA_READ_REQUEST),
+	IB_OPCODE(RC, RDMA_READ_RESPONSE_FIRST),
+	IB_OPCODE(RC, RDMA_READ_RESPONSE_MIDDLE),
+	IB_OPCODE(RC, RDMA_READ_RESPONSE_LAST),
+	IB_OPCODE(RC, RDMA_READ_RESPONSE_ONLY),
+	IB_OPCODE(RC, ACKNOWLEDGE),
+	IB_OPCODE(RC, ATOMIC_ACKNOWLEDGE),
+	IB_OPCODE(RC, COMPARE_SWAP),
+	IB_OPCODE(RC, FETCH_ADD),
+	IB_OPCODE(RC, SEND_LAST_WITH_INVALIDATE),
+	IB_OPCODE(RC, SEND_ONLY_WITH_INVALIDATE),
+
+	/* UC */
+	IB_OPCODE(UC, SEND_FIRST),
+	IB_OPCODE(UC, SEND_MIDDLE),
+	IB_OPCODE(UC, SEND_LAST),
+	IB_OPCODE(UC, SEND_LAST_WITH_IMMEDIATE),
+	IB_OPCODE(UC, SEND_ONLY),
+	IB_OPCODE(UC, SEND_ONLY_WITH_IMMEDIATE),
+	IB_OPCODE(UC, RDMA_WRITE_FIRST),
+	IB_OPCODE(UC, RDMA_WRITE_MIDDLE),
+	IB_OPCODE(UC, RDMA_WRITE_LAST),
+	IB_OPCODE(UC, RDMA_WRITE_LAST_WITH_IMMEDIATE),
+	IB_OPCODE(UC, RDMA_WRITE_ONLY),
+	IB_OPCODE(UC, RDMA_WRITE_ONLY_WITH_IMMEDIATE),
+
+	/* RD */
+	IB_OPCODE(RD, SEND_FIRST),
+	IB_OPCODE(RD, SEND_MIDDLE),
+	IB_OPCODE(RD, SEND_LAST),
+	IB_OPCODE(RD, SEND_LAST_WITH_IMMEDIATE),
+	IB_OPCODE(RD, SEND_ONLY),
+	IB_OPCODE(RD, SEND_ONLY_WITH_IMMEDIATE),
+	IB_OPCODE(RD, RDMA_WRITE_FIRST),
+	IB_OPCODE(RD, RDMA_WRITE_MIDDLE),
+	IB_OPCODE(RD, RDMA_WRITE_LAST),
+	IB_OPCODE(RD, RDMA_WRITE_LAST_WITH_IMMEDIATE),
+	IB_OPCODE(RD, RDMA_WRITE_ONLY),
+	IB_OPCODE(RD, RDMA_WRITE_ONLY_WITH_IMMEDIATE),
+	IB_OPCODE(RD, RDMA_READ_REQUEST),
+	IB_OPCODE(RD, RDMA_READ_RESPONSE_FIRST),
+	IB_OPCODE(RD, RDMA_READ_RESPONSE_MIDDLE),
+	IB_OPCODE(RD, RDMA_READ_RESPONSE_LAST),
+	IB_OPCODE(RD, RDMA_READ_RESPONSE_ONLY),
+	IB_OPCODE(RD, ACKNOWLEDGE),
+	IB_OPCODE(RD, ATOMIC_ACKNOWLEDGE),
+	IB_OPCODE(RD, COMPARE_SWAP),
+	IB_OPCODE(RD, FETCH_ADD),
+
+	/* UD */
+	IB_OPCODE(UD, SEND_ONLY),
+	IB_OPCODE(UD, SEND_ONLY_WITH_IMMEDIATE)
+};
+
+enum {
+	IB_LNH_RAW        = 0,
+	IB_LNH_IP         = 1,
+	IB_LNH_IBA_LOCAL  = 2,
+	IB_LNH_IBA_GLOBAL = 3
+};
+
+struct ib_unpacked_lrh {
+	u8        virtual_lane;
+	u8        link_version;
+	u8        service_level;
+	u8        link_next_header;
+	__be16    destination_lid;
+	__be16    packet_length;
+	__be16    source_lid;
+};
+
+struct ib_unpacked_grh {
+	u8    	     ip_version;
+	u8    	     traffic_class;
+	__be32 	     flow_label;
+	__be16       payload_length;
+	u8    	     next_header;
+	u8    	     hop_limit;
+	union ib_gid source_gid;
+	union ib_gid destination_gid;
+};
+
+struct ib_unpacked_bth {
+	u8           opcode;
+	u8           solicited_event;
+	u8           mig_req;
+	u8           pad_count;
+	u8           transport_header_version;
+	__be16       pkey;
+	__be32       destination_qpn;
+	u8           ack_req;
+	__be32       psn;
+};
+
+struct ib_unpacked_deth {
+	__be32       qkey;
+	__be32       source_qpn;
+};
+
+struct ib_unpacked_eth {
+	u8	dmac_h[4];
+	u8	dmac_l[2];
+	u8	smac_h[2];
+	u8	smac_l[4];
+	__be16	type;
+};
+
+struct ib_unpacked_ip4 {
+	u8	ver;
+	u8	hdr_len;
+	u8	tos;
+	__be16	tot_len;
+	__be16	id;
+	__be16	frag_off;
+	u8	ttl;
+	u8	protocol;
+	__sum16	check;
+	__be32	saddr;
+	__be32	daddr;
+};
+
+struct ib_unpacked_udp {
+	__be16	sport;
+	__be16	dport;
+	__be16	length;
+	__be16	csum;
+};
+
+struct ib_unpacked_vlan {
+	__be16  tag;
+	__be16  type;
+};
+
+struct ib_ud_header {
+	int                     lrh_present;
+	struct ib_unpacked_lrh  lrh;
+	int			eth_present;
+	struct ib_unpacked_eth	eth;
+	int                     vlan_present;
+	struct ib_unpacked_vlan vlan;
+	int			grh_present;
+	struct ib_unpacked_grh	grh;
+	int			ipv4_present;
+	struct ib_unpacked_ip4	ip4;
+	int			udp_present;
+	struct ib_unpacked_udp	udp;
+	struct ib_unpacked_bth	bth;
+	struct ib_unpacked_deth deth;
+	int			immediate_present;
+	__be32			immediate_data;
+};
+
+void ib_pack(const struct ib_field        *desc,
+	     int                           desc_len,
+	     void                         *structure,
+	     void                         *buf);
+
+void ib_unpack(const struct ib_field        *desc,
+	       int                           desc_len,
+	       void                         *buf,
+	       void                         *structure);
+
+__sum16 ib_ud_ip4_csum(struct ib_ud_header *header);
+
+int ib_ud_header_init(int		    payload_bytes,
+		      int		    lrh_present,
+		      int		    eth_present,
+		      int		    vlan_present,
+		      int		    grh_present,
+		      int		    ip_version,
+		      int		    udp_present,
+		      int		    immediate_present,
+		      struct ib_ud_header *header);
+
+int ib_ud_header_pack(struct ib_ud_header *header,
+		      void                *buf);
+
+int ib_ud_header_unpack(void                *buf,
+			struct ib_ud_header *header);
+
+#endif /* IB_PACK_H */
diff --git a/include/rdma/ib_pma.h b/include/rdma/ib_pma.h
new file mode 100644
index 000000000..44c618203
--- /dev/null
+++ b/include/rdma/ib_pma.h
@@ -0,0 +1,130 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation.
+ * All rights reserved.
+ * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
+ */
+
+#ifndef IB_PMA_H
+#define IB_PMA_H
+
+#include <rdma/ib_mad.h>
+
+/*
+ * PMA class portinfo capability mask bits
+ */
+#define IB_PMA_CLASS_CAP_ALLPORTSELECT  cpu_to_be16(1 << 8)
+#define IB_PMA_CLASS_CAP_EXT_WIDTH      cpu_to_be16(1 << 9)
+#define IB_PMA_CLASS_CAP_EXT_WIDTH_NOIETF cpu_to_be16(1 << 10)
+#define IB_PMA_CLASS_CAP_XMIT_WAIT      cpu_to_be16(1 << 12)
+
+#define IB_PMA_CLASS_PORT_INFO          cpu_to_be16(0x0001)
+#define IB_PMA_PORT_SAMPLES_CONTROL     cpu_to_be16(0x0010)
+#define IB_PMA_PORT_SAMPLES_RESULT      cpu_to_be16(0x0011)
+#define IB_PMA_PORT_COUNTERS            cpu_to_be16(0x0012)
+#define IB_PMA_PORT_COUNTERS_EXT        cpu_to_be16(0x001D)
+#define IB_PMA_PORT_SAMPLES_RESULT_EXT  cpu_to_be16(0x001E)
+
+struct ib_pma_mad {
+	struct ib_mad_hdr mad_hdr;
+	u8 reserved[40];
+	u8 data[192];
+} __packed;
+
+struct ib_pma_portsamplescontrol {
+	u8 opcode;
+	u8 port_select;
+	u8 tick;
+	u8 counter_width;		/* resv: 7:3, counter width: 2:0 */
+	__be32 counter_mask0_9;		/* 2, 10 3-bit fields */
+	__be16 counter_mask10_14;	/* 1, 5 3-bit fields */
+	u8 sample_mechanisms;
+	u8 sample_status;		/* only lower 2 bits */
+	__be64 option_mask;
+	__be64 vendor_mask;
+	__be32 sample_start;
+	__be32 sample_interval;
+	__be16 tag;
+	__be16 counter_select[15];
+	__be32 reserved1;
+	__be64 samples_only_option_mask;
+	__be32 reserved2[28];
+};
+
+struct ib_pma_portsamplesresult {
+	__be16 tag;
+	__be16 sample_status;   /* only lower 2 bits */
+	__be32 counter[15];
+};
+
+struct ib_pma_portsamplesresult_ext {
+	__be16 tag;
+	__be16 sample_status;   /* only lower 2 bits */
+	__be32 extended_width;  /* only upper 2 bits */
+	__be64 counter[15];
+};
+
+struct ib_pma_portcounters {
+	u8 reserved;
+	u8 port_select;
+	__be16 counter_select;
+	__be16 symbol_error_counter;
+	u8 link_error_recovery_counter;
+	u8 link_downed_counter;
+	__be16 port_rcv_errors;
+	__be16 port_rcv_remphys_errors;
+	__be16 port_rcv_switch_relay_errors;
+	__be16 port_xmit_discards;
+	u8 port_xmit_constraint_errors;
+	u8 port_rcv_constraint_errors;
+	u8 reserved1;
+	u8 link_overrun_errors; /* LocalLink: 7:4, BufferOverrun: 3:0 */
+	__be16 reserved2;
+	__be16 vl15_dropped;
+	__be32 port_xmit_data;
+	__be32 port_rcv_data;
+	__be32 port_xmit_packets;
+	__be32 port_rcv_packets;
+	__be32 port_xmit_wait;
+} __packed;
+
+
+#define IB_PMA_SEL_SYMBOL_ERROR                 cpu_to_be16(0x0001)
+#define IB_PMA_SEL_LINK_ERROR_RECOVERY          cpu_to_be16(0x0002)
+#define IB_PMA_SEL_LINK_DOWNED                  cpu_to_be16(0x0004)
+#define IB_PMA_SEL_PORT_RCV_ERRORS              cpu_to_be16(0x0008)
+#define IB_PMA_SEL_PORT_RCV_REMPHYS_ERRORS      cpu_to_be16(0x0010)
+#define IB_PMA_SEL_PORT_XMIT_DISCARDS           cpu_to_be16(0x0040)
+#define IB_PMA_SEL_LOCAL_LINK_INTEGRITY_ERRORS  cpu_to_be16(0x0200)
+#define IB_PMA_SEL_EXCESSIVE_BUFFER_OVERRUNS    cpu_to_be16(0x0400)
+#define IB_PMA_SEL_PORT_VL15_DROPPED            cpu_to_be16(0x0800)
+#define IB_PMA_SEL_PORT_XMIT_DATA               cpu_to_be16(0x1000)
+#define IB_PMA_SEL_PORT_RCV_DATA                cpu_to_be16(0x2000)
+#define IB_PMA_SEL_PORT_XMIT_PACKETS            cpu_to_be16(0x4000)
+#define IB_PMA_SEL_PORT_RCV_PACKETS             cpu_to_be16(0x8000)
+
+struct ib_pma_portcounters_ext {
+	u8 reserved;
+	u8 port_select;
+	__be16 counter_select;
+	__be32 reserved1;
+	__be64 port_xmit_data;
+	__be64 port_rcv_data;
+	__be64 port_xmit_packets;
+	__be64 port_rcv_packets;
+	__be64 port_unicast_xmit_packets;
+	__be64 port_unicast_rcv_packets;
+	__be64 port_multicast_xmit_packets;
+	__be64 port_multicast_rcv_packets;
+} __packed;
+
+#define IB_PMA_SELX_PORT_XMIT_DATA              cpu_to_be16(0x0001)
+#define IB_PMA_SELX_PORT_RCV_DATA               cpu_to_be16(0x0002)
+#define IB_PMA_SELX_PORT_XMIT_PACKETS           cpu_to_be16(0x0004)
+#define IB_PMA_SELX_PORT_RCV_PACKETS            cpu_to_be16(0x0008)
+#define IB_PMA_SELX_PORT_UNI_XMIT_PACKETS       cpu_to_be16(0x0010)
+#define IB_PMA_SELX_PORT_UNI_RCV_PACKETS        cpu_to_be16(0x0020)
+#define IB_PMA_SELX_PORT_MULTI_XMIT_PACKETS     cpu_to_be16(0x0040)
+#define IB_PMA_SELX_PORT_MULTI_RCV_PACKETS      cpu_to_be16(0x0080)
+
+#endif /* IB_PMA_H */
diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h
new file mode 100644
index 000000000..693285e76
--- /dev/null
+++ b/include/rdma/ib_sa.h
@@ -0,0 +1,635 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2004 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Voltaire, Inc.  All rights reserved.
+ * Copyright (c) 2006 Intel Corporation.  All rights reserved.
+ */
+
+#ifndef IB_SA_H
+#define IB_SA_H
+
+#include <linux/completion.h>
+#include <linux/compiler.h>
+
+#include <linux/atomic.h>
+#include <linux/netdevice.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_mad.h>
+#include <rdma/ib_addr.h>
+#include <rdma/opa_addr.h>
+
+enum {
+	IB_SA_CLASS_VERSION		= 2,	/* IB spec version 1.1/1.2 */
+
+	IB_SA_METHOD_GET_TABLE		= 0x12,
+	IB_SA_METHOD_GET_TABLE_RESP	= 0x92,
+	IB_SA_METHOD_DELETE		= 0x15,
+	IB_SA_METHOD_DELETE_RESP	= 0x95,
+	IB_SA_METHOD_GET_MULTI		= 0x14,
+	IB_SA_METHOD_GET_MULTI_RESP	= 0x94,
+	IB_SA_METHOD_GET_TRACE_TBL	= 0x13
+};
+
+#define OPA_SA_CLASS_VERSION	0x80
+enum {
+	IB_SA_ATTR_CLASS_PORTINFO    = 0x01,
+	IB_SA_ATTR_NOTICE	     = 0x02,
+	IB_SA_ATTR_INFORM_INFO	     = 0x03,
+	IB_SA_ATTR_NODE_REC	     = 0x11,
+	IB_SA_ATTR_PORT_INFO_REC     = 0x12,
+	IB_SA_ATTR_SL2VL_REC	     = 0x13,
+	IB_SA_ATTR_SWITCH_REC	     = 0x14,
+	IB_SA_ATTR_LINEAR_FDB_REC    = 0x15,
+	IB_SA_ATTR_RANDOM_FDB_REC    = 0x16,
+	IB_SA_ATTR_MCAST_FDB_REC     = 0x17,
+	IB_SA_ATTR_SM_INFO_REC	     = 0x18,
+	IB_SA_ATTR_LINK_REC	     = 0x20,
+	IB_SA_ATTR_GUID_INFO_REC     = 0x30,
+	IB_SA_ATTR_SERVICE_REC	     = 0x31,
+	IB_SA_ATTR_PARTITION_REC     = 0x33,
+	IB_SA_ATTR_PATH_REC	     = 0x35,
+	IB_SA_ATTR_VL_ARB_REC	     = 0x36,
+	IB_SA_ATTR_MC_MEMBER_REC     = 0x38,
+	IB_SA_ATTR_TRACE_REC	     = 0x39,
+	IB_SA_ATTR_MULTI_PATH_REC    = 0x3a,
+	IB_SA_ATTR_SERVICE_ASSOC_REC = 0x3b,
+	IB_SA_ATTR_INFORM_INFO_REC   = 0xf3
+};
+
+enum ib_sa_selector {
+	IB_SA_GT   = 0,
+	IB_SA_LT   = 1,
+	IB_SA_EQ   = 2,
+	/*
+	 * The meaning of "best" depends on the attribute: for
+	 * example, for MTU best will return the largest available
+	 * MTU, while for packet life time, best will return the
+	 * smallest available life time.
+	 */
+	IB_SA_BEST = 3
+};
+
+/*
+ * There are 4 types of join states:
+ * FullMember, NonMember, SendOnlyNonMember, SendOnlyFullMember.
+ * The order corresponds to JoinState bits in MCMemberRecord.
+ */
+enum ib_sa_mc_join_states {
+	FULLMEMBER_JOIN,
+	NONMEMBER_JOIN,
+	SENDONLY_NONMEBER_JOIN,
+	SENDONLY_FULLMEMBER_JOIN,
+	NUM_JOIN_MEMBERSHIP_TYPES,
+};
+
+#define IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT	BIT(12)
+
+/*
+ * Structures for SA records are named "struct ib_sa_xxx_rec."  No
+ * attempt is made to pack structures to match the physical layout of
+ * SA records in SA MADs; all packing and unpacking is handled by the
+ * SA query code.
+ *
+ * For a record with structure ib_sa_xxx_rec, the naming convention
+ * for the component mask value for field yyy is IB_SA_XXX_REC_YYY (we
+ * never use different abbreviations or otherwise change the spelling
+ * of xxx/yyy between ib_sa_xxx_rec.yyy and IB_SA_XXX_REC_YYY).
+ *
+ * Reserved rows are indicated with comments to help maintainability.
+ */
+
+#define IB_SA_PATH_REC_SERVICE_ID		       (IB_SA_COMP_MASK( 0) |\
+							IB_SA_COMP_MASK( 1))
+#define IB_SA_PATH_REC_DGID				IB_SA_COMP_MASK( 2)
+#define IB_SA_PATH_REC_SGID				IB_SA_COMP_MASK( 3)
+#define IB_SA_PATH_REC_DLID				IB_SA_COMP_MASK( 4)
+#define IB_SA_PATH_REC_SLID				IB_SA_COMP_MASK( 5)
+#define IB_SA_PATH_REC_RAW_TRAFFIC			IB_SA_COMP_MASK( 6)
+/* reserved:								 7 */
+#define IB_SA_PATH_REC_FLOW_LABEL       		IB_SA_COMP_MASK( 8)
+#define IB_SA_PATH_REC_HOP_LIMIT			IB_SA_COMP_MASK( 9)
+#define IB_SA_PATH_REC_TRAFFIC_CLASS			IB_SA_COMP_MASK(10)
+#define IB_SA_PATH_REC_REVERSIBLE			IB_SA_COMP_MASK(11)
+#define IB_SA_PATH_REC_NUMB_PATH			IB_SA_COMP_MASK(12)
+#define IB_SA_PATH_REC_PKEY				IB_SA_COMP_MASK(13)
+#define IB_SA_PATH_REC_QOS_CLASS			IB_SA_COMP_MASK(14)
+#define IB_SA_PATH_REC_SL				IB_SA_COMP_MASK(15)
+#define IB_SA_PATH_REC_MTU_SELECTOR			IB_SA_COMP_MASK(16)
+#define IB_SA_PATH_REC_MTU				IB_SA_COMP_MASK(17)
+#define IB_SA_PATH_REC_RATE_SELECTOR			IB_SA_COMP_MASK(18)
+#define IB_SA_PATH_REC_RATE				IB_SA_COMP_MASK(19)
+#define IB_SA_PATH_REC_PACKET_LIFE_TIME_SELECTOR	IB_SA_COMP_MASK(20)
+#define IB_SA_PATH_REC_PACKET_LIFE_TIME			IB_SA_COMP_MASK(21)
+#define IB_SA_PATH_REC_PREFERENCE			IB_SA_COMP_MASK(22)
+
+enum sa_path_rec_type {
+	SA_PATH_REC_TYPE_IB,
+	SA_PATH_REC_TYPE_ROCE_V1,
+	SA_PATH_REC_TYPE_ROCE_V2,
+	SA_PATH_REC_TYPE_OPA
+};
+
+struct sa_path_rec_ib {
+	__be16       dlid;
+	__be16       slid;
+	u8           raw_traffic;
+};
+
+/**
+ * struct sa_path_rec_roce - RoCE specific portion of the path record entry
+ * @route_resolved:	When set, it indicates that this route is already
+ *			resolved for this path record entry.
+ * @dmac:		Destination mac address for the given DGID entry
+ *			of the path record entry.
+ */
+struct sa_path_rec_roce {
+	bool	route_resolved;
+	u8	dmac[ETH_ALEN];
+};
+
+struct sa_path_rec_opa {
+	__be32       dlid;
+	__be32       slid;
+	u8           raw_traffic;
+	u8	     l2_8B;
+	u8	     l2_10B;
+	u8	     l2_9B;
+	u8	     l2_16B;
+	u8	     qos_type;
+	u8	     qos_priority;
+};
+
+struct sa_path_rec {
+	union ib_gid dgid;
+	union ib_gid sgid;
+	__be64       service_id;
+	/* reserved */
+	__be32       flow_label;
+	u8           hop_limit;
+	u8           traffic_class;
+	u8           reversible;
+	u8           numb_path;
+	__be16       pkey;
+	__be16       qos_class;
+	u8           sl;
+	u8           mtu_selector;
+	u8           mtu;
+	u8           rate_selector;
+	u8           rate;
+	u8           packet_life_time_selector;
+	u8           packet_life_time;
+	u8           preference;
+	union {
+		struct sa_path_rec_ib ib;
+		struct sa_path_rec_roce roce;
+		struct sa_path_rec_opa opa;
+	};
+	enum sa_path_rec_type rec_type;
+};
+
+static inline enum ib_gid_type
+		sa_conv_pathrec_to_gid_type(struct sa_path_rec *rec)
+{
+	switch (rec->rec_type) {
+	case SA_PATH_REC_TYPE_ROCE_V1:
+		return IB_GID_TYPE_ROCE;
+	case SA_PATH_REC_TYPE_ROCE_V2:
+		return IB_GID_TYPE_ROCE_UDP_ENCAP;
+	default:
+		return IB_GID_TYPE_IB;
+	}
+}
+
+static inline enum sa_path_rec_type
+		sa_conv_gid_to_pathrec_type(enum ib_gid_type type)
+{
+	switch (type) {
+	case IB_GID_TYPE_ROCE:
+		return SA_PATH_REC_TYPE_ROCE_V1;
+	case IB_GID_TYPE_ROCE_UDP_ENCAP:
+		return SA_PATH_REC_TYPE_ROCE_V2;
+	default:
+		return SA_PATH_REC_TYPE_IB;
+	}
+}
+
+static inline void path_conv_opa_to_ib(struct sa_path_rec *ib,
+				       struct sa_path_rec *opa)
+{
+	if ((be32_to_cpu(opa->opa.dlid) >=
+	     be16_to_cpu(IB_MULTICAST_LID_BASE)) ||
+	    (be32_to_cpu(opa->opa.slid) >=
+	     be16_to_cpu(IB_MULTICAST_LID_BASE))) {
+		/* Create OPA GID and zero out the LID */
+		ib->dgid.global.interface_id
+				= OPA_MAKE_ID(be32_to_cpu(opa->opa.dlid));
+		ib->dgid.global.subnet_prefix
+				= opa->dgid.global.subnet_prefix;
+		ib->sgid.global.interface_id
+				= OPA_MAKE_ID(be32_to_cpu(opa->opa.slid));
+		ib->dgid.global.subnet_prefix
+				= opa->dgid.global.subnet_prefix;
+		ib->ib.dlid	= 0;
+
+		ib->ib.slid	= 0;
+	} else {
+		ib->ib.dlid	= htons(ntohl(opa->opa.dlid));
+		ib->ib.slid	= htons(ntohl(opa->opa.slid));
+	}
+	ib->service_id		= opa->service_id;
+	ib->ib.raw_traffic	= opa->opa.raw_traffic;
+}
+
+static inline void path_conv_ib_to_opa(struct sa_path_rec *opa,
+				       struct sa_path_rec *ib)
+{
+	__be32 slid, dlid;
+
+	if ((ib_is_opa_gid(&ib->sgid)) ||
+	    (ib_is_opa_gid(&ib->dgid))) {
+		slid = htonl(opa_get_lid_from_gid(&ib->sgid));
+		dlid = htonl(opa_get_lid_from_gid(&ib->dgid));
+	} else {
+		slid = htonl(ntohs(ib->ib.slid));
+		dlid = htonl(ntohs(ib->ib.dlid));
+	}
+	opa->opa.slid		= slid;
+	opa->opa.dlid		= dlid;
+	opa->service_id		= ib->service_id;
+	opa->opa.raw_traffic	= ib->ib.raw_traffic;
+}
+
+/* Convert from OPA to IB path record */
+static inline void sa_convert_path_opa_to_ib(struct sa_path_rec *dest,
+					     struct sa_path_rec *src)
+{
+	if (src->rec_type != SA_PATH_REC_TYPE_OPA)
+		return;
+
+	*dest = *src;
+	dest->rec_type = SA_PATH_REC_TYPE_IB;
+	path_conv_opa_to_ib(dest, src);
+}
+
+/* Convert from IB to OPA path record */
+static inline void sa_convert_path_ib_to_opa(struct sa_path_rec *dest,
+					     struct sa_path_rec *src)
+{
+	if (src->rec_type != SA_PATH_REC_TYPE_IB)
+		return;
+
+	/* Do a structure copy and overwrite the relevant fields */
+	*dest = *src;
+	dest->rec_type = SA_PATH_REC_TYPE_OPA;
+	path_conv_ib_to_opa(dest, src);
+}
+
+#define IB_SA_MCMEMBER_REC_MGID				IB_SA_COMP_MASK( 0)
+#define IB_SA_MCMEMBER_REC_PORT_GID			IB_SA_COMP_MASK( 1)
+#define IB_SA_MCMEMBER_REC_QKEY				IB_SA_COMP_MASK( 2)
+#define IB_SA_MCMEMBER_REC_MLID				IB_SA_COMP_MASK( 3)
+#define IB_SA_MCMEMBER_REC_MTU_SELECTOR			IB_SA_COMP_MASK( 4)
+#define IB_SA_MCMEMBER_REC_MTU				IB_SA_COMP_MASK( 5)
+#define IB_SA_MCMEMBER_REC_TRAFFIC_CLASS		IB_SA_COMP_MASK( 6)
+#define IB_SA_MCMEMBER_REC_PKEY				IB_SA_COMP_MASK( 7)
+#define IB_SA_MCMEMBER_REC_RATE_SELECTOR		IB_SA_COMP_MASK( 8)
+#define IB_SA_MCMEMBER_REC_RATE				IB_SA_COMP_MASK( 9)
+#define IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME_SELECTOR	IB_SA_COMP_MASK(10)
+#define IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME		IB_SA_COMP_MASK(11)
+#define IB_SA_MCMEMBER_REC_SL				IB_SA_COMP_MASK(12)
+#define IB_SA_MCMEMBER_REC_FLOW_LABEL			IB_SA_COMP_MASK(13)
+#define IB_SA_MCMEMBER_REC_HOP_LIMIT			IB_SA_COMP_MASK(14)
+#define IB_SA_MCMEMBER_REC_SCOPE			IB_SA_COMP_MASK(15)
+#define IB_SA_MCMEMBER_REC_JOIN_STATE			IB_SA_COMP_MASK(16)
+#define IB_SA_MCMEMBER_REC_PROXY_JOIN			IB_SA_COMP_MASK(17)
+
+struct ib_sa_mcmember_rec {
+	union ib_gid mgid;
+	union ib_gid port_gid;
+	__be32       qkey;
+	__be16       mlid;
+	u8           mtu_selector;
+	u8           mtu;
+	u8           traffic_class;
+	__be16       pkey;
+	u8 	     rate_selector;
+	u8 	     rate;
+	u8 	     packet_life_time_selector;
+	u8 	     packet_life_time;
+	u8           sl;
+	__be32       flow_label;
+	u8           hop_limit;
+	u8           scope;
+	u8           join_state;
+	u8           proxy_join;
+};
+
+/* Service Record Component Mask Sec 15.2.5.14 Ver 1.1	*/
+#define IB_SA_SERVICE_REC_SERVICE_ID			IB_SA_COMP_MASK( 0)
+#define IB_SA_SERVICE_REC_SERVICE_GID			IB_SA_COMP_MASK( 1)
+#define IB_SA_SERVICE_REC_SERVICE_PKEY			IB_SA_COMP_MASK( 2)
+/* reserved:								 3 */
+#define IB_SA_SERVICE_REC_SERVICE_LEASE			IB_SA_COMP_MASK( 4)
+#define IB_SA_SERVICE_REC_SERVICE_KEY			IB_SA_COMP_MASK( 5)
+#define IB_SA_SERVICE_REC_SERVICE_NAME			IB_SA_COMP_MASK( 6)
+#define IB_SA_SERVICE_REC_SERVICE_DATA8_0		IB_SA_COMP_MASK( 7)
+#define IB_SA_SERVICE_REC_SERVICE_DATA8_1		IB_SA_COMP_MASK( 8)
+#define IB_SA_SERVICE_REC_SERVICE_DATA8_2		IB_SA_COMP_MASK( 9)
+#define IB_SA_SERVICE_REC_SERVICE_DATA8_3		IB_SA_COMP_MASK(10)
+#define IB_SA_SERVICE_REC_SERVICE_DATA8_4		IB_SA_COMP_MASK(11)
+#define IB_SA_SERVICE_REC_SERVICE_DATA8_5		IB_SA_COMP_MASK(12)
+#define IB_SA_SERVICE_REC_SERVICE_DATA8_6		IB_SA_COMP_MASK(13)
+#define IB_SA_SERVICE_REC_SERVICE_DATA8_7		IB_SA_COMP_MASK(14)
+#define IB_SA_SERVICE_REC_SERVICE_DATA8_8		IB_SA_COMP_MASK(15)
+#define IB_SA_SERVICE_REC_SERVICE_DATA8_9		IB_SA_COMP_MASK(16)
+#define IB_SA_SERVICE_REC_SERVICE_DATA8_10		IB_SA_COMP_MASK(17)
+#define IB_SA_SERVICE_REC_SERVICE_DATA8_11		IB_SA_COMP_MASK(18)
+#define IB_SA_SERVICE_REC_SERVICE_DATA8_12		IB_SA_COMP_MASK(19)
+#define IB_SA_SERVICE_REC_SERVICE_DATA8_13		IB_SA_COMP_MASK(20)
+#define IB_SA_SERVICE_REC_SERVICE_DATA8_14		IB_SA_COMP_MASK(21)
+#define IB_SA_SERVICE_REC_SERVICE_DATA8_15		IB_SA_COMP_MASK(22)
+#define IB_SA_SERVICE_REC_SERVICE_DATA16_0		IB_SA_COMP_MASK(23)
+#define IB_SA_SERVICE_REC_SERVICE_DATA16_1		IB_SA_COMP_MASK(24)
+#define IB_SA_SERVICE_REC_SERVICE_DATA16_2		IB_SA_COMP_MASK(25)
+#define IB_SA_SERVICE_REC_SERVICE_DATA16_3		IB_SA_COMP_MASK(26)
+#define IB_SA_SERVICE_REC_SERVICE_DATA16_4		IB_SA_COMP_MASK(27)
+#define IB_SA_SERVICE_REC_SERVICE_DATA16_5		IB_SA_COMP_MASK(28)
+#define IB_SA_SERVICE_REC_SERVICE_DATA16_6		IB_SA_COMP_MASK(29)
+#define IB_SA_SERVICE_REC_SERVICE_DATA16_7		IB_SA_COMP_MASK(30)
+#define IB_SA_SERVICE_REC_SERVICE_DATA32_0		IB_SA_COMP_MASK(31)
+#define IB_SA_SERVICE_REC_SERVICE_DATA32_1		IB_SA_COMP_MASK(32)
+#define IB_SA_SERVICE_REC_SERVICE_DATA32_2		IB_SA_COMP_MASK(33)
+#define IB_SA_SERVICE_REC_SERVICE_DATA32_3		IB_SA_COMP_MASK(34)
+#define IB_SA_SERVICE_REC_SERVICE_DATA64_0		IB_SA_COMP_MASK(35)
+#define IB_SA_SERVICE_REC_SERVICE_DATA64_1		IB_SA_COMP_MASK(36)
+
+#define IB_DEFAULT_SERVICE_LEASE 	0xFFFFFFFF
+
+struct ib_sa_service_rec {
+	u64		id;
+	union ib_gid	gid;
+	__be16 		pkey;
+	/* reserved */
+	u32		lease;
+	u8		key[16];
+	u8		name[64];
+	u8		data8[16];
+	u16		data16[8];
+	u32		data32[4];
+	u64		data64[2];
+};
+
+#define IB_SA_GUIDINFO_REC_LID		IB_SA_COMP_MASK(0)
+#define IB_SA_GUIDINFO_REC_BLOCK_NUM	IB_SA_COMP_MASK(1)
+#define IB_SA_GUIDINFO_REC_RES1		IB_SA_COMP_MASK(2)
+#define IB_SA_GUIDINFO_REC_RES2		IB_SA_COMP_MASK(3)
+#define IB_SA_GUIDINFO_REC_GID0		IB_SA_COMP_MASK(4)
+#define IB_SA_GUIDINFO_REC_GID1		IB_SA_COMP_MASK(5)
+#define IB_SA_GUIDINFO_REC_GID2		IB_SA_COMP_MASK(6)
+#define IB_SA_GUIDINFO_REC_GID3		IB_SA_COMP_MASK(7)
+#define IB_SA_GUIDINFO_REC_GID4		IB_SA_COMP_MASK(8)
+#define IB_SA_GUIDINFO_REC_GID5		IB_SA_COMP_MASK(9)
+#define IB_SA_GUIDINFO_REC_GID6		IB_SA_COMP_MASK(10)
+#define IB_SA_GUIDINFO_REC_GID7		IB_SA_COMP_MASK(11)
+
+struct ib_sa_guidinfo_rec {
+	__be16	lid;
+	u8	block_num;
+	/* reserved */
+	u8	res1;
+	__be32	res2;
+	u8	guid_info_list[64];
+};
+
+struct ib_sa_client {
+	atomic_t users;
+	struct completion comp;
+};
+
+/**
+ * ib_sa_register_client - Register an SA client.
+ */
+void ib_sa_register_client(struct ib_sa_client *client);
+
+/**
+ * ib_sa_unregister_client - Deregister an SA client.
+ * @client: Client object to deregister.
+ */
+void ib_sa_unregister_client(struct ib_sa_client *client);
+
+struct ib_sa_query;
+
+void ib_sa_cancel_query(int id, struct ib_sa_query *query);
+
+int ib_sa_path_rec_get(struct ib_sa_client *client, struct ib_device *device,
+		       u8 port_num, struct sa_path_rec *rec,
+		       ib_sa_comp_mask comp_mask, unsigned long timeout_ms,
+		       gfp_t gfp_mask,
+		       void (*callback)(int status, struct sa_path_rec *resp,
+					void *context),
+		       void *context, struct ib_sa_query **query);
+
+int ib_sa_service_rec_query(struct ib_sa_client *client,
+			    struct ib_device *device, u8 port_num, u8 method,
+			    struct ib_sa_service_rec *rec,
+			    ib_sa_comp_mask comp_mask, unsigned long timeout_ms,
+			    gfp_t gfp_mask,
+			    void (*callback)(int status,
+					     struct ib_sa_service_rec *resp,
+					     void *context),
+			    void *context, struct ib_sa_query **sa_query);
+
+struct ib_sa_multicast {
+	struct ib_sa_mcmember_rec rec;
+	ib_sa_comp_mask		comp_mask;
+	int			(*callback)(int status,
+					    struct ib_sa_multicast *multicast);
+	void			*context;
+};
+
+/**
+ * ib_sa_join_multicast - Initiates a join request to the specified multicast
+ *   group.
+ * @client: SA client
+ * @device: Device associated with the multicast group.
+ * @port_num: Port on the specified device to associate with the multicast
+ *   group.
+ * @rec: SA multicast member record specifying group attributes.
+ * @comp_mask: Component mask indicating which group attributes of %rec are
+ *   valid.
+ * @gfp_mask: GFP mask for memory allocations.
+ * @callback: User callback invoked once the join operation completes.
+ * @context: User specified context stored with the ib_sa_multicast structure.
+ *
+ * This call initiates a multicast join request with the SA for the specified
+ * multicast group.  If the join operation is started successfully, it returns
+ * an ib_sa_multicast structure that is used to track the multicast operation.
+ * Users must free this structure by calling ib_free_multicast, even if the
+ * join operation later fails.  (The callback status is non-zero.)
+ *
+ * If the join operation fails; status will be non-zero, with the following
+ * failures possible:
+ * -ETIMEDOUT: The request timed out.
+ * -EIO: An error occurred sending the query.
+ * -EINVAL: The MCMemberRecord values differed from the existing group's.
+ * -ENETRESET: Indicates that an fatal error has occurred on the multicast
+ *   group, and the user must rejoin the group to continue using it.
+ */
+struct ib_sa_multicast *ib_sa_join_multicast(struct ib_sa_client *client,
+					     struct ib_device *device, u8 port_num,
+					     struct ib_sa_mcmember_rec *rec,
+					     ib_sa_comp_mask comp_mask, gfp_t gfp_mask,
+					     int (*callback)(int status,
+							     struct ib_sa_multicast
+								    *multicast),
+					     void *context);
+
+/**
+ * ib_free_multicast - Frees the multicast tracking structure, and releases
+ *    any reference on the multicast group.
+ * @multicast: Multicast tracking structure allocated by ib_join_multicast.
+ *
+ * This call blocks until the multicast identifier is destroyed.  It may
+ * not be called from within the multicast callback; however, returning a non-
+ * zero value from the callback will result in destroying the multicast
+ * tracking structure.
+ */
+void ib_sa_free_multicast(struct ib_sa_multicast *multicast);
+
+/**
+ * ib_get_mcmember_rec - Looks up a multicast member record by its MGID and
+ *   returns it if found.
+ * @device: Device associated with the multicast group.
+ * @port_num: Port on the specified device to associate with the multicast
+ *   group.
+ * @mgid: MGID of multicast group.
+ * @rec: Location to copy SA multicast member record.
+ */
+int ib_sa_get_mcmember_rec(struct ib_device *device, u8 port_num,
+			   union ib_gid *mgid, struct ib_sa_mcmember_rec *rec);
+
+/**
+ * ib_init_ah_from_mcmember - Initialize address handle attributes based on
+ * an SA multicast member record.
+ */
+int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
+			     struct ib_sa_mcmember_rec *rec,
+			     struct net_device *ndev,
+			     enum ib_gid_type gid_type,
+			     struct rdma_ah_attr *ah_attr);
+
+int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num,
+			      struct sa_path_rec *rec,
+			      struct rdma_ah_attr *ah_attr,
+			      const struct ib_gid_attr *sgid_attr);
+
+/**
+ * ib_sa_pack_path - Conert a path record from struct ib_sa_path_rec
+ * to IB MAD wire format.
+ */
+void ib_sa_pack_path(struct sa_path_rec *rec, void *attribute);
+
+/**
+ * ib_sa_unpack_path - Convert a path record from MAD format to struct
+ * ib_sa_path_rec.
+ */
+void ib_sa_unpack_path(void *attribute, struct sa_path_rec *rec);
+
+/* Support GuidInfoRecord */
+int ib_sa_guid_info_rec_query(struct ib_sa_client *client,
+			      struct ib_device *device, u8 port_num,
+			      struct ib_sa_guidinfo_rec *rec,
+			      ib_sa_comp_mask comp_mask, u8 method,
+			      unsigned long timeout_ms, gfp_t gfp_mask,
+			      void (*callback)(int status,
+					       struct ib_sa_guidinfo_rec *resp,
+					       void *context),
+			      void *context, struct ib_sa_query **sa_query);
+
+bool ib_sa_sendonly_fullmem_support(struct ib_sa_client *client,
+				    struct ib_device *device,
+				    u8 port_num);
+
+static inline bool sa_path_is_roce(struct sa_path_rec *rec)
+{
+	return ((rec->rec_type == SA_PATH_REC_TYPE_ROCE_V1) ||
+		(rec->rec_type == SA_PATH_REC_TYPE_ROCE_V2));
+}
+
+static inline bool sa_path_is_opa(struct sa_path_rec *rec)
+{
+	return (rec->rec_type == SA_PATH_REC_TYPE_OPA);
+}
+
+static inline void sa_path_set_slid(struct sa_path_rec *rec, u32 slid)
+{
+	if (rec->rec_type == SA_PATH_REC_TYPE_IB)
+		rec->ib.slid = cpu_to_be16(slid);
+	else if (rec->rec_type == SA_PATH_REC_TYPE_OPA)
+		rec->opa.slid = cpu_to_be32(slid);
+}
+
+static inline void sa_path_set_dlid(struct sa_path_rec *rec, u32 dlid)
+{
+	if (rec->rec_type == SA_PATH_REC_TYPE_IB)
+		rec->ib.dlid = cpu_to_be16(dlid);
+	else if (rec->rec_type == SA_PATH_REC_TYPE_OPA)
+		rec->opa.dlid = cpu_to_be32(dlid);
+}
+
+static inline void sa_path_set_raw_traffic(struct sa_path_rec *rec,
+					   u8 raw_traffic)
+{
+	if (rec->rec_type == SA_PATH_REC_TYPE_IB)
+		rec->ib.raw_traffic = raw_traffic;
+	else if (rec->rec_type == SA_PATH_REC_TYPE_OPA)
+		rec->opa.raw_traffic = raw_traffic;
+}
+
+static inline __be32 sa_path_get_slid(struct sa_path_rec *rec)
+{
+	if (rec->rec_type == SA_PATH_REC_TYPE_IB)
+		return htonl(ntohs(rec->ib.slid));
+	else if (rec->rec_type == SA_PATH_REC_TYPE_OPA)
+		return rec->opa.slid;
+	return 0;
+}
+
+static inline __be32 sa_path_get_dlid(struct sa_path_rec *rec)
+{
+	if (rec->rec_type == SA_PATH_REC_TYPE_IB)
+		return htonl(ntohs(rec->ib.dlid));
+	else if (rec->rec_type == SA_PATH_REC_TYPE_OPA)
+		return rec->opa.dlid;
+	return 0;
+}
+
+static inline u8 sa_path_get_raw_traffic(struct sa_path_rec *rec)
+{
+	if (rec->rec_type == SA_PATH_REC_TYPE_IB)
+		return rec->ib.raw_traffic;
+	else if (rec->rec_type == SA_PATH_REC_TYPE_OPA)
+		return rec->opa.raw_traffic;
+	return 0;
+}
+
+static inline void sa_path_set_dmac(struct sa_path_rec *rec, u8 *dmac)
+{
+	if (sa_path_is_roce(rec))
+		memcpy(rec->roce.dmac, dmac, ETH_ALEN);
+}
+
+static inline void sa_path_set_dmac_zero(struct sa_path_rec *rec)
+{
+	if (sa_path_is_roce(rec))
+		eth_zero_addr(rec->roce.dmac);
+}
+
+static inline u8 *sa_path_get_dmac(struct sa_path_rec *rec)
+{
+	if (sa_path_is_roce(rec))
+		return rec->roce.dmac;
+	return NULL;
+}
+#endif /* IB_SA_H */
diff --git a/include/rdma/ib_smi.h b/include/rdma/ib_smi.h
new file mode 100644
index 000000000..fdb8633cb
--- /dev/null
+++ b/include/rdma/ib_smi.h
@@ -0,0 +1,148 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2004 Mellanox Technologies Ltd.  All rights reserved.
+ * Copyright (c) 2004 Infinicon Corporation.  All rights reserved.
+ * Copyright (c) 2004 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
+ * Copyright (c) 2004 Voltaire Corporation.  All rights reserved.
+ */
+
+#ifndef IB_SMI_H
+#define IB_SMI_H
+
+#include <rdma/ib_mad.h>
+
+#define IB_SMP_DATA_SIZE			64
+#define IB_SMP_MAX_PATH_HOPS			64
+
+struct ib_smp {
+	u8	base_version;
+	u8	mgmt_class;
+	u8	class_version;
+	u8	method;
+	__be16	status;
+	u8	hop_ptr;
+	u8	hop_cnt;
+	__be64	tid;
+	__be16	attr_id;
+	__be16	resv;
+	__be32	attr_mod;
+	__be64	mkey;
+	__be16	dr_slid;
+	__be16	dr_dlid;
+	u8	reserved[28];
+	u8	data[IB_SMP_DATA_SIZE];
+	u8	initial_path[IB_SMP_MAX_PATH_HOPS];
+	u8	return_path[IB_SMP_MAX_PATH_HOPS];
+} __packed;
+
+#define IB_SMP_DIRECTION			cpu_to_be16(0x8000)
+
+/* Subnet management attributes */
+#define IB_SMP_ATTR_NOTICE			cpu_to_be16(0x0002)
+#define IB_SMP_ATTR_NODE_DESC			cpu_to_be16(0x0010)
+#define IB_SMP_ATTR_NODE_INFO			cpu_to_be16(0x0011)
+#define IB_SMP_ATTR_SWITCH_INFO			cpu_to_be16(0x0012)
+#define IB_SMP_ATTR_GUID_INFO			cpu_to_be16(0x0014)
+#define IB_SMP_ATTR_PORT_INFO			cpu_to_be16(0x0015)
+#define IB_SMP_ATTR_PKEY_TABLE			cpu_to_be16(0x0016)
+#define IB_SMP_ATTR_SL_TO_VL_TABLE		cpu_to_be16(0x0017)
+#define IB_SMP_ATTR_VL_ARB_TABLE		cpu_to_be16(0x0018)
+#define IB_SMP_ATTR_LINEAR_FORWARD_TABLE	cpu_to_be16(0x0019)
+#define IB_SMP_ATTR_RANDOM_FORWARD_TABLE	cpu_to_be16(0x001A)
+#define IB_SMP_ATTR_MCAST_FORWARD_TABLE		cpu_to_be16(0x001B)
+#define IB_SMP_ATTR_SM_INFO			cpu_to_be16(0x0020)
+#define IB_SMP_ATTR_VENDOR_DIAG			cpu_to_be16(0x0030)
+#define IB_SMP_ATTR_LED_INFO			cpu_to_be16(0x0031)
+#define IB_SMP_ATTR_VENDOR_MASK			cpu_to_be16(0xFF00)
+
+struct ib_port_info {
+	__be64 mkey;
+	__be64 gid_prefix;
+	__be16 lid;
+	__be16 sm_lid;
+	__be32 cap_mask;
+	__be16 diag_code;
+	__be16 mkey_lease_period;
+	u8 local_port_num;
+	u8 link_width_enabled;
+	u8 link_width_supported;
+	u8 link_width_active;
+	u8 linkspeed_portstate;			/* 4 bits, 4 bits */
+	u8 portphysstate_linkdown;		/* 4 bits, 4 bits */
+	u8 mkeyprot_resv_lmc;			/* 2 bits, 3, 3 */
+	u8 linkspeedactive_enabled;		/* 4 bits, 4 bits */
+	u8 neighbormtu_mastersmsl;		/* 4 bits, 4 bits */
+	u8 vlcap_inittype;			/* 4 bits, 4 bits */
+	u8 vl_high_limit;
+	u8 vl_arb_high_cap;
+	u8 vl_arb_low_cap;
+	u8 inittypereply_mtucap;		/* 4 bits, 4 bits */
+	u8 vlstallcnt_hoqlife;			/* 3 bits, 5 bits */
+	u8 operationalvl_pei_peo_fpi_fpo;	/* 4 bits, 1, 1, 1, 1 */
+	__be16 mkey_violations;
+	__be16 pkey_violations;
+	__be16 qkey_violations;
+	u8 guid_cap;
+	u8 clientrereg_resv_subnetto;		/* 1 bit, 2 bits, 5 */
+	u8 resv_resptimevalue;			/* 3 bits, 5 bits */
+	u8 localphyerrors_overrunerrors;	/* 4 bits, 4 bits */
+	__be16 max_credit_hint;
+	u8 resv;
+	u8 link_roundtrip_latency[3];
+};
+
+struct ib_node_info {
+	u8 base_version;
+	u8 class_version;
+	u8 node_type;
+	u8 num_ports;
+	__be64 sys_guid;
+	__be64 node_guid;
+	__be64 port_guid;
+	__be16 partition_cap;
+	__be16 device_id;
+	__be32 revision;
+	u8 local_port_num;
+	u8 vendor_id[3];
+} __packed;
+
+struct ib_vl_weight_elem {
+	u8      vl;     /* IB: VL is low 4 bits, upper 4 bits reserved */
+                        /* OPA: VL is low 5 bits, upper 3 bits reserved */
+	u8      weight;
+};
+
+static inline u8
+ib_get_smp_direction(struct ib_smp *smp)
+{
+	return ((smp->status & IB_SMP_DIRECTION) == IB_SMP_DIRECTION);
+}
+
+/*
+ * SM Trap/Notice numbers
+ */
+#define IB_NOTICE_TRAP_LLI_THRESH	cpu_to_be16(129)
+#define IB_NOTICE_TRAP_EBO_THRESH	cpu_to_be16(130)
+#define IB_NOTICE_TRAP_FLOW_UPDATE	cpu_to_be16(131)
+#define IB_NOTICE_TRAP_CAP_MASK_CHG	cpu_to_be16(144)
+#define IB_NOTICE_TRAP_SYS_GUID_CHG	cpu_to_be16(145)
+#define IB_NOTICE_TRAP_BAD_MKEY		cpu_to_be16(256)
+#define IB_NOTICE_TRAP_BAD_PKEY		cpu_to_be16(257)
+#define IB_NOTICE_TRAP_BAD_QKEY		cpu_to_be16(258)
+
+/*
+ * Other local changes flags (trap 144).
+ */
+#define IB_NOTICE_TRAP_LSE_CHG		0x04	/* Link Speed Enable changed */
+#define IB_NOTICE_TRAP_LWE_CHG		0x02	/* Link Width Enable changed */
+#define IB_NOTICE_TRAP_NODE_DESC_CHG	0x01
+
+/*
+ * M_Key volation flags in dr_trunc_hop (trap 256).
+ */
+#define IB_NOTICE_TRAP_DR_NOTICE	0x80
+#define IB_NOTICE_TRAP_DR_TRUNC		0x40
+
+
+#endif /* IB_SMI_H */
diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
new file mode 100644
index 000000000..70597508c
--- /dev/null
+++ b/include/rdma/ib_umem.h
@@ -0,0 +1,107 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2007 Cisco Systems.  All rights reserved.
+ */
+
+#ifndef IB_UMEM_H
+#define IB_UMEM_H
+
+#include <linux/list.h>
+#include <linux/scatterlist.h>
+#include <linux/workqueue.h>
+#include <rdma/ib_verbs.h>
+
+struct ib_ucontext;
+struct ib_umem_odp;
+
+struct ib_umem {
+	struct ib_device       *ibdev;
+	struct mm_struct       *owning_mm;
+	u64 iova;
+	size_t			length;
+	unsigned long		address;
+	u32 writable : 1;
+	u32 is_odp : 1;
+	struct work_struct	work;
+	struct sg_table sg_head;
+	int             nmap;
+	unsigned int    sg_nents;
+};
+
+/* Returns the offset of the umem start relative to the first page. */
+static inline int ib_umem_offset(struct ib_umem *umem)
+{
+	return umem->address & ~PAGE_MASK;
+}
+
+static inline size_t ib_umem_num_dma_blocks(struct ib_umem *umem,
+					    unsigned long pgsz)
+{
+	return (size_t)((ALIGN(umem->iova + umem->length, pgsz) -
+			 ALIGN_DOWN(umem->iova, pgsz))) /
+	       pgsz;
+}
+
+static inline size_t ib_umem_num_pages(struct ib_umem *umem)
+{
+	return ib_umem_num_dma_blocks(umem, PAGE_SIZE);
+}
+
+static inline void __rdma_umem_block_iter_start(struct ib_block_iter *biter,
+						struct ib_umem *umem,
+						unsigned long pgsz)
+{
+	__rdma_block_iter_start(biter, umem->sg_head.sgl, umem->nmap, pgsz);
+}
+
+/**
+ * rdma_umem_for_each_dma_block - iterate over contiguous DMA blocks of the umem
+ * @umem: umem to iterate over
+ * @pgsz: Page size to split the list into
+ *
+ * pgsz must be <= PAGE_SIZE or computed by ib_umem_find_best_pgsz(). The
+ * returned DMA blocks will be aligned to pgsz and span the range:
+ * ALIGN_DOWN(umem->address, pgsz) to ALIGN(umem->address + umem->length, pgsz)
+ *
+ * Performs exactly ib_umem_num_dma_blocks() iterations.
+ */
+#define rdma_umem_for_each_dma_block(umem, biter, pgsz)                        \
+	for (__rdma_umem_block_iter_start(biter, umem, pgsz);                  \
+	     __rdma_block_iter_next(biter);)
+
+#ifdef CONFIG_INFINIBAND_USER_MEM
+
+struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
+			    size_t size, int access);
+void ib_umem_release(struct ib_umem *umem);
+int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
+		      size_t length);
+unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
+				     unsigned long pgsz_bitmap,
+				     unsigned long virt);
+
+#else /* CONFIG_INFINIBAND_USER_MEM */
+
+#include <linux/err.h>
+
+static inline struct ib_umem *ib_umem_get(struct ib_device *device,
+					  unsigned long addr, size_t size,
+					  int access)
+{
+	return ERR_PTR(-EINVAL);
+}
+static inline void ib_umem_release(struct ib_umem *umem) { }
+static inline int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
+		      		    size_t length) {
+	return -EINVAL;
+}
+static inline unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
+						   unsigned long pgsz_bitmap,
+						   unsigned long virt)
+{
+	return 0;
+}
+
+#endif /* CONFIG_INFINIBAND_USER_MEM */
+
+#endif /* IB_UMEM_H */
diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h
new file mode 100644
index 000000000..0844c1d05
--- /dev/null
+++ b/include/rdma/ib_umem_odp.h
@@ -0,0 +1,115 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2014 Mellanox Technologies. All rights reserved.
+ */
+
+#ifndef IB_UMEM_ODP_H
+#define IB_UMEM_ODP_H
+
+#include <rdma/ib_umem.h>
+#include <rdma/ib_verbs.h>
+
+struct ib_umem_odp {
+	struct ib_umem umem;
+	struct mmu_interval_notifier notifier;
+	struct pid *tgid;
+
+	/* An array of the pfns included in the on-demand paging umem. */
+	unsigned long *pfn_list;
+
+	/*
+	 * An array with DMA addresses mapped for pfns in pfn_list.
+	 * The lower two bits designate access permissions.
+	 * See ODP_READ_ALLOWED_BIT and ODP_WRITE_ALLOWED_BIT.
+	 */
+	dma_addr_t		*dma_list;
+	/*
+	 * The umem_mutex protects the page_list and dma_list fields of an ODP
+	 * umem, allowing only a single thread to map/unmap pages. The mutex
+	 * also protects access to the mmu notifier counters.
+	 */
+	struct mutex		umem_mutex;
+	void			*private; /* for the HW driver to use. */
+
+	int npages;
+
+	/*
+	 * An implicit odp umem cannot be DMA mapped, has 0 length, and serves
+	 * only as an anchor for the driver to hold onto the per_mm. FIXME:
+	 * This should be removed and drivers should work with the per_mm
+	 * directly.
+	 */
+	bool is_implicit_odp;
+
+	unsigned int		page_shift;
+};
+
+static inline struct ib_umem_odp *to_ib_umem_odp(struct ib_umem *umem)
+{
+	return container_of(umem, struct ib_umem_odp, umem);
+}
+
+/* Returns the first page of an ODP umem. */
+static inline unsigned long ib_umem_start(struct ib_umem_odp *umem_odp)
+{
+	return umem_odp->notifier.interval_tree.start;
+}
+
+/* Returns the address of the page after the last one of an ODP umem. */
+static inline unsigned long ib_umem_end(struct ib_umem_odp *umem_odp)
+{
+	return umem_odp->notifier.interval_tree.last + 1;
+}
+
+static inline size_t ib_umem_odp_num_pages(struct ib_umem_odp *umem_odp)
+{
+	return (ib_umem_end(umem_odp) - ib_umem_start(umem_odp)) >>
+	       umem_odp->page_shift;
+}
+
+/*
+ * The lower 2 bits of the DMA address signal the R/W permissions for
+ * the entry. To upgrade the permissions, provide the appropriate
+ * bitmask to the map_dma_pages function.
+ *
+ * Be aware that upgrading a mapped address might result in change of
+ * the DMA address for the page.
+ */
+#define ODP_READ_ALLOWED_BIT  (1<<0ULL)
+#define ODP_WRITE_ALLOWED_BIT (1<<1ULL)
+
+#define ODP_DMA_ADDR_MASK (~(ODP_READ_ALLOWED_BIT | ODP_WRITE_ALLOWED_BIT))
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+
+struct ib_umem_odp *
+ib_umem_odp_get(struct ib_device *device, unsigned long addr, size_t size,
+		int access, const struct mmu_interval_notifier_ops *ops);
+struct ib_umem_odp *ib_umem_odp_alloc_implicit(struct ib_device *device,
+					       int access);
+struct ib_umem_odp *
+ib_umem_odp_alloc_child(struct ib_umem_odp *root_umem, unsigned long addr,
+			size_t size,
+			const struct mmu_interval_notifier_ops *ops);
+void ib_umem_odp_release(struct ib_umem_odp *umem_odp);
+
+int ib_umem_odp_map_dma_and_lock(struct ib_umem_odp *umem_odp, u64 start_offset,
+				 u64 bcnt, u64 access_mask, bool fault);
+
+void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 start_offset,
+				 u64 bound);
+
+#else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
+
+static inline struct ib_umem_odp *
+ib_umem_odp_get(struct ib_device *device, unsigned long addr, size_t size,
+		int access, const struct mmu_interval_notifier_ops *ops)
+{
+	return ERR_PTR(-EINVAL);
+}
+
+static inline void ib_umem_odp_release(struct ib_umem_odp *umem_odp) {}
+
+#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
+
+#endif /* IB_UMEM_ODP_H */
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
new file mode 100644
index 000000000..ac6ffa561
--- /dev/null
+++ b/include/rdma/ib_verbs.h
@@ -0,0 +1,4716 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2004 Mellanox Technologies Ltd.  All rights reserved.
+ * Copyright (c) 2004 Infinicon Corporation.  All rights reserved.
+ * Copyright (c) 2004 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
+ * Copyright (c) 2004 Voltaire Corporation.  All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2005, 2006, 2007 Cisco Systems.  All rights reserved.
+ */
+
+#ifndef IB_VERBS_H
+#define IB_VERBS_H
+
+#include <linux/types.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/kref.h>
+#include <linux/list.h>
+#include <linux/rwsem.h>
+#include <linux/workqueue.h>
+#include <linux/irq_poll.h>
+#include <uapi/linux/if_ether.h>
+#include <net/ipv6.h>
+#include <net/ip.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/netdevice.h>
+#include <linux/refcount.h>
+#include <linux/if_link.h>
+#include <linux/atomic.h>
+#include <linux/mmu_notifier.h>
+#include <linux/uaccess.h>
+#include <linux/cgroup_rdma.h>
+#include <linux/irqflags.h>
+#include <linux/preempt.h>
+#include <linux/dim.h>
+#include <uapi/rdma/ib_user_verbs.h>
+#include <rdma/rdma_counter.h>
+#include <rdma/restrack.h>
+#include <rdma/signature.h>
+#include <uapi/rdma/rdma_user_ioctl.h>
+#include <uapi/rdma/ib_user_ioctl_verbs.h>
+
+#define IB_FW_VERSION_NAME_MAX	ETHTOOL_FWVERS_LEN
+
+struct ib_umem_odp;
+struct ib_uqp_object;
+struct ib_usrq_object;
+struct ib_uwq_object;
+struct rdma_cm_id;
+
+extern struct workqueue_struct *ib_wq;
+extern struct workqueue_struct *ib_comp_wq;
+extern struct workqueue_struct *ib_comp_unbound_wq;
+
+struct ib_ucq_object;
+
+__printf(3, 4) __cold
+void ibdev_printk(const char *level, const struct ib_device *ibdev,
+		  const char *format, ...);
+__printf(2, 3) __cold
+void ibdev_emerg(const struct ib_device *ibdev, const char *format, ...);
+__printf(2, 3) __cold
+void ibdev_alert(const struct ib_device *ibdev, const char *format, ...);
+__printf(2, 3) __cold
+void ibdev_crit(const struct ib_device *ibdev, const char *format, ...);
+__printf(2, 3) __cold
+void ibdev_err(const struct ib_device *ibdev, const char *format, ...);
+__printf(2, 3) __cold
+void ibdev_warn(const struct ib_device *ibdev, const char *format, ...);
+__printf(2, 3) __cold
+void ibdev_notice(const struct ib_device *ibdev, const char *format, ...);
+__printf(2, 3) __cold
+void ibdev_info(const struct ib_device *ibdev, const char *format, ...);
+
+#if defined(CONFIG_DYNAMIC_DEBUG) || \
+	(defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE))
+#define ibdev_dbg(__dev, format, args...)                       \
+	dynamic_ibdev_dbg(__dev, format, ##args)
+#else
+__printf(2, 3) __cold
+static inline
+void ibdev_dbg(const struct ib_device *ibdev, const char *format, ...) {}
+#endif
+
+#define ibdev_level_ratelimited(ibdev_level, ibdev, fmt, ...)           \
+do {                                                                    \
+	static DEFINE_RATELIMIT_STATE(_rs,                              \
+				      DEFAULT_RATELIMIT_INTERVAL,       \
+				      DEFAULT_RATELIMIT_BURST);         \
+	if (__ratelimit(&_rs))                                          \
+		ibdev_level(ibdev, fmt, ##__VA_ARGS__);                 \
+} while (0)
+
+#define ibdev_emerg_ratelimited(ibdev, fmt, ...) \
+	ibdev_level_ratelimited(ibdev_emerg, ibdev, fmt, ##__VA_ARGS__)
+#define ibdev_alert_ratelimited(ibdev, fmt, ...) \
+	ibdev_level_ratelimited(ibdev_alert, ibdev, fmt, ##__VA_ARGS__)
+#define ibdev_crit_ratelimited(ibdev, fmt, ...) \
+	ibdev_level_ratelimited(ibdev_crit, ibdev, fmt, ##__VA_ARGS__)
+#define ibdev_err_ratelimited(ibdev, fmt, ...) \
+	ibdev_level_ratelimited(ibdev_err, ibdev, fmt, ##__VA_ARGS__)
+#define ibdev_warn_ratelimited(ibdev, fmt, ...) \
+	ibdev_level_ratelimited(ibdev_warn, ibdev, fmt, ##__VA_ARGS__)
+#define ibdev_notice_ratelimited(ibdev, fmt, ...) \
+	ibdev_level_ratelimited(ibdev_notice, ibdev, fmt, ##__VA_ARGS__)
+#define ibdev_info_ratelimited(ibdev, fmt, ...) \
+	ibdev_level_ratelimited(ibdev_info, ibdev, fmt, ##__VA_ARGS__)
+
+#if defined(CONFIG_DYNAMIC_DEBUG) || \
+	(defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE))
+/* descriptor check is first to prevent flooding with "callbacks suppressed" */
+#define ibdev_dbg_ratelimited(ibdev, fmt, ...)                          \
+do {                                                                    \
+	static DEFINE_RATELIMIT_STATE(_rs,                              \
+				      DEFAULT_RATELIMIT_INTERVAL,       \
+				      DEFAULT_RATELIMIT_BURST);         \
+	DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt);                 \
+	if (DYNAMIC_DEBUG_BRANCH(descriptor) && __ratelimit(&_rs))      \
+		__dynamic_ibdev_dbg(&descriptor, ibdev, fmt,            \
+				    ##__VA_ARGS__);                     \
+} while (0)
+#else
+__printf(2, 3) __cold
+static inline
+void ibdev_dbg_ratelimited(const struct ib_device *ibdev, const char *format, ...) {}
+#endif
+
+union ib_gid {
+	u8	raw[16];
+	struct {
+		__be64	subnet_prefix;
+		__be64	interface_id;
+	} global;
+};
+
+extern union ib_gid zgid;
+
+enum ib_gid_type {
+	IB_GID_TYPE_IB = IB_UVERBS_GID_TYPE_IB,
+	IB_GID_TYPE_ROCE = IB_UVERBS_GID_TYPE_ROCE_V1,
+	IB_GID_TYPE_ROCE_UDP_ENCAP = IB_UVERBS_GID_TYPE_ROCE_V2,
+	IB_GID_TYPE_SIZE
+};
+
+#define ROCE_V2_UDP_DPORT      4791
+struct ib_gid_attr {
+	struct net_device __rcu	*ndev;
+	struct ib_device	*device;
+	union ib_gid		gid;
+	enum ib_gid_type	gid_type;
+	u16			index;
+	u8			port_num;
+};
+
+enum {
+	/* set the local administered indication */
+	IB_SA_WELL_KNOWN_GUID	= BIT_ULL(57) | 2,
+};
+
+enum rdma_transport_type {
+	RDMA_TRANSPORT_IB,
+	RDMA_TRANSPORT_IWARP,
+	RDMA_TRANSPORT_USNIC,
+	RDMA_TRANSPORT_USNIC_UDP,
+	RDMA_TRANSPORT_UNSPECIFIED,
+};
+
+enum rdma_protocol_type {
+	RDMA_PROTOCOL_IB,
+	RDMA_PROTOCOL_IBOE,
+	RDMA_PROTOCOL_IWARP,
+	RDMA_PROTOCOL_USNIC_UDP
+};
+
+__attribute_const__ enum rdma_transport_type
+rdma_node_get_transport(unsigned int node_type);
+
+enum rdma_network_type {
+	RDMA_NETWORK_IB,
+	RDMA_NETWORK_ROCE_V1,
+	RDMA_NETWORK_IPV4,
+	RDMA_NETWORK_IPV6
+};
+
+static inline enum ib_gid_type ib_network_to_gid_type(enum rdma_network_type network_type)
+{
+	if (network_type == RDMA_NETWORK_IPV4 ||
+	    network_type == RDMA_NETWORK_IPV6)
+		return IB_GID_TYPE_ROCE_UDP_ENCAP;
+	else if (network_type == RDMA_NETWORK_ROCE_V1)
+		return IB_GID_TYPE_ROCE;
+	else
+		return IB_GID_TYPE_IB;
+}
+
+static inline enum rdma_network_type
+rdma_gid_attr_network_type(const struct ib_gid_attr *attr)
+{
+	if (attr->gid_type == IB_GID_TYPE_IB)
+		return RDMA_NETWORK_IB;
+
+	if (attr->gid_type == IB_GID_TYPE_ROCE)
+		return RDMA_NETWORK_ROCE_V1;
+
+	if (ipv6_addr_v4mapped((struct in6_addr *)&attr->gid))
+		return RDMA_NETWORK_IPV4;
+	else
+		return RDMA_NETWORK_IPV6;
+}
+
+enum rdma_link_layer {
+	IB_LINK_LAYER_UNSPECIFIED,
+	IB_LINK_LAYER_INFINIBAND,
+	IB_LINK_LAYER_ETHERNET,
+};
+
+enum ib_device_cap_flags {
+	IB_DEVICE_RESIZE_MAX_WR			= (1 << 0),
+	IB_DEVICE_BAD_PKEY_CNTR			= (1 << 1),
+	IB_DEVICE_BAD_QKEY_CNTR			= (1 << 2),
+	IB_DEVICE_RAW_MULTI			= (1 << 3),
+	IB_DEVICE_AUTO_PATH_MIG			= (1 << 4),
+	IB_DEVICE_CHANGE_PHY_PORT		= (1 << 5),
+	IB_DEVICE_UD_AV_PORT_ENFORCE		= (1 << 6),
+	IB_DEVICE_CURR_QP_STATE_MOD		= (1 << 7),
+	IB_DEVICE_SHUTDOWN_PORT			= (1 << 8),
+	/* Not in use, former INIT_TYPE		= (1 << 9),*/
+	IB_DEVICE_PORT_ACTIVE_EVENT		= (1 << 10),
+	IB_DEVICE_SYS_IMAGE_GUID		= (1 << 11),
+	IB_DEVICE_RC_RNR_NAK_GEN		= (1 << 12),
+	IB_DEVICE_SRQ_RESIZE			= (1 << 13),
+	IB_DEVICE_N_NOTIFY_CQ			= (1 << 14),
+
+	/*
+	 * This device supports a per-device lkey or stag that can be
+	 * used without performing a memory registration for the local
+	 * memory.  Note that ULPs should never check this flag, but
+	 * instead of use the local_dma_lkey flag in the ib_pd structure,
+	 * which will always contain a usable lkey.
+	 */
+	IB_DEVICE_LOCAL_DMA_LKEY		= (1 << 15),
+	/* Reserved, old SEND_W_INV		= (1 << 16),*/
+	IB_DEVICE_MEM_WINDOW			= (1 << 17),
+	/*
+	 * Devices should set IB_DEVICE_UD_IP_SUM if they support
+	 * insertion of UDP and TCP checksum on outgoing UD IPoIB
+	 * messages and can verify the validity of checksum for
+	 * incoming messages.  Setting this flag implies that the
+	 * IPoIB driver may set NETIF_F_IP_CSUM for datagram mode.
+	 */
+	IB_DEVICE_UD_IP_CSUM			= (1 << 18),
+	IB_DEVICE_UD_TSO			= (1 << 19),
+	IB_DEVICE_XRC				= (1 << 20),
+
+	/*
+	 * This device supports the IB "base memory management extension",
+	 * which includes support for fast registrations (IB_WR_REG_MR,
+	 * IB_WR_LOCAL_INV and IB_WR_SEND_WITH_INV verbs).  This flag should
+	 * also be set by any iWarp device which must support FRs to comply
+	 * to the iWarp verbs spec.  iWarp devices also support the
+	 * IB_WR_RDMA_READ_WITH_INV verb for RDMA READs that invalidate the
+	 * stag.
+	 */
+	IB_DEVICE_MEM_MGT_EXTENSIONS		= (1 << 21),
+	IB_DEVICE_BLOCK_MULTICAST_LOOPBACK	= (1 << 22),
+	IB_DEVICE_MEM_WINDOW_TYPE_2A		= (1 << 23),
+	IB_DEVICE_MEM_WINDOW_TYPE_2B		= (1 << 24),
+	IB_DEVICE_RC_IP_CSUM			= (1 << 25),
+	/* Deprecated. Please use IB_RAW_PACKET_CAP_IP_CSUM. */
+	IB_DEVICE_RAW_IP_CSUM			= (1 << 26),
+	/*
+	 * Devices should set IB_DEVICE_CROSS_CHANNEL if they
+	 * support execution of WQEs that involve synchronization
+	 * of I/O operations with single completion queue managed
+	 * by hardware.
+	 */
+	IB_DEVICE_CROSS_CHANNEL			= (1 << 27),
+	IB_DEVICE_MANAGED_FLOW_STEERING		= (1 << 29),
+	IB_DEVICE_INTEGRITY_HANDOVER		= (1 << 30),
+	IB_DEVICE_ON_DEMAND_PAGING		= (1ULL << 31),
+	IB_DEVICE_SG_GAPS_REG			= (1ULL << 32),
+	IB_DEVICE_VIRTUAL_FUNCTION		= (1ULL << 33),
+	/* Deprecated. Please use IB_RAW_PACKET_CAP_SCATTER_FCS. */
+	IB_DEVICE_RAW_SCATTER_FCS		= (1ULL << 34),
+	IB_DEVICE_RDMA_NETDEV_OPA		= (1ULL << 35),
+	/* The device supports padding incoming writes to cacheline. */
+	IB_DEVICE_PCI_WRITE_END_PADDING		= (1ULL << 36),
+	IB_DEVICE_ALLOW_USER_UNREG		= (1ULL << 37),
+};
+
+enum ib_atomic_cap {
+	IB_ATOMIC_NONE,
+	IB_ATOMIC_HCA,
+	IB_ATOMIC_GLOB
+};
+
+enum ib_odp_general_cap_bits {
+	IB_ODP_SUPPORT		= 1 << 0,
+	IB_ODP_SUPPORT_IMPLICIT = 1 << 1,
+};
+
+enum ib_odp_transport_cap_bits {
+	IB_ODP_SUPPORT_SEND	= 1 << 0,
+	IB_ODP_SUPPORT_RECV	= 1 << 1,
+	IB_ODP_SUPPORT_WRITE	= 1 << 2,
+	IB_ODP_SUPPORT_READ	= 1 << 3,
+	IB_ODP_SUPPORT_ATOMIC	= 1 << 4,
+	IB_ODP_SUPPORT_SRQ_RECV	= 1 << 5,
+};
+
+struct ib_odp_caps {
+	uint64_t general_caps;
+	struct {
+		uint32_t  rc_odp_caps;
+		uint32_t  uc_odp_caps;
+		uint32_t  ud_odp_caps;
+		uint32_t  xrc_odp_caps;
+	} per_transport_caps;
+};
+
+struct ib_rss_caps {
+	/* Corresponding bit will be set if qp type from
+	 * 'enum ib_qp_type' is supported, e.g.
+	 * supported_qpts |= 1 << IB_QPT_UD
+	 */
+	u32 supported_qpts;
+	u32 max_rwq_indirection_tables;
+	u32 max_rwq_indirection_table_size;
+};
+
+enum ib_tm_cap_flags {
+	/*  Support tag matching with rendezvous offload for RC transport */
+	IB_TM_CAP_RNDV_RC = 1 << 0,
+};
+
+struct ib_tm_caps {
+	/* Max size of RNDV header */
+	u32 max_rndv_hdr_size;
+	/* Max number of entries in tag matching list */
+	u32 max_num_tags;
+	/* From enum ib_tm_cap_flags */
+	u32 flags;
+	/* Max number of outstanding list operations */
+	u32 max_ops;
+	/* Max number of SGE in tag matching entry */
+	u32 max_sge;
+};
+
+struct ib_cq_init_attr {
+	unsigned int	cqe;
+	u32		comp_vector;
+	u32		flags;
+};
+
+enum ib_cq_attr_mask {
+	IB_CQ_MODERATE = 1 << 0,
+};
+
+struct ib_cq_caps {
+	u16     max_cq_moderation_count;
+	u16     max_cq_moderation_period;
+};
+
+struct ib_dm_mr_attr {
+	u64		length;
+	u64		offset;
+	u32		access_flags;
+};
+
+struct ib_dm_alloc_attr {
+	u64	length;
+	u32	alignment;
+	u32	flags;
+};
+
+struct ib_device_attr {
+	u64			fw_ver;
+	__be64			sys_image_guid;
+	u64			max_mr_size;
+	u64			page_size_cap;
+	u32			vendor_id;
+	u32			vendor_part_id;
+	u32			hw_ver;
+	int			max_qp;
+	int			max_qp_wr;
+	u64			device_cap_flags;
+	int			max_send_sge;
+	int			max_recv_sge;
+	int			max_sge_rd;
+	int			max_cq;
+	int			max_cqe;
+	int			max_mr;
+	int			max_pd;
+	int			max_qp_rd_atom;
+	int			max_ee_rd_atom;
+	int			max_res_rd_atom;
+	int			max_qp_init_rd_atom;
+	int			max_ee_init_rd_atom;
+	enum ib_atomic_cap	atomic_cap;
+	enum ib_atomic_cap	masked_atomic_cap;
+	int			max_ee;
+	int			max_rdd;
+	int			max_mw;
+	int			max_raw_ipv6_qp;
+	int			max_raw_ethy_qp;
+	int			max_mcast_grp;
+	int			max_mcast_qp_attach;
+	int			max_total_mcast_qp_attach;
+	int			max_ah;
+	int			max_srq;
+	int			max_srq_wr;
+	int			max_srq_sge;
+	unsigned int		max_fast_reg_page_list_len;
+	unsigned int		max_pi_fast_reg_page_list_len;
+	u16			max_pkeys;
+	u8			local_ca_ack_delay;
+	int			sig_prot_cap;
+	int			sig_guard_cap;
+	struct ib_odp_caps	odp_caps;
+	uint64_t		timestamp_mask;
+	uint64_t		hca_core_clock; /* in KHZ */
+	struct ib_rss_caps	rss_caps;
+	u32			max_wq_type_rq;
+	u32			raw_packet_caps; /* Use ib_raw_packet_caps enum */
+	struct ib_tm_caps	tm_caps;
+	struct ib_cq_caps       cq_caps;
+	u64			max_dm_size;
+	/* Max entries for sgl for optimized performance per READ */
+	u32			max_sgl_rd;
+};
+
+enum ib_mtu {
+	IB_MTU_256  = 1,
+	IB_MTU_512  = 2,
+	IB_MTU_1024 = 3,
+	IB_MTU_2048 = 4,
+	IB_MTU_4096 = 5
+};
+
+enum opa_mtu {
+	OPA_MTU_8192 = 6,
+	OPA_MTU_10240 = 7
+};
+
+static inline int ib_mtu_enum_to_int(enum ib_mtu mtu)
+{
+	switch (mtu) {
+	case IB_MTU_256:  return  256;
+	case IB_MTU_512:  return  512;
+	case IB_MTU_1024: return 1024;
+	case IB_MTU_2048: return 2048;
+	case IB_MTU_4096: return 4096;
+	default: 	  return -1;
+	}
+}
+
+static inline enum ib_mtu ib_mtu_int_to_enum(int mtu)
+{
+	if (mtu >= 4096)
+		return IB_MTU_4096;
+	else if (mtu >= 2048)
+		return IB_MTU_2048;
+	else if (mtu >= 1024)
+		return IB_MTU_1024;
+	else if (mtu >= 512)
+		return IB_MTU_512;
+	else
+		return IB_MTU_256;
+}
+
+static inline int opa_mtu_enum_to_int(enum opa_mtu mtu)
+{
+	switch (mtu) {
+	case OPA_MTU_8192:
+		return 8192;
+	case OPA_MTU_10240:
+		return 10240;
+	default:
+		return(ib_mtu_enum_to_int((enum ib_mtu)mtu));
+	}
+}
+
+static inline enum opa_mtu opa_mtu_int_to_enum(int mtu)
+{
+	if (mtu >= 10240)
+		return OPA_MTU_10240;
+	else if (mtu >= 8192)
+		return OPA_MTU_8192;
+	else
+		return ((enum opa_mtu)ib_mtu_int_to_enum(mtu));
+}
+
+enum ib_port_state {
+	IB_PORT_NOP		= 0,
+	IB_PORT_DOWN		= 1,
+	IB_PORT_INIT		= 2,
+	IB_PORT_ARMED		= 3,
+	IB_PORT_ACTIVE		= 4,
+	IB_PORT_ACTIVE_DEFER	= 5
+};
+
+enum ib_port_phys_state {
+	IB_PORT_PHYS_STATE_SLEEP = 1,
+	IB_PORT_PHYS_STATE_POLLING = 2,
+	IB_PORT_PHYS_STATE_DISABLED = 3,
+	IB_PORT_PHYS_STATE_PORT_CONFIGURATION_TRAINING = 4,
+	IB_PORT_PHYS_STATE_LINK_UP = 5,
+	IB_PORT_PHYS_STATE_LINK_ERROR_RECOVERY = 6,
+	IB_PORT_PHYS_STATE_PHY_TEST = 7,
+};
+
+enum ib_port_width {
+	IB_WIDTH_1X	= 1,
+	IB_WIDTH_2X	= 16,
+	IB_WIDTH_4X	= 2,
+	IB_WIDTH_8X	= 4,
+	IB_WIDTH_12X	= 8
+};
+
+static inline int ib_width_enum_to_int(enum ib_port_width width)
+{
+	switch (width) {
+	case IB_WIDTH_1X:  return  1;
+	case IB_WIDTH_2X:  return  2;
+	case IB_WIDTH_4X:  return  4;
+	case IB_WIDTH_8X:  return  8;
+	case IB_WIDTH_12X: return 12;
+	default: 	  return -1;
+	}
+}
+
+enum ib_port_speed {
+	IB_SPEED_SDR	= 1,
+	IB_SPEED_DDR	= 2,
+	IB_SPEED_QDR	= 4,
+	IB_SPEED_FDR10	= 8,
+	IB_SPEED_FDR	= 16,
+	IB_SPEED_EDR	= 32,
+	IB_SPEED_HDR	= 64,
+	IB_SPEED_NDR	= 128,
+};
+
+/**
+ * struct rdma_hw_stats
+ * @lock - Mutex to protect parallel write access to lifespan and values
+ *    of counters, which are 64bits and not guaranteeed to be written
+ *    atomicaly on 32bits systems.
+ * @timestamp - Used by the core code to track when the last update was
+ * @lifespan - Used by the core code to determine how old the counters
+ *   should be before being updated again.  Stored in jiffies, defaults
+ *   to 10 milliseconds, drivers can override the default be specifying
+ *   their own value during their allocation routine.
+ * @name - Array of pointers to static names used for the counters in
+ *   directory.
+ * @num_counters - How many hardware counters there are.  If name is
+ *   shorter than this number, a kernel oops will result.  Driver authors
+ *   are encouraged to leave BUILD_BUG_ON(ARRAY_SIZE(@name) < num_counters)
+ *   in their code to prevent this.
+ * @value - Array of u64 counters that are accessed by the sysfs code and
+ *   filled in by the drivers get_stats routine
+ */
+struct rdma_hw_stats {
+	struct mutex	lock; /* Protect lifespan and values[] */
+	unsigned long	timestamp;
+	unsigned long	lifespan;
+	const char * const *names;
+	int		num_counters;
+	u64		value[];
+};
+
+#define RDMA_HW_STATS_DEFAULT_LIFESPAN 10
+/**
+ * rdma_alloc_hw_stats_struct - Helper function to allocate dynamic struct
+ *   for drivers.
+ * @names - Array of static const char *
+ * @num_counters - How many elements in array
+ * @lifespan - How many milliseconds between updates
+ */
+static inline struct rdma_hw_stats *rdma_alloc_hw_stats_struct(
+		const char * const *names, int num_counters,
+		unsigned long lifespan)
+{
+	struct rdma_hw_stats *stats;
+
+	stats = kzalloc(sizeof(*stats) + num_counters * sizeof(u64),
+			GFP_KERNEL);
+	if (!stats)
+		return NULL;
+	stats->names = names;
+	stats->num_counters = num_counters;
+	stats->lifespan = msecs_to_jiffies(lifespan);
+
+	return stats;
+}
+
+
+/* Define bits for the various functionality this port needs to be supported by
+ * the core.
+ */
+/* Management                           0x00000FFF */
+#define RDMA_CORE_CAP_IB_MAD            0x00000001
+#define RDMA_CORE_CAP_IB_SMI            0x00000002
+#define RDMA_CORE_CAP_IB_CM             0x00000004
+#define RDMA_CORE_CAP_IW_CM             0x00000008
+#define RDMA_CORE_CAP_IB_SA             0x00000010
+#define RDMA_CORE_CAP_OPA_MAD           0x00000020
+
+/* Address format                       0x000FF000 */
+#define RDMA_CORE_CAP_AF_IB             0x00001000
+#define RDMA_CORE_CAP_ETH_AH            0x00002000
+#define RDMA_CORE_CAP_OPA_AH            0x00004000
+#define RDMA_CORE_CAP_IB_GRH_REQUIRED   0x00008000
+
+/* Protocol                             0xFFF00000 */
+#define RDMA_CORE_CAP_PROT_IB           0x00100000
+#define RDMA_CORE_CAP_PROT_ROCE         0x00200000
+#define RDMA_CORE_CAP_PROT_IWARP        0x00400000
+#define RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP 0x00800000
+#define RDMA_CORE_CAP_PROT_RAW_PACKET   0x01000000
+#define RDMA_CORE_CAP_PROT_USNIC        0x02000000
+
+#define RDMA_CORE_PORT_IB_GRH_REQUIRED (RDMA_CORE_CAP_IB_GRH_REQUIRED \
+					| RDMA_CORE_CAP_PROT_ROCE     \
+					| RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP)
+
+#define RDMA_CORE_PORT_IBA_IB          (RDMA_CORE_CAP_PROT_IB  \
+					| RDMA_CORE_CAP_IB_MAD \
+					| RDMA_CORE_CAP_IB_SMI \
+					| RDMA_CORE_CAP_IB_CM  \
+					| RDMA_CORE_CAP_IB_SA  \
+					| RDMA_CORE_CAP_AF_IB)
+#define RDMA_CORE_PORT_IBA_ROCE        (RDMA_CORE_CAP_PROT_ROCE \
+					| RDMA_CORE_CAP_IB_MAD  \
+					| RDMA_CORE_CAP_IB_CM   \
+					| RDMA_CORE_CAP_AF_IB   \
+					| RDMA_CORE_CAP_ETH_AH)
+#define RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP			\
+					(RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP \
+					| RDMA_CORE_CAP_IB_MAD  \
+					| RDMA_CORE_CAP_IB_CM   \
+					| RDMA_CORE_CAP_AF_IB   \
+					| RDMA_CORE_CAP_ETH_AH)
+#define RDMA_CORE_PORT_IWARP           (RDMA_CORE_CAP_PROT_IWARP \
+					| RDMA_CORE_CAP_IW_CM)
+#define RDMA_CORE_PORT_INTEL_OPA       (RDMA_CORE_PORT_IBA_IB  \
+					| RDMA_CORE_CAP_OPA_MAD)
+
+#define RDMA_CORE_PORT_RAW_PACKET	(RDMA_CORE_CAP_PROT_RAW_PACKET)
+
+#define RDMA_CORE_PORT_USNIC		(RDMA_CORE_CAP_PROT_USNIC)
+
+struct ib_port_attr {
+	u64			subnet_prefix;
+	enum ib_port_state	state;
+	enum ib_mtu		max_mtu;
+	enum ib_mtu		active_mtu;
+	u32                     phys_mtu;
+	int			gid_tbl_len;
+	unsigned int		ip_gids:1;
+	/* This is the value from PortInfo CapabilityMask, defined by IBA */
+	u32			port_cap_flags;
+	u32			max_msg_sz;
+	u32			bad_pkey_cntr;
+	u32			qkey_viol_cntr;
+	u16			pkey_tbl_len;
+	u32			sm_lid;
+	u32			lid;
+	u8			lmc;
+	u8			max_vl_num;
+	u8			sm_sl;
+	u8			subnet_timeout;
+	u8			init_type_reply;
+	u8			active_width;
+	u16			active_speed;
+	u8                      phys_state;
+	u16			port_cap_flags2;
+};
+
+enum ib_device_modify_flags {
+	IB_DEVICE_MODIFY_SYS_IMAGE_GUID	= 1 << 0,
+	IB_DEVICE_MODIFY_NODE_DESC	= 1 << 1
+};
+
+#define IB_DEVICE_NODE_DESC_MAX 64
+
+struct ib_device_modify {
+	u64	sys_image_guid;
+	char	node_desc[IB_DEVICE_NODE_DESC_MAX];
+};
+
+enum ib_port_modify_flags {
+	IB_PORT_SHUTDOWN		= 1,
+	IB_PORT_INIT_TYPE		= (1<<2),
+	IB_PORT_RESET_QKEY_CNTR		= (1<<3),
+	IB_PORT_OPA_MASK_CHG		= (1<<4)
+};
+
+struct ib_port_modify {
+	u32	set_port_cap_mask;
+	u32	clr_port_cap_mask;
+	u8	init_type;
+};
+
+enum ib_event_type {
+	IB_EVENT_CQ_ERR,
+	IB_EVENT_QP_FATAL,
+	IB_EVENT_QP_REQ_ERR,
+	IB_EVENT_QP_ACCESS_ERR,
+	IB_EVENT_COMM_EST,
+	IB_EVENT_SQ_DRAINED,
+	IB_EVENT_PATH_MIG,
+	IB_EVENT_PATH_MIG_ERR,
+	IB_EVENT_DEVICE_FATAL,
+	IB_EVENT_PORT_ACTIVE,
+	IB_EVENT_PORT_ERR,
+	IB_EVENT_LID_CHANGE,
+	IB_EVENT_PKEY_CHANGE,
+	IB_EVENT_SM_CHANGE,
+	IB_EVENT_SRQ_ERR,
+	IB_EVENT_SRQ_LIMIT_REACHED,
+	IB_EVENT_QP_LAST_WQE_REACHED,
+	IB_EVENT_CLIENT_REREGISTER,
+	IB_EVENT_GID_CHANGE,
+	IB_EVENT_WQ_FATAL,
+};
+
+const char *__attribute_const__ ib_event_msg(enum ib_event_type event);
+
+struct ib_event {
+	struct ib_device	*device;
+	union {
+		struct ib_cq	*cq;
+		struct ib_qp	*qp;
+		struct ib_srq	*srq;
+		struct ib_wq	*wq;
+		u8		port_num;
+	} element;
+	enum ib_event_type	event;
+};
+
+struct ib_event_handler {
+	struct ib_device *device;
+	void            (*handler)(struct ib_event_handler *, struct ib_event *);
+	struct list_head  list;
+};
+
+#define INIT_IB_EVENT_HANDLER(_ptr, _device, _handler)		\
+	do {							\
+		(_ptr)->device  = _device;			\
+		(_ptr)->handler = _handler;			\
+		INIT_LIST_HEAD(&(_ptr)->list);			\
+	} while (0)
+
+struct ib_global_route {
+	const struct ib_gid_attr *sgid_attr;
+	union ib_gid	dgid;
+	u32		flow_label;
+	u8		sgid_index;
+	u8		hop_limit;
+	u8		traffic_class;
+};
+
+struct ib_grh {
+	__be32		version_tclass_flow;
+	__be16		paylen;
+	u8		next_hdr;
+	u8		hop_limit;
+	union ib_gid	sgid;
+	union ib_gid	dgid;
+};
+
+union rdma_network_hdr {
+	struct ib_grh ibgrh;
+	struct {
+		/* The IB spec states that if it's IPv4, the header
+		 * is located in the last 20 bytes of the header.
+		 */
+		u8		reserved[20];
+		struct iphdr	roce4grh;
+	};
+};
+
+#define IB_QPN_MASK		0xFFFFFF
+
+enum {
+	IB_MULTICAST_QPN = 0xffffff
+};
+
+#define IB_LID_PERMISSIVE	cpu_to_be16(0xFFFF)
+#define IB_MULTICAST_LID_BASE	cpu_to_be16(0xC000)
+
+enum ib_ah_flags {
+	IB_AH_GRH	= 1
+};
+
+enum ib_rate {
+	IB_RATE_PORT_CURRENT = 0,
+	IB_RATE_2_5_GBPS = 2,
+	IB_RATE_5_GBPS   = 5,
+	IB_RATE_10_GBPS  = 3,
+	IB_RATE_20_GBPS  = 6,
+	IB_RATE_30_GBPS  = 4,
+	IB_RATE_40_GBPS  = 7,
+	IB_RATE_60_GBPS  = 8,
+	IB_RATE_80_GBPS  = 9,
+	IB_RATE_120_GBPS = 10,
+	IB_RATE_14_GBPS  = 11,
+	IB_RATE_56_GBPS  = 12,
+	IB_RATE_112_GBPS = 13,
+	IB_RATE_168_GBPS = 14,
+	IB_RATE_25_GBPS  = 15,
+	IB_RATE_100_GBPS = 16,
+	IB_RATE_200_GBPS = 17,
+	IB_RATE_300_GBPS = 18,
+	IB_RATE_28_GBPS  = 19,
+	IB_RATE_50_GBPS  = 20,
+	IB_RATE_400_GBPS = 21,
+	IB_RATE_600_GBPS = 22,
+};
+
+/**
+ * ib_rate_to_mult - Convert the IB rate enum to a multiple of the
+ * base rate of 2.5 Gbit/sec.  For example, IB_RATE_5_GBPS will be
+ * converted to 2, since 5 Gbit/sec is 2 * 2.5 Gbit/sec.
+ * @rate: rate to convert.
+ */
+__attribute_const__ int ib_rate_to_mult(enum ib_rate rate);
+
+/**
+ * ib_rate_to_mbps - Convert the IB rate enum to Mbps.
+ * For example, IB_RATE_2_5_GBPS will be converted to 2500.
+ * @rate: rate to convert.
+ */
+__attribute_const__ int ib_rate_to_mbps(enum ib_rate rate);
+
+
+/**
+ * enum ib_mr_type - memory region type
+ * @IB_MR_TYPE_MEM_REG:       memory region that is used for
+ *                            normal registration
+ * @IB_MR_TYPE_SG_GAPS:       memory region that is capable to
+ *                            register any arbitrary sg lists (without
+ *                            the normal mr constraints - see
+ *                            ib_map_mr_sg)
+ * @IB_MR_TYPE_DM:            memory region that is used for device
+ *                            memory registration
+ * @IB_MR_TYPE_USER:          memory region that is used for the user-space
+ *                            application
+ * @IB_MR_TYPE_DMA:           memory region that is used for DMA operations
+ *                            without address translations (VA=PA)
+ * @IB_MR_TYPE_INTEGRITY:     memory region that is used for
+ *                            data integrity operations
+ */
+enum ib_mr_type {
+	IB_MR_TYPE_MEM_REG,
+	IB_MR_TYPE_SG_GAPS,
+	IB_MR_TYPE_DM,
+	IB_MR_TYPE_USER,
+	IB_MR_TYPE_DMA,
+	IB_MR_TYPE_INTEGRITY,
+};
+
+enum ib_mr_status_check {
+	IB_MR_CHECK_SIG_STATUS = 1,
+};
+
+/**
+ * struct ib_mr_status - Memory region status container
+ *
+ * @fail_status: Bitmask of MR checks status. For each
+ *     failed check a corresponding status bit is set.
+ * @sig_err: Additional info for IB_MR_CEHCK_SIG_STATUS
+ *     failure.
+ */
+struct ib_mr_status {
+	u32		    fail_status;
+	struct ib_sig_err   sig_err;
+};
+
+/**
+ * mult_to_ib_rate - Convert a multiple of 2.5 Gbit/sec to an IB rate
+ * enum.
+ * @mult: multiple to convert.
+ */
+__attribute_const__ enum ib_rate mult_to_ib_rate(int mult);
+
+struct rdma_ah_init_attr {
+	struct rdma_ah_attr *ah_attr;
+	u32 flags;
+	struct net_device *xmit_slave;
+};
+
+enum rdma_ah_attr_type {
+	RDMA_AH_ATTR_TYPE_UNDEFINED,
+	RDMA_AH_ATTR_TYPE_IB,
+	RDMA_AH_ATTR_TYPE_ROCE,
+	RDMA_AH_ATTR_TYPE_OPA,
+};
+
+struct ib_ah_attr {
+	u16			dlid;
+	u8			src_path_bits;
+};
+
+struct roce_ah_attr {
+	u8			dmac[ETH_ALEN];
+};
+
+struct opa_ah_attr {
+	u32			dlid;
+	u8			src_path_bits;
+	bool			make_grd;
+};
+
+struct rdma_ah_attr {
+	struct ib_global_route	grh;
+	u8			sl;
+	u8			static_rate;
+	u8			port_num;
+	u8			ah_flags;
+	enum rdma_ah_attr_type type;
+	union {
+		struct ib_ah_attr ib;
+		struct roce_ah_attr roce;
+		struct opa_ah_attr opa;
+	};
+};
+
+enum ib_wc_status {
+	IB_WC_SUCCESS,
+	IB_WC_LOC_LEN_ERR,
+	IB_WC_LOC_QP_OP_ERR,
+	IB_WC_LOC_EEC_OP_ERR,
+	IB_WC_LOC_PROT_ERR,
+	IB_WC_WR_FLUSH_ERR,
+	IB_WC_MW_BIND_ERR,
+	IB_WC_BAD_RESP_ERR,
+	IB_WC_LOC_ACCESS_ERR,
+	IB_WC_REM_INV_REQ_ERR,
+	IB_WC_REM_ACCESS_ERR,
+	IB_WC_REM_OP_ERR,
+	IB_WC_RETRY_EXC_ERR,
+	IB_WC_RNR_RETRY_EXC_ERR,
+	IB_WC_LOC_RDD_VIOL_ERR,
+	IB_WC_REM_INV_RD_REQ_ERR,
+	IB_WC_REM_ABORT_ERR,
+	IB_WC_INV_EECN_ERR,
+	IB_WC_INV_EEC_STATE_ERR,
+	IB_WC_FATAL_ERR,
+	IB_WC_RESP_TIMEOUT_ERR,
+	IB_WC_GENERAL_ERR
+};
+
+const char *__attribute_const__ ib_wc_status_msg(enum ib_wc_status status);
+
+enum ib_wc_opcode {
+	IB_WC_SEND = IB_UVERBS_WC_SEND,
+	IB_WC_RDMA_WRITE = IB_UVERBS_WC_RDMA_WRITE,
+	IB_WC_RDMA_READ = IB_UVERBS_WC_RDMA_READ,
+	IB_WC_COMP_SWAP = IB_UVERBS_WC_COMP_SWAP,
+	IB_WC_FETCH_ADD = IB_UVERBS_WC_FETCH_ADD,
+	IB_WC_BIND_MW = IB_UVERBS_WC_BIND_MW,
+	IB_WC_LOCAL_INV = IB_UVERBS_WC_LOCAL_INV,
+	IB_WC_LSO = IB_UVERBS_WC_TSO,
+	IB_WC_REG_MR,
+	IB_WC_MASKED_COMP_SWAP,
+	IB_WC_MASKED_FETCH_ADD,
+/*
+ * Set value of IB_WC_RECV so consumers can test if a completion is a
+ * receive by testing (opcode & IB_WC_RECV).
+ */
+	IB_WC_RECV			= 1 << 7,
+	IB_WC_RECV_RDMA_WITH_IMM
+};
+
+enum ib_wc_flags {
+	IB_WC_GRH		= 1,
+	IB_WC_WITH_IMM		= (1<<1),
+	IB_WC_WITH_INVALIDATE	= (1<<2),
+	IB_WC_IP_CSUM_OK	= (1<<3),
+	IB_WC_WITH_SMAC		= (1<<4),
+	IB_WC_WITH_VLAN		= (1<<5),
+	IB_WC_WITH_NETWORK_HDR_TYPE	= (1<<6),
+};
+
+struct ib_wc {
+	union {
+		u64		wr_id;
+		struct ib_cqe	*wr_cqe;
+	};
+	enum ib_wc_status	status;
+	enum ib_wc_opcode	opcode;
+	u32			vendor_err;
+	u32			byte_len;
+	struct ib_qp	       *qp;
+	union {
+		__be32		imm_data;
+		u32		invalidate_rkey;
+	} ex;
+	u32			src_qp;
+	u32			slid;
+	int			wc_flags;
+	u16			pkey_index;
+	u8			sl;
+	u8			dlid_path_bits;
+	u8			port_num;	/* valid only for DR SMPs on switches */
+	u8			smac[ETH_ALEN];
+	u16			vlan_id;
+	u8			network_hdr_type;
+};
+
+enum ib_cq_notify_flags {
+	IB_CQ_SOLICITED			= 1 << 0,
+	IB_CQ_NEXT_COMP			= 1 << 1,
+	IB_CQ_SOLICITED_MASK		= IB_CQ_SOLICITED | IB_CQ_NEXT_COMP,
+	IB_CQ_REPORT_MISSED_EVENTS	= 1 << 2,
+};
+
+enum ib_srq_type {
+	IB_SRQT_BASIC = IB_UVERBS_SRQT_BASIC,
+	IB_SRQT_XRC = IB_UVERBS_SRQT_XRC,
+	IB_SRQT_TM = IB_UVERBS_SRQT_TM,
+};
+
+static inline bool ib_srq_has_cq(enum ib_srq_type srq_type)
+{
+	return srq_type == IB_SRQT_XRC ||
+	       srq_type == IB_SRQT_TM;
+}
+
+enum ib_srq_attr_mask {
+	IB_SRQ_MAX_WR	= 1 << 0,
+	IB_SRQ_LIMIT	= 1 << 1,
+};
+
+struct ib_srq_attr {
+	u32	max_wr;
+	u32	max_sge;
+	u32	srq_limit;
+};
+
+struct ib_srq_init_attr {
+	void		      (*event_handler)(struct ib_event *, void *);
+	void		       *srq_context;
+	struct ib_srq_attr	attr;
+	enum ib_srq_type	srq_type;
+
+	struct {
+		struct ib_cq   *cq;
+		union {
+			struct {
+				struct ib_xrcd *xrcd;
+			} xrc;
+
+			struct {
+				u32		max_num_tags;
+			} tag_matching;
+		};
+	} ext;
+};
+
+struct ib_qp_cap {
+	u32	max_send_wr;
+	u32	max_recv_wr;
+	u32	max_send_sge;
+	u32	max_recv_sge;
+	u32	max_inline_data;
+
+	/*
+	 * Maximum number of rdma_rw_ctx structures in flight at a time.
+	 * ib_create_qp() will calculate the right amount of neededed WRs
+	 * and MRs based on this.
+	 */
+	u32	max_rdma_ctxs;
+};
+
+enum ib_sig_type {
+	IB_SIGNAL_ALL_WR,
+	IB_SIGNAL_REQ_WR
+};
+
+enum ib_qp_type {
+	/*
+	 * IB_QPT_SMI and IB_QPT_GSI have to be the first two entries
+	 * here (and in that order) since the MAD layer uses them as
+	 * indices into a 2-entry table.
+	 */
+	IB_QPT_SMI,
+	IB_QPT_GSI,
+
+	IB_QPT_RC = IB_UVERBS_QPT_RC,
+	IB_QPT_UC = IB_UVERBS_QPT_UC,
+	IB_QPT_UD = IB_UVERBS_QPT_UD,
+	IB_QPT_RAW_IPV6,
+	IB_QPT_RAW_ETHERTYPE,
+	IB_QPT_RAW_PACKET = IB_UVERBS_QPT_RAW_PACKET,
+	IB_QPT_XRC_INI = IB_UVERBS_QPT_XRC_INI,
+	IB_QPT_XRC_TGT = IB_UVERBS_QPT_XRC_TGT,
+	IB_QPT_MAX,
+	IB_QPT_DRIVER = IB_UVERBS_QPT_DRIVER,
+	/* Reserve a range for qp types internal to the low level driver.
+	 * These qp types will not be visible at the IB core layer, so the
+	 * IB_QPT_MAX usages should not be affected in the core layer
+	 */
+	IB_QPT_RESERVED1 = 0x1000,
+	IB_QPT_RESERVED2,
+	IB_QPT_RESERVED3,
+	IB_QPT_RESERVED4,
+	IB_QPT_RESERVED5,
+	IB_QPT_RESERVED6,
+	IB_QPT_RESERVED7,
+	IB_QPT_RESERVED8,
+	IB_QPT_RESERVED9,
+	IB_QPT_RESERVED10,
+};
+
+enum ib_qp_create_flags {
+	IB_QP_CREATE_IPOIB_UD_LSO		= 1 << 0,
+	IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK	=
+		IB_UVERBS_QP_CREATE_BLOCK_MULTICAST_LOOPBACK,
+	IB_QP_CREATE_CROSS_CHANNEL              = 1 << 2,
+	IB_QP_CREATE_MANAGED_SEND               = 1 << 3,
+	IB_QP_CREATE_MANAGED_RECV               = 1 << 4,
+	IB_QP_CREATE_NETIF_QP			= 1 << 5,
+	IB_QP_CREATE_INTEGRITY_EN		= 1 << 6,
+	IB_QP_CREATE_NETDEV_USE			= 1 << 7,
+	IB_QP_CREATE_SCATTER_FCS		=
+		IB_UVERBS_QP_CREATE_SCATTER_FCS,
+	IB_QP_CREATE_CVLAN_STRIPPING		=
+		IB_UVERBS_QP_CREATE_CVLAN_STRIPPING,
+	IB_QP_CREATE_SOURCE_QPN			= 1 << 10,
+	IB_QP_CREATE_PCI_WRITE_END_PADDING	=
+		IB_UVERBS_QP_CREATE_PCI_WRITE_END_PADDING,
+	/* reserve bits 26-31 for low level drivers' internal use */
+	IB_QP_CREATE_RESERVED_START		= 1 << 26,
+	IB_QP_CREATE_RESERVED_END		= 1 << 31,
+};
+
+/*
+ * Note: users may not call ib_close_qp or ib_destroy_qp from the event_handler
+ * callback to destroy the passed in QP.
+ */
+
+struct ib_qp_init_attr {
+	/* Consumer's event_handler callback must not block */
+	void                  (*event_handler)(struct ib_event *, void *);
+
+	void		       *qp_context;
+	struct ib_cq	       *send_cq;
+	struct ib_cq	       *recv_cq;
+	struct ib_srq	       *srq;
+	struct ib_xrcd	       *xrcd;     /* XRC TGT QPs only */
+	struct ib_qp_cap	cap;
+	enum ib_sig_type	sq_sig_type;
+	enum ib_qp_type		qp_type;
+	u32			create_flags;
+
+	/*
+	 * Only needed for special QP types, or when using the RW API.
+	 */
+	u8			port_num;
+	struct ib_rwq_ind_table *rwq_ind_tbl;
+	u32			source_qpn;
+};
+
+struct ib_qp_open_attr {
+	void                  (*event_handler)(struct ib_event *, void *);
+	void		       *qp_context;
+	u32			qp_num;
+	enum ib_qp_type		qp_type;
+};
+
+enum ib_rnr_timeout {
+	IB_RNR_TIMER_655_36 =  0,
+	IB_RNR_TIMER_000_01 =  1,
+	IB_RNR_TIMER_000_02 =  2,
+	IB_RNR_TIMER_000_03 =  3,
+	IB_RNR_TIMER_000_04 =  4,
+	IB_RNR_TIMER_000_06 =  5,
+	IB_RNR_TIMER_000_08 =  6,
+	IB_RNR_TIMER_000_12 =  7,
+	IB_RNR_TIMER_000_16 =  8,
+	IB_RNR_TIMER_000_24 =  9,
+	IB_RNR_TIMER_000_32 = 10,
+	IB_RNR_TIMER_000_48 = 11,
+	IB_RNR_TIMER_000_64 = 12,
+	IB_RNR_TIMER_000_96 = 13,
+	IB_RNR_TIMER_001_28 = 14,
+	IB_RNR_TIMER_001_92 = 15,
+	IB_RNR_TIMER_002_56 = 16,
+	IB_RNR_TIMER_003_84 = 17,
+	IB_RNR_TIMER_005_12 = 18,
+	IB_RNR_TIMER_007_68 = 19,
+	IB_RNR_TIMER_010_24 = 20,
+	IB_RNR_TIMER_015_36 = 21,
+	IB_RNR_TIMER_020_48 = 22,
+	IB_RNR_TIMER_030_72 = 23,
+	IB_RNR_TIMER_040_96 = 24,
+	IB_RNR_TIMER_061_44 = 25,
+	IB_RNR_TIMER_081_92 = 26,
+	IB_RNR_TIMER_122_88 = 27,
+	IB_RNR_TIMER_163_84 = 28,
+	IB_RNR_TIMER_245_76 = 29,
+	IB_RNR_TIMER_327_68 = 30,
+	IB_RNR_TIMER_491_52 = 31
+};
+
+enum ib_qp_attr_mask {
+	IB_QP_STATE			= 1,
+	IB_QP_CUR_STATE			= (1<<1),
+	IB_QP_EN_SQD_ASYNC_NOTIFY	= (1<<2),
+	IB_QP_ACCESS_FLAGS		= (1<<3),
+	IB_QP_PKEY_INDEX		= (1<<4),
+	IB_QP_PORT			= (1<<5),
+	IB_QP_QKEY			= (1<<6),
+	IB_QP_AV			= (1<<7),
+	IB_QP_PATH_MTU			= (1<<8),
+	IB_QP_TIMEOUT			= (1<<9),
+	IB_QP_RETRY_CNT			= (1<<10),
+	IB_QP_RNR_RETRY			= (1<<11),
+	IB_QP_RQ_PSN			= (1<<12),
+	IB_QP_MAX_QP_RD_ATOMIC		= (1<<13),
+	IB_QP_ALT_PATH			= (1<<14),
+	IB_QP_MIN_RNR_TIMER		= (1<<15),
+	IB_QP_SQ_PSN			= (1<<16),
+	IB_QP_MAX_DEST_RD_ATOMIC	= (1<<17),
+	IB_QP_PATH_MIG_STATE		= (1<<18),
+	IB_QP_CAP			= (1<<19),
+	IB_QP_DEST_QPN			= (1<<20),
+	IB_QP_RESERVED1			= (1<<21),
+	IB_QP_RESERVED2			= (1<<22),
+	IB_QP_RESERVED3			= (1<<23),
+	IB_QP_RESERVED4			= (1<<24),
+	IB_QP_RATE_LIMIT		= (1<<25),
+};
+
+enum ib_qp_state {
+	IB_QPS_RESET,
+	IB_QPS_INIT,
+	IB_QPS_RTR,
+	IB_QPS_RTS,
+	IB_QPS_SQD,
+	IB_QPS_SQE,
+	IB_QPS_ERR
+};
+
+enum ib_mig_state {
+	IB_MIG_MIGRATED,
+	IB_MIG_REARM,
+	IB_MIG_ARMED
+};
+
+enum ib_mw_type {
+	IB_MW_TYPE_1 = 1,
+	IB_MW_TYPE_2 = 2
+};
+
+struct ib_qp_attr {
+	enum ib_qp_state	qp_state;
+	enum ib_qp_state	cur_qp_state;
+	enum ib_mtu		path_mtu;
+	enum ib_mig_state	path_mig_state;
+	u32			qkey;
+	u32			rq_psn;
+	u32			sq_psn;
+	u32			dest_qp_num;
+	int			qp_access_flags;
+	struct ib_qp_cap	cap;
+	struct rdma_ah_attr	ah_attr;
+	struct rdma_ah_attr	alt_ah_attr;
+	u16			pkey_index;
+	u16			alt_pkey_index;
+	u8			en_sqd_async_notify;
+	u8			sq_draining;
+	u8			max_rd_atomic;
+	u8			max_dest_rd_atomic;
+	u8			min_rnr_timer;
+	u8			port_num;
+	u8			timeout;
+	u8			retry_cnt;
+	u8			rnr_retry;
+	u8			alt_port_num;
+	u8			alt_timeout;
+	u32			rate_limit;
+	struct net_device	*xmit_slave;
+};
+
+enum ib_wr_opcode {
+	/* These are shared with userspace */
+	IB_WR_RDMA_WRITE = IB_UVERBS_WR_RDMA_WRITE,
+	IB_WR_RDMA_WRITE_WITH_IMM = IB_UVERBS_WR_RDMA_WRITE_WITH_IMM,
+	IB_WR_SEND = IB_UVERBS_WR_SEND,
+	IB_WR_SEND_WITH_IMM = IB_UVERBS_WR_SEND_WITH_IMM,
+	IB_WR_RDMA_READ = IB_UVERBS_WR_RDMA_READ,
+	IB_WR_ATOMIC_CMP_AND_SWP = IB_UVERBS_WR_ATOMIC_CMP_AND_SWP,
+	IB_WR_ATOMIC_FETCH_AND_ADD = IB_UVERBS_WR_ATOMIC_FETCH_AND_ADD,
+	IB_WR_BIND_MW = IB_UVERBS_WR_BIND_MW,
+	IB_WR_LSO = IB_UVERBS_WR_TSO,
+	IB_WR_SEND_WITH_INV = IB_UVERBS_WR_SEND_WITH_INV,
+	IB_WR_RDMA_READ_WITH_INV = IB_UVERBS_WR_RDMA_READ_WITH_INV,
+	IB_WR_LOCAL_INV = IB_UVERBS_WR_LOCAL_INV,
+	IB_WR_MASKED_ATOMIC_CMP_AND_SWP =
+		IB_UVERBS_WR_MASKED_ATOMIC_CMP_AND_SWP,
+	IB_WR_MASKED_ATOMIC_FETCH_AND_ADD =
+		IB_UVERBS_WR_MASKED_ATOMIC_FETCH_AND_ADD,
+
+	/* These are kernel only and can not be issued by userspace */
+	IB_WR_REG_MR = 0x20,
+	IB_WR_REG_MR_INTEGRITY,
+
+	/* reserve values for low level drivers' internal use.
+	 * These values will not be used at all in the ib core layer.
+	 */
+	IB_WR_RESERVED1 = 0xf0,
+	IB_WR_RESERVED2,
+	IB_WR_RESERVED3,
+	IB_WR_RESERVED4,
+	IB_WR_RESERVED5,
+	IB_WR_RESERVED6,
+	IB_WR_RESERVED7,
+	IB_WR_RESERVED8,
+	IB_WR_RESERVED9,
+	IB_WR_RESERVED10,
+};
+
+enum ib_send_flags {
+	IB_SEND_FENCE		= 1,
+	IB_SEND_SIGNALED	= (1<<1),
+	IB_SEND_SOLICITED	= (1<<2),
+	IB_SEND_INLINE		= (1<<3),
+	IB_SEND_IP_CSUM		= (1<<4),
+
+	/* reserve bits 26-31 for low level drivers' internal use */
+	IB_SEND_RESERVED_START	= (1 << 26),
+	IB_SEND_RESERVED_END	= (1 << 31),
+};
+
+struct ib_sge {
+	u64	addr;
+	u32	length;
+	u32	lkey;
+};
+
+struct ib_cqe {
+	void (*done)(struct ib_cq *cq, struct ib_wc *wc);
+};
+
+struct ib_send_wr {
+	struct ib_send_wr      *next;
+	union {
+		u64		wr_id;
+		struct ib_cqe	*wr_cqe;
+	};
+	struct ib_sge	       *sg_list;
+	int			num_sge;
+	enum ib_wr_opcode	opcode;
+	int			send_flags;
+	union {
+		__be32		imm_data;
+		u32		invalidate_rkey;
+	} ex;
+};
+
+struct ib_rdma_wr {
+	struct ib_send_wr	wr;
+	u64			remote_addr;
+	u32			rkey;
+};
+
+static inline const struct ib_rdma_wr *rdma_wr(const struct ib_send_wr *wr)
+{
+	return container_of(wr, struct ib_rdma_wr, wr);
+}
+
+struct ib_atomic_wr {
+	struct ib_send_wr	wr;
+	u64			remote_addr;
+	u64			compare_add;
+	u64			swap;
+	u64			compare_add_mask;
+	u64			swap_mask;
+	u32			rkey;
+};
+
+static inline const struct ib_atomic_wr *atomic_wr(const struct ib_send_wr *wr)
+{
+	return container_of(wr, struct ib_atomic_wr, wr);
+}
+
+struct ib_ud_wr {
+	struct ib_send_wr	wr;
+	struct ib_ah		*ah;
+	void			*header;
+	int			hlen;
+	int			mss;
+	u32			remote_qpn;
+	u32			remote_qkey;
+	u16			pkey_index; /* valid for GSI only */
+	u8			port_num;   /* valid for DR SMPs on switch only */
+};
+
+static inline const struct ib_ud_wr *ud_wr(const struct ib_send_wr *wr)
+{
+	return container_of(wr, struct ib_ud_wr, wr);
+}
+
+struct ib_reg_wr {
+	struct ib_send_wr	wr;
+	struct ib_mr		*mr;
+	u32			key;
+	int			access;
+};
+
+static inline const struct ib_reg_wr *reg_wr(const struct ib_send_wr *wr)
+{
+	return container_of(wr, struct ib_reg_wr, wr);
+}
+
+struct ib_recv_wr {
+	struct ib_recv_wr      *next;
+	union {
+		u64		wr_id;
+		struct ib_cqe	*wr_cqe;
+	};
+	struct ib_sge	       *sg_list;
+	int			num_sge;
+};
+
+enum ib_access_flags {
+	IB_ACCESS_LOCAL_WRITE = IB_UVERBS_ACCESS_LOCAL_WRITE,
+	IB_ACCESS_REMOTE_WRITE = IB_UVERBS_ACCESS_REMOTE_WRITE,
+	IB_ACCESS_REMOTE_READ = IB_UVERBS_ACCESS_REMOTE_READ,
+	IB_ACCESS_REMOTE_ATOMIC = IB_UVERBS_ACCESS_REMOTE_ATOMIC,
+	IB_ACCESS_MW_BIND = IB_UVERBS_ACCESS_MW_BIND,
+	IB_ZERO_BASED = IB_UVERBS_ACCESS_ZERO_BASED,
+	IB_ACCESS_ON_DEMAND = IB_UVERBS_ACCESS_ON_DEMAND,
+	IB_ACCESS_HUGETLB = IB_UVERBS_ACCESS_HUGETLB,
+	IB_ACCESS_RELAXED_ORDERING = IB_UVERBS_ACCESS_RELAXED_ORDERING,
+
+	IB_ACCESS_OPTIONAL = IB_UVERBS_ACCESS_OPTIONAL_RANGE,
+	IB_ACCESS_SUPPORTED =
+		((IB_ACCESS_HUGETLB << 1) - 1) | IB_ACCESS_OPTIONAL,
+};
+
+/*
+ * XXX: these are apparently used for ->rereg_user_mr, no idea why they
+ * are hidden here instead of a uapi header!
+ */
+enum ib_mr_rereg_flags {
+	IB_MR_REREG_TRANS	= 1,
+	IB_MR_REREG_PD		= (1<<1),
+	IB_MR_REREG_ACCESS	= (1<<2),
+	IB_MR_REREG_SUPPORTED	= ((IB_MR_REREG_ACCESS << 1) - 1)
+};
+
+struct ib_umem;
+
+enum rdma_remove_reason {
+	/*
+	 * Userspace requested uobject deletion or initial try
+	 * to remove uobject via cleanup. Call could fail
+	 */
+	RDMA_REMOVE_DESTROY,
+	/* Context deletion. This call should delete the actual object itself */
+	RDMA_REMOVE_CLOSE,
+	/* Driver is being hot-unplugged. This call should delete the actual object itself */
+	RDMA_REMOVE_DRIVER_REMOVE,
+	/* uobj is being cleaned-up before being committed */
+	RDMA_REMOVE_ABORT,
+};
+
+struct ib_rdmacg_object {
+#ifdef CONFIG_CGROUP_RDMA
+	struct rdma_cgroup	*cg;		/* owner rdma cgroup */
+#endif
+};
+
+struct ib_ucontext {
+	struct ib_device       *device;
+	struct ib_uverbs_file  *ufile;
+
+	bool cleanup_retryable;
+
+	struct ib_rdmacg_object	cg_obj;
+	/*
+	 * Implementation details of the RDMA core, don't use in drivers:
+	 */
+	struct rdma_restrack_entry res;
+	struct xarray mmap_xa;
+};
+
+struct ib_uobject {
+	u64			user_handle;	/* handle given to us by userspace */
+	/* ufile & ucontext owning this object */
+	struct ib_uverbs_file  *ufile;
+	/* FIXME, save memory: ufile->context == context */
+	struct ib_ucontext     *context;	/* associated user context */
+	void		       *object;		/* containing object */
+	struct list_head	list;		/* link to context's list */
+	struct ib_rdmacg_object	cg_obj;		/* rdmacg object */
+	int			id;		/* index into kernel idr */
+	struct kref		ref;
+	atomic_t		usecnt;		/* protects exclusive access */
+	struct rcu_head		rcu;		/* kfree_rcu() overhead */
+
+	const struct uverbs_api_object *uapi_object;
+};
+
+struct ib_udata {
+	const void __user *inbuf;
+	void __user *outbuf;
+	size_t       inlen;
+	size_t       outlen;
+};
+
+struct ib_pd {
+	u32			local_dma_lkey;
+	u32			flags;
+	struct ib_device       *device;
+	struct ib_uobject      *uobject;
+	atomic_t          	usecnt; /* count all resources */
+
+	u32			unsafe_global_rkey;
+
+	/*
+	 * Implementation details of the RDMA core, don't use in drivers:
+	 */
+	struct ib_mr	       *__internal_mr;
+	struct rdma_restrack_entry res;
+};
+
+struct ib_xrcd {
+	struct ib_device       *device;
+	atomic_t		usecnt; /* count all exposed resources */
+	struct inode	       *inode;
+	struct rw_semaphore	tgt_qps_rwsem;
+	struct xarray		tgt_qps;
+};
+
+struct ib_ah {
+	struct ib_device	*device;
+	struct ib_pd		*pd;
+	struct ib_uobject	*uobject;
+	const struct ib_gid_attr *sgid_attr;
+	enum rdma_ah_attr_type	type;
+};
+
+typedef void (*ib_comp_handler)(struct ib_cq *cq, void *cq_context);
+
+enum ib_poll_context {
+	IB_POLL_SOFTIRQ,	   /* poll from softirq context */
+	IB_POLL_WORKQUEUE,	   /* poll from workqueue */
+	IB_POLL_UNBOUND_WORKQUEUE, /* poll from unbound workqueue */
+	IB_POLL_LAST_POOL_TYPE = IB_POLL_UNBOUND_WORKQUEUE,
+
+	IB_POLL_DIRECT,		   /* caller context, no hw completions */
+};
+
+struct ib_cq {
+	struct ib_device       *device;
+	struct ib_ucq_object   *uobject;
+	ib_comp_handler   	comp_handler;
+	void                  (*event_handler)(struct ib_event *, void *);
+	void                   *cq_context;
+	int               	cqe;
+	unsigned int		cqe_used;
+	atomic_t          	usecnt; /* count number of work queues */
+	enum ib_poll_context	poll_ctx;
+	struct ib_wc		*wc;
+	struct list_head        pool_entry;
+	union {
+		struct irq_poll		iop;
+		struct work_struct	work;
+	};
+	struct workqueue_struct *comp_wq;
+	struct dim *dim;
+
+	/* updated only by trace points */
+	ktime_t timestamp;
+	u8 interrupt:1;
+	u8 shared:1;
+	unsigned int comp_vector;
+
+	/*
+	 * Implementation details of the RDMA core, don't use in drivers:
+	 */
+	struct rdma_restrack_entry res;
+};
+
+struct ib_srq {
+	struct ib_device       *device;
+	struct ib_pd	       *pd;
+	struct ib_usrq_object  *uobject;
+	void		      (*event_handler)(struct ib_event *, void *);
+	void		       *srq_context;
+	enum ib_srq_type	srq_type;
+	atomic_t		usecnt;
+
+	struct {
+		struct ib_cq   *cq;
+		union {
+			struct {
+				struct ib_xrcd *xrcd;
+				u32		srq_num;
+			} xrc;
+		};
+	} ext;
+};
+
+enum ib_raw_packet_caps {
+	/* Strip cvlan from incoming packet and report it in the matching work
+	 * completion is supported.
+	 */
+	IB_RAW_PACKET_CAP_CVLAN_STRIPPING	= (1 << 0),
+	/* Scatter FCS field of an incoming packet to host memory is supported.
+	 */
+	IB_RAW_PACKET_CAP_SCATTER_FCS		= (1 << 1),
+	/* Checksum offloads are supported (for both send and receive). */
+	IB_RAW_PACKET_CAP_IP_CSUM		= (1 << 2),
+	/* When a packet is received for an RQ with no receive WQEs, the
+	 * packet processing is delayed.
+	 */
+	IB_RAW_PACKET_CAP_DELAY_DROP		= (1 << 3),
+};
+
+enum ib_wq_type {
+	IB_WQT_RQ = IB_UVERBS_WQT_RQ,
+};
+
+enum ib_wq_state {
+	IB_WQS_RESET,
+	IB_WQS_RDY,
+	IB_WQS_ERR
+};
+
+struct ib_wq {
+	struct ib_device       *device;
+	struct ib_uwq_object   *uobject;
+	void		    *wq_context;
+	void		    (*event_handler)(struct ib_event *, void *);
+	struct ib_pd	       *pd;
+	struct ib_cq	       *cq;
+	u32		wq_num;
+	enum ib_wq_state       state;
+	enum ib_wq_type	wq_type;
+	atomic_t		usecnt;
+};
+
+enum ib_wq_flags {
+	IB_WQ_FLAGS_CVLAN_STRIPPING	= IB_UVERBS_WQ_FLAGS_CVLAN_STRIPPING,
+	IB_WQ_FLAGS_SCATTER_FCS		= IB_UVERBS_WQ_FLAGS_SCATTER_FCS,
+	IB_WQ_FLAGS_DELAY_DROP		= IB_UVERBS_WQ_FLAGS_DELAY_DROP,
+	IB_WQ_FLAGS_PCI_WRITE_END_PADDING =
+				IB_UVERBS_WQ_FLAGS_PCI_WRITE_END_PADDING,
+};
+
+struct ib_wq_init_attr {
+	void		       *wq_context;
+	enum ib_wq_type	wq_type;
+	u32		max_wr;
+	u32		max_sge;
+	struct	ib_cq	       *cq;
+	void		    (*event_handler)(struct ib_event *, void *);
+	u32		create_flags; /* Use enum ib_wq_flags */
+};
+
+enum ib_wq_attr_mask {
+	IB_WQ_STATE		= 1 << 0,
+	IB_WQ_CUR_STATE		= 1 << 1,
+	IB_WQ_FLAGS		= 1 << 2,
+};
+
+struct ib_wq_attr {
+	enum	ib_wq_state	wq_state;
+	enum	ib_wq_state	curr_wq_state;
+	u32			flags; /* Use enum ib_wq_flags */
+	u32			flags_mask; /* Use enum ib_wq_flags */
+};
+
+struct ib_rwq_ind_table {
+	struct ib_device	*device;
+	struct ib_uobject      *uobject;
+	atomic_t		usecnt;
+	u32		ind_tbl_num;
+	u32		log_ind_tbl_size;
+	struct ib_wq	**ind_tbl;
+};
+
+struct ib_rwq_ind_table_init_attr {
+	u32		log_ind_tbl_size;
+	/* Each entry is a pointer to Receive Work Queue */
+	struct ib_wq	**ind_tbl;
+};
+
+enum port_pkey_state {
+	IB_PORT_PKEY_NOT_VALID = 0,
+	IB_PORT_PKEY_VALID = 1,
+	IB_PORT_PKEY_LISTED = 2,
+};
+
+struct ib_qp_security;
+
+struct ib_port_pkey {
+	enum port_pkey_state	state;
+	u16			pkey_index;
+	u8			port_num;
+	struct list_head	qp_list;
+	struct list_head	to_error_list;
+	struct ib_qp_security  *sec;
+};
+
+struct ib_ports_pkeys {
+	struct ib_port_pkey	main;
+	struct ib_port_pkey	alt;
+};
+
+struct ib_qp_security {
+	struct ib_qp	       *qp;
+	struct ib_device       *dev;
+	/* Hold this mutex when changing port and pkey settings. */
+	struct mutex		mutex;
+	struct ib_ports_pkeys  *ports_pkeys;
+	/* A list of all open shared QP handles.  Required to enforce security
+	 * properly for all users of a shared QP.
+	 */
+	struct list_head        shared_qp_list;
+	void                   *security;
+	bool			destroying;
+	atomic_t		error_list_count;
+	struct completion	error_complete;
+	int			error_comps_pending;
+};
+
+/*
+ * @max_write_sge: Maximum SGE elements per RDMA WRITE request.
+ * @max_read_sge:  Maximum SGE elements per RDMA READ request.
+ */
+struct ib_qp {
+	struct ib_device       *device;
+	struct ib_pd	       *pd;
+	struct ib_cq	       *send_cq;
+	struct ib_cq	       *recv_cq;
+	spinlock_t		mr_lock;
+	int			mrs_used;
+	struct list_head	rdma_mrs;
+	struct list_head	sig_mrs;
+	struct ib_srq	       *srq;
+	struct ib_xrcd	       *xrcd; /* XRC TGT QPs only */
+	struct list_head	xrcd_list;
+
+	/* count times opened, mcast attaches, flow attaches */
+	atomic_t		usecnt;
+	struct list_head	open_list;
+	struct ib_qp           *real_qp;
+	struct ib_uqp_object   *uobject;
+	void                  (*event_handler)(struct ib_event *, void *);
+	void		       *qp_context;
+	/* sgid_attrs associated with the AV's */
+	const struct ib_gid_attr *av_sgid_attr;
+	const struct ib_gid_attr *alt_path_sgid_attr;
+	u32			qp_num;
+	u32			max_write_sge;
+	u32			max_read_sge;
+	enum ib_qp_type		qp_type;
+	struct ib_rwq_ind_table *rwq_ind_tbl;
+	struct ib_qp_security  *qp_sec;
+	u8			port;
+
+	bool			integrity_en;
+	/*
+	 * Implementation details of the RDMA core, don't use in drivers:
+	 */
+	struct rdma_restrack_entry     res;
+
+	/* The counter the qp is bind to */
+	struct rdma_counter    *counter;
+};
+
+struct ib_dm {
+	struct ib_device  *device;
+	u32		   length;
+	u32		   flags;
+	struct ib_uobject *uobject;
+	atomic_t	   usecnt;
+};
+
+struct ib_mr {
+	struct ib_device  *device;
+	struct ib_pd	  *pd;
+	u32		   lkey;
+	u32		   rkey;
+	u64		   iova;
+	u64		   length;
+	unsigned int	   page_size;
+	enum ib_mr_type	   type;
+	bool		   need_inval;
+	union {
+		struct ib_uobject	*uobject;	/* user */
+		struct list_head	qp_entry;	/* FR */
+	};
+
+	struct ib_dm      *dm;
+	struct ib_sig_attrs *sig_attrs; /* only for IB_MR_TYPE_INTEGRITY MRs */
+	/*
+	 * Implementation details of the RDMA core, don't use in drivers:
+	 */
+	struct rdma_restrack_entry res;
+};
+
+struct ib_mw {
+	struct ib_device	*device;
+	struct ib_pd		*pd;
+	struct ib_uobject	*uobject;
+	u32			rkey;
+	enum ib_mw_type         type;
+};
+
+/* Supported steering options */
+enum ib_flow_attr_type {
+	/* steering according to rule specifications */
+	IB_FLOW_ATTR_NORMAL		= 0x0,
+	/* default unicast and multicast rule -
+	 * receive all Eth traffic which isn't steered to any QP
+	 */
+	IB_FLOW_ATTR_ALL_DEFAULT	= 0x1,
+	/* default multicast rule -
+	 * receive all Eth multicast traffic which isn't steered to any QP
+	 */
+	IB_FLOW_ATTR_MC_DEFAULT		= 0x2,
+	/* sniffer rule - receive all port traffic */
+	IB_FLOW_ATTR_SNIFFER		= 0x3
+};
+
+/* Supported steering header types */
+enum ib_flow_spec_type {
+	/* L2 headers*/
+	IB_FLOW_SPEC_ETH		= 0x20,
+	IB_FLOW_SPEC_IB			= 0x22,
+	/* L3 header*/
+	IB_FLOW_SPEC_IPV4		= 0x30,
+	IB_FLOW_SPEC_IPV6		= 0x31,
+	IB_FLOW_SPEC_ESP                = 0x34,
+	/* L4 headers*/
+	IB_FLOW_SPEC_TCP		= 0x40,
+	IB_FLOW_SPEC_UDP		= 0x41,
+	IB_FLOW_SPEC_VXLAN_TUNNEL	= 0x50,
+	IB_FLOW_SPEC_GRE		= 0x51,
+	IB_FLOW_SPEC_MPLS		= 0x60,
+	IB_FLOW_SPEC_INNER		= 0x100,
+	/* Actions */
+	IB_FLOW_SPEC_ACTION_TAG         = 0x1000,
+	IB_FLOW_SPEC_ACTION_DROP        = 0x1001,
+	IB_FLOW_SPEC_ACTION_HANDLE	= 0x1002,
+	IB_FLOW_SPEC_ACTION_COUNT       = 0x1003,
+};
+#define IB_FLOW_SPEC_LAYER_MASK	0xF0
+#define IB_FLOW_SPEC_SUPPORT_LAYERS 10
+
+enum ib_flow_flags {
+	IB_FLOW_ATTR_FLAGS_DONT_TRAP = 1UL << 1, /* Continue match, no steal */
+	IB_FLOW_ATTR_FLAGS_EGRESS = 1UL << 2, /* Egress flow */
+	IB_FLOW_ATTR_FLAGS_RESERVED  = 1UL << 3  /* Must be last */
+};
+
+struct ib_flow_eth_filter {
+	u8	dst_mac[6];
+	u8	src_mac[6];
+	__be16	ether_type;
+	__be16	vlan_tag;
+	/* Must be last */
+	u8	real_sz[];
+};
+
+struct ib_flow_spec_eth {
+	u32			  type;
+	u16			  size;
+	struct ib_flow_eth_filter val;
+	struct ib_flow_eth_filter mask;
+};
+
+struct ib_flow_ib_filter {
+	__be16 dlid;
+	__u8   sl;
+	/* Must be last */
+	u8	real_sz[];
+};
+
+struct ib_flow_spec_ib {
+	u32			 type;
+	u16			 size;
+	struct ib_flow_ib_filter val;
+	struct ib_flow_ib_filter mask;
+};
+
+/* IPv4 header flags */
+enum ib_ipv4_flags {
+	IB_IPV4_DONT_FRAG = 0x2, /* Don't enable packet fragmentation */
+	IB_IPV4_MORE_FRAG = 0X4  /* For All fragmented packets except the
+				    last have this flag set */
+};
+
+struct ib_flow_ipv4_filter {
+	__be32	src_ip;
+	__be32	dst_ip;
+	u8	proto;
+	u8	tos;
+	u8	ttl;
+	u8	flags;
+	/* Must be last */
+	u8	real_sz[];
+};
+
+struct ib_flow_spec_ipv4 {
+	u32			   type;
+	u16			   size;
+	struct ib_flow_ipv4_filter val;
+	struct ib_flow_ipv4_filter mask;
+};
+
+struct ib_flow_ipv6_filter {
+	u8	src_ip[16];
+	u8	dst_ip[16];
+	__be32	flow_label;
+	u8	next_hdr;
+	u8	traffic_class;
+	u8	hop_limit;
+	/* Must be last */
+	u8	real_sz[];
+};
+
+struct ib_flow_spec_ipv6 {
+	u32			   type;
+	u16			   size;
+	struct ib_flow_ipv6_filter val;
+	struct ib_flow_ipv6_filter mask;
+};
+
+struct ib_flow_tcp_udp_filter {
+	__be16	dst_port;
+	__be16	src_port;
+	/* Must be last */
+	u8	real_sz[];
+};
+
+struct ib_flow_spec_tcp_udp {
+	u32			      type;
+	u16			      size;
+	struct ib_flow_tcp_udp_filter val;
+	struct ib_flow_tcp_udp_filter mask;
+};
+
+struct ib_flow_tunnel_filter {
+	__be32	tunnel_id;
+	u8	real_sz[];
+};
+
+/* ib_flow_spec_tunnel describes the Vxlan tunnel
+ * the tunnel_id from val has the vni value
+ */
+struct ib_flow_spec_tunnel {
+	u32			      type;
+	u16			      size;
+	struct ib_flow_tunnel_filter  val;
+	struct ib_flow_tunnel_filter  mask;
+};
+
+struct ib_flow_esp_filter {
+	__be32	spi;
+	__be32  seq;
+	/* Must be last */
+	u8	real_sz[];
+};
+
+struct ib_flow_spec_esp {
+	u32                           type;
+	u16			      size;
+	struct ib_flow_esp_filter     val;
+	struct ib_flow_esp_filter     mask;
+};
+
+struct ib_flow_gre_filter {
+	__be16 c_ks_res0_ver;
+	__be16 protocol;
+	__be32 key;
+	/* Must be last */
+	u8	real_sz[];
+};
+
+struct ib_flow_spec_gre {
+	u32                           type;
+	u16			      size;
+	struct ib_flow_gre_filter     val;
+	struct ib_flow_gre_filter     mask;
+};
+
+struct ib_flow_mpls_filter {
+	__be32 tag;
+	/* Must be last */
+	u8	real_sz[];
+};
+
+struct ib_flow_spec_mpls {
+	u32                           type;
+	u16			      size;
+	struct ib_flow_mpls_filter     val;
+	struct ib_flow_mpls_filter     mask;
+};
+
+struct ib_flow_spec_action_tag {
+	enum ib_flow_spec_type	      type;
+	u16			      size;
+	u32                           tag_id;
+};
+
+struct ib_flow_spec_action_drop {
+	enum ib_flow_spec_type	      type;
+	u16			      size;
+};
+
+struct ib_flow_spec_action_handle {
+	enum ib_flow_spec_type	      type;
+	u16			      size;
+	struct ib_flow_action	     *act;
+};
+
+enum ib_counters_description {
+	IB_COUNTER_PACKETS,
+	IB_COUNTER_BYTES,
+};
+
+struct ib_flow_spec_action_count {
+	enum ib_flow_spec_type type;
+	u16 size;
+	struct ib_counters *counters;
+};
+
+union ib_flow_spec {
+	struct {
+		u32			type;
+		u16			size;
+	};
+	struct ib_flow_spec_eth		eth;
+	struct ib_flow_spec_ib		ib;
+	struct ib_flow_spec_ipv4        ipv4;
+	struct ib_flow_spec_tcp_udp	tcp_udp;
+	struct ib_flow_spec_ipv6        ipv6;
+	struct ib_flow_spec_tunnel      tunnel;
+	struct ib_flow_spec_esp		esp;
+	struct ib_flow_spec_gre		gre;
+	struct ib_flow_spec_mpls	mpls;
+	struct ib_flow_spec_action_tag  flow_tag;
+	struct ib_flow_spec_action_drop drop;
+	struct ib_flow_spec_action_handle action;
+	struct ib_flow_spec_action_count flow_count;
+};
+
+struct ib_flow_attr {
+	enum ib_flow_attr_type type;
+	u16	     size;
+	u16	     priority;
+	u32	     flags;
+	u8	     num_of_specs;
+	u8	     port;
+	union ib_flow_spec flows[];
+};
+
+struct ib_flow {
+	struct ib_qp		*qp;
+	struct ib_device	*device;
+	struct ib_uobject	*uobject;
+};
+
+enum ib_flow_action_type {
+	IB_FLOW_ACTION_UNSPECIFIED,
+	IB_FLOW_ACTION_ESP = 1,
+};
+
+struct ib_flow_action_attrs_esp_keymats {
+	enum ib_uverbs_flow_action_esp_keymat			protocol;
+	union {
+		struct ib_uverbs_flow_action_esp_keymat_aes_gcm aes_gcm;
+	} keymat;
+};
+
+struct ib_flow_action_attrs_esp_replays {
+	enum ib_uverbs_flow_action_esp_replay			protocol;
+	union {
+		struct ib_uverbs_flow_action_esp_replay_bmp	bmp;
+	} replay;
+};
+
+enum ib_flow_action_attrs_esp_flags {
+	/* All user-space flags at the top: Use enum ib_uverbs_flow_action_esp_flags
+	 * This is done in order to share the same flags between user-space and
+	 * kernel and spare an unnecessary translation.
+	 */
+
+	/* Kernel flags */
+	IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED	= 1ULL << 32,
+	IB_FLOW_ACTION_ESP_FLAGS_MOD_ESP_ATTRS	= 1ULL << 33,
+};
+
+struct ib_flow_spec_list {
+	struct ib_flow_spec_list	*next;
+	union ib_flow_spec		spec;
+};
+
+struct ib_flow_action_attrs_esp {
+	struct ib_flow_action_attrs_esp_keymats		*keymat;
+	struct ib_flow_action_attrs_esp_replays		*replay;
+	struct ib_flow_spec_list			*encap;
+	/* Used only if IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED is enabled.
+	 * Value of 0 is a valid value.
+	 */
+	u32						esn;
+	u32						spi;
+	u32						seq;
+	u32						tfc_pad;
+	/* Use enum ib_flow_action_attrs_esp_flags */
+	u64						flags;
+	u64						hard_limit_pkts;
+};
+
+struct ib_flow_action {
+	struct ib_device		*device;
+	struct ib_uobject		*uobject;
+	enum ib_flow_action_type	type;
+	atomic_t			usecnt;
+};
+
+struct ib_mad;
+struct ib_grh;
+
+enum ib_process_mad_flags {
+	IB_MAD_IGNORE_MKEY	= 1,
+	IB_MAD_IGNORE_BKEY	= 2,
+	IB_MAD_IGNORE_ALL	= IB_MAD_IGNORE_MKEY | IB_MAD_IGNORE_BKEY
+};
+
+enum ib_mad_result {
+	IB_MAD_RESULT_FAILURE  = 0,      /* (!SUCCESS is the important flag) */
+	IB_MAD_RESULT_SUCCESS  = 1 << 0, /* MAD was successfully processed   */
+	IB_MAD_RESULT_REPLY    = 1 << 1, /* Reply packet needs to be sent    */
+	IB_MAD_RESULT_CONSUMED = 1 << 2  /* Packet consumed: stop processing */
+};
+
+struct ib_port_cache {
+	u64		      subnet_prefix;
+	struct ib_pkey_cache  *pkey;
+	struct ib_gid_table   *gid;
+	u8                     lmc;
+	enum ib_port_state     port_state;
+};
+
+struct ib_port_immutable {
+	int                           pkey_tbl_len;
+	int                           gid_tbl_len;
+	u32                           core_cap_flags;
+	u32                           max_mad_size;
+};
+
+struct ib_port_data {
+	struct ib_device *ib_dev;
+
+	struct ib_port_immutable immutable;
+
+	spinlock_t pkey_list_lock;
+	struct list_head pkey_list;
+
+	struct ib_port_cache cache;
+
+	spinlock_t netdev_lock;
+	struct net_device __rcu *netdev;
+	struct hlist_node ndev_hash_link;
+	struct rdma_port_counter port_counter;
+	struct rdma_hw_stats *hw_stats;
+};
+
+/* rdma netdev type - specifies protocol type */
+enum rdma_netdev_t {
+	RDMA_NETDEV_OPA_VNIC,
+	RDMA_NETDEV_IPOIB,
+};
+
+/**
+ * struct rdma_netdev - rdma netdev
+ * For cases where netstack interfacing is required.
+ */
+struct rdma_netdev {
+	void              *clnt_priv;
+	struct ib_device  *hca;
+	u8                 port_num;
+	int                mtu;
+
+	/*
+	 * cleanup function must be specified.
+	 * FIXME: This is only used for OPA_VNIC and that usage should be
+	 * removed too.
+	 */
+	void (*free_rdma_netdev)(struct net_device *netdev);
+
+	/* control functions */
+	void (*set_id)(struct net_device *netdev, int id);
+	/* send packet */
+	int (*send)(struct net_device *dev, struct sk_buff *skb,
+		    struct ib_ah *address, u32 dqpn);
+	/* multicast */
+	int (*attach_mcast)(struct net_device *dev, struct ib_device *hca,
+			    union ib_gid *gid, u16 mlid,
+			    int set_qkey, u32 qkey);
+	int (*detach_mcast)(struct net_device *dev, struct ib_device *hca,
+			    union ib_gid *gid, u16 mlid);
+};
+
+struct rdma_netdev_alloc_params {
+	size_t sizeof_priv;
+	unsigned int txqs;
+	unsigned int rxqs;
+	void *param;
+
+	int (*initialize_rdma_netdev)(struct ib_device *device, u8 port_num,
+				      struct net_device *netdev, void *param);
+};
+
+struct ib_odp_counters {
+	atomic64_t faults;
+	atomic64_t invalidations;
+	atomic64_t prefetch;
+};
+
+struct ib_counters {
+	struct ib_device	*device;
+	struct ib_uobject	*uobject;
+	/* num of objects attached */
+	atomic_t	usecnt;
+};
+
+struct ib_counters_read_attr {
+	u64	*counters_buff;
+	u32	ncounters;
+	u32	flags; /* use enum ib_read_counters_flags */
+};
+
+struct uverbs_attr_bundle;
+struct iw_cm_id;
+struct iw_cm_conn_param;
+
+#define INIT_RDMA_OBJ_SIZE(ib_struct, drv_struct, member)                      \
+	.size_##ib_struct =                                                    \
+		(sizeof(struct drv_struct) +                                   \
+		 BUILD_BUG_ON_ZERO(offsetof(struct drv_struct, member)) +      \
+		 BUILD_BUG_ON_ZERO(                                            \
+			 !__same_type(((struct drv_struct *)NULL)->member,     \
+				      struct ib_struct)))
+
+#define rdma_zalloc_drv_obj_gfp(ib_dev, ib_type, gfp)                         \
+	((struct ib_type *)kzalloc(ib_dev->ops.size_##ib_type, gfp))
+
+#define rdma_zalloc_drv_obj(ib_dev, ib_type)                                   \
+	rdma_zalloc_drv_obj_gfp(ib_dev, ib_type, GFP_KERNEL)
+
+#define DECLARE_RDMA_OBJ_SIZE(ib_struct) size_t size_##ib_struct
+
+struct rdma_user_mmap_entry {
+	struct kref ref;
+	struct ib_ucontext *ucontext;
+	unsigned long start_pgoff;
+	size_t npages;
+	bool driver_removed;
+};
+
+/* Return the offset (in bytes) the user should pass to libc's mmap() */
+static inline u64
+rdma_user_mmap_get_offset(const struct rdma_user_mmap_entry *entry)
+{
+	return (u64)entry->start_pgoff << PAGE_SHIFT;
+}
+
+/**
+ * struct ib_device_ops - InfiniBand device operations
+ * This structure defines all the InfiniBand device operations, providers will
+ * need to define the supported operations, otherwise they will be set to null.
+ */
+struct ib_device_ops {
+	struct module *owner;
+	enum rdma_driver_id driver_id;
+	u32 uverbs_abi_ver;
+	unsigned int uverbs_no_driver_id_binding:1;
+
+	int (*post_send)(struct ib_qp *qp, const struct ib_send_wr *send_wr,
+			 const struct ib_send_wr **bad_send_wr);
+	int (*post_recv)(struct ib_qp *qp, const struct ib_recv_wr *recv_wr,
+			 const struct ib_recv_wr **bad_recv_wr);
+	void (*drain_rq)(struct ib_qp *qp);
+	void (*drain_sq)(struct ib_qp *qp);
+	int (*poll_cq)(struct ib_cq *cq, int num_entries, struct ib_wc *wc);
+	int (*peek_cq)(struct ib_cq *cq, int wc_cnt);
+	int (*req_notify_cq)(struct ib_cq *cq, enum ib_cq_notify_flags flags);
+	int (*req_ncomp_notif)(struct ib_cq *cq, int wc_cnt);
+	int (*post_srq_recv)(struct ib_srq *srq,
+			     const struct ib_recv_wr *recv_wr,
+			     const struct ib_recv_wr **bad_recv_wr);
+	int (*process_mad)(struct ib_device *device, int process_mad_flags,
+			   u8 port_num, const struct ib_wc *in_wc,
+			   const struct ib_grh *in_grh,
+			   const struct ib_mad *in_mad, struct ib_mad *out_mad,
+			   size_t *out_mad_size, u16 *out_mad_pkey_index);
+	int (*query_device)(struct ib_device *device,
+			    struct ib_device_attr *device_attr,
+			    struct ib_udata *udata);
+	int (*modify_device)(struct ib_device *device, int device_modify_mask,
+			     struct ib_device_modify *device_modify);
+	void (*get_dev_fw_str)(struct ib_device *device, char *str);
+	const struct cpumask *(*get_vector_affinity)(struct ib_device *ibdev,
+						     int comp_vector);
+	int (*query_port)(struct ib_device *device, u8 port_num,
+			  struct ib_port_attr *port_attr);
+	int (*modify_port)(struct ib_device *device, u8 port_num,
+			   int port_modify_mask,
+			   struct ib_port_modify *port_modify);
+	/**
+	 * The following mandatory functions are used only at device
+	 * registration.  Keep functions such as these at the end of this
+	 * structure to avoid cache line misses when accessing struct ib_device
+	 * in fast paths.
+	 */
+	int (*get_port_immutable)(struct ib_device *device, u8 port_num,
+				  struct ib_port_immutable *immutable);
+	enum rdma_link_layer (*get_link_layer)(struct ib_device *device,
+					       u8 port_num);
+	/**
+	 * When calling get_netdev, the HW vendor's driver should return the
+	 * net device of device @device at port @port_num or NULL if such
+	 * a net device doesn't exist. The vendor driver should call dev_hold
+	 * on this net device. The HW vendor's device driver must guarantee
+	 * that this function returns NULL before the net device has finished
+	 * NETDEV_UNREGISTER state.
+	 */
+	struct net_device *(*get_netdev)(struct ib_device *device, u8 port_num);
+	/**
+	 * rdma netdev operation
+	 *
+	 * Driver implementing alloc_rdma_netdev or rdma_netdev_get_params
+	 * must return -EOPNOTSUPP if it doesn't support the specified type.
+	 */
+	struct net_device *(*alloc_rdma_netdev)(
+		struct ib_device *device, u8 port_num, enum rdma_netdev_t type,
+		const char *name, unsigned char name_assign_type,
+		void (*setup)(struct net_device *));
+
+	int (*rdma_netdev_get_params)(struct ib_device *device, u8 port_num,
+				      enum rdma_netdev_t type,
+				      struct rdma_netdev_alloc_params *params);
+	/**
+	 * query_gid should be return GID value for @device, when @port_num
+	 * link layer is either IB or iWarp. It is no-op if @port_num port
+	 * is RoCE link layer.
+	 */
+	int (*query_gid)(struct ib_device *device, u8 port_num, int index,
+			 union ib_gid *gid);
+	/**
+	 * When calling add_gid, the HW vendor's driver should add the gid
+	 * of device of port at gid index available at @attr. Meta-info of
+	 * that gid (for example, the network device related to this gid) is
+	 * available at @attr. @context allows the HW vendor driver to store
+	 * extra information together with a GID entry. The HW vendor driver may
+	 * allocate memory to contain this information and store it in @context
+	 * when a new GID entry is written to. Params are consistent until the
+	 * next call of add_gid or delete_gid. The function should return 0 on
+	 * success or error otherwise. The function could be called
+	 * concurrently for different ports. This function is only called when
+	 * roce_gid_table is used.
+	 */
+	int (*add_gid)(const struct ib_gid_attr *attr, void **context);
+	/**
+	 * When calling del_gid, the HW vendor's driver should delete the
+	 * gid of device @device at gid index gid_index of port port_num
+	 * available in @attr.
+	 * Upon the deletion of a GID entry, the HW vendor must free any
+	 * allocated memory. The caller will clear @context afterwards.
+	 * This function is only called when roce_gid_table is used.
+	 */
+	int (*del_gid)(const struct ib_gid_attr *attr, void **context);
+	int (*query_pkey)(struct ib_device *device, u8 port_num, u16 index,
+			  u16 *pkey);
+	int (*alloc_ucontext)(struct ib_ucontext *context,
+			      struct ib_udata *udata);
+	void (*dealloc_ucontext)(struct ib_ucontext *context);
+	int (*mmap)(struct ib_ucontext *context, struct vm_area_struct *vma);
+	/**
+	 * This will be called once refcount of an entry in mmap_xa reaches
+	 * zero. The type of the memory that was mapped may differ between
+	 * entries and is opaque to the rdma_user_mmap interface.
+	 * Therefore needs to be implemented by the driver in mmap_free.
+	 */
+	void (*mmap_free)(struct rdma_user_mmap_entry *entry);
+	void (*disassociate_ucontext)(struct ib_ucontext *ibcontext);
+	int (*alloc_pd)(struct ib_pd *pd, struct ib_udata *udata);
+	int (*dealloc_pd)(struct ib_pd *pd, struct ib_udata *udata);
+	int (*create_ah)(struct ib_ah *ah, struct rdma_ah_init_attr *attr,
+			 struct ib_udata *udata);
+	int (*modify_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
+	int (*query_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
+	int (*destroy_ah)(struct ib_ah *ah, u32 flags);
+	int (*create_srq)(struct ib_srq *srq,
+			  struct ib_srq_init_attr *srq_init_attr,
+			  struct ib_udata *udata);
+	int (*modify_srq)(struct ib_srq *srq, struct ib_srq_attr *srq_attr,
+			  enum ib_srq_attr_mask srq_attr_mask,
+			  struct ib_udata *udata);
+	int (*query_srq)(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
+	int (*destroy_srq)(struct ib_srq *srq, struct ib_udata *udata);
+	struct ib_qp *(*create_qp)(struct ib_pd *pd,
+				   struct ib_qp_init_attr *qp_init_attr,
+				   struct ib_udata *udata);
+	int (*modify_qp)(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
+			 int qp_attr_mask, struct ib_udata *udata);
+	int (*query_qp)(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
+			int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr);
+	int (*destroy_qp)(struct ib_qp *qp, struct ib_udata *udata);
+	int (*create_cq)(struct ib_cq *cq, const struct ib_cq_init_attr *attr,
+			 struct ib_udata *udata);
+	int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period);
+	int (*destroy_cq)(struct ib_cq *cq, struct ib_udata *udata);
+	int (*resize_cq)(struct ib_cq *cq, int cqe, struct ib_udata *udata);
+	struct ib_mr *(*get_dma_mr)(struct ib_pd *pd, int mr_access_flags);
+	struct ib_mr *(*reg_user_mr)(struct ib_pd *pd, u64 start, u64 length,
+				     u64 virt_addr, int mr_access_flags,
+				     struct ib_udata *udata);
+	int (*rereg_user_mr)(struct ib_mr *mr, int flags, u64 start, u64 length,
+			     u64 virt_addr, int mr_access_flags,
+			     struct ib_pd *pd, struct ib_udata *udata);
+	int (*dereg_mr)(struct ib_mr *mr, struct ib_udata *udata);
+	struct ib_mr *(*alloc_mr)(struct ib_pd *pd, enum ib_mr_type mr_type,
+				  u32 max_num_sg);
+	struct ib_mr *(*alloc_mr_integrity)(struct ib_pd *pd,
+					    u32 max_num_data_sg,
+					    u32 max_num_meta_sg);
+	int (*advise_mr)(struct ib_pd *pd,
+			 enum ib_uverbs_advise_mr_advice advice, u32 flags,
+			 struct ib_sge *sg_list, u32 num_sge,
+			 struct uverbs_attr_bundle *attrs);
+	int (*map_mr_sg)(struct ib_mr *mr, struct scatterlist *sg, int sg_nents,
+			 unsigned int *sg_offset);
+	int (*check_mr_status)(struct ib_mr *mr, u32 check_mask,
+			       struct ib_mr_status *mr_status);
+	int (*alloc_mw)(struct ib_mw *mw, struct ib_udata *udata);
+	int (*dealloc_mw)(struct ib_mw *mw);
+	int (*attach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid);
+	int (*detach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid);
+	int (*alloc_xrcd)(struct ib_xrcd *xrcd, struct ib_udata *udata);
+	int (*dealloc_xrcd)(struct ib_xrcd *xrcd, struct ib_udata *udata);
+	struct ib_flow *(*create_flow)(struct ib_qp *qp,
+				       struct ib_flow_attr *flow_attr,
+				       struct ib_udata *udata);
+	int (*destroy_flow)(struct ib_flow *flow_id);
+	struct ib_flow_action *(*create_flow_action_esp)(
+		struct ib_device *device,
+		const struct ib_flow_action_attrs_esp *attr,
+		struct uverbs_attr_bundle *attrs);
+	int (*destroy_flow_action)(struct ib_flow_action *action);
+	int (*modify_flow_action_esp)(
+		struct ib_flow_action *action,
+		const struct ib_flow_action_attrs_esp *attr,
+		struct uverbs_attr_bundle *attrs);
+	int (*set_vf_link_state)(struct ib_device *device, int vf, u8 port,
+				 int state);
+	int (*get_vf_config)(struct ib_device *device, int vf, u8 port,
+			     struct ifla_vf_info *ivf);
+	int (*get_vf_stats)(struct ib_device *device, int vf, u8 port,
+			    struct ifla_vf_stats *stats);
+	int (*get_vf_guid)(struct ib_device *device, int vf, u8 port,
+			    struct ifla_vf_guid *node_guid,
+			    struct ifla_vf_guid *port_guid);
+	int (*set_vf_guid)(struct ib_device *device, int vf, u8 port, u64 guid,
+			   int type);
+	struct ib_wq *(*create_wq)(struct ib_pd *pd,
+				   struct ib_wq_init_attr *init_attr,
+				   struct ib_udata *udata);
+	int (*destroy_wq)(struct ib_wq *wq, struct ib_udata *udata);
+	int (*modify_wq)(struct ib_wq *wq, struct ib_wq_attr *attr,
+			 u32 wq_attr_mask, struct ib_udata *udata);
+	int (*create_rwq_ind_table)(struct ib_rwq_ind_table *ib_rwq_ind_table,
+				    struct ib_rwq_ind_table_init_attr *init_attr,
+				    struct ib_udata *udata);
+	int (*destroy_rwq_ind_table)(struct ib_rwq_ind_table *wq_ind_table);
+	struct ib_dm *(*alloc_dm)(struct ib_device *device,
+				  struct ib_ucontext *context,
+				  struct ib_dm_alloc_attr *attr,
+				  struct uverbs_attr_bundle *attrs);
+	int (*dealloc_dm)(struct ib_dm *dm, struct uverbs_attr_bundle *attrs);
+	struct ib_mr *(*reg_dm_mr)(struct ib_pd *pd, struct ib_dm *dm,
+				   struct ib_dm_mr_attr *attr,
+				   struct uverbs_attr_bundle *attrs);
+	int (*create_counters)(struct ib_counters *counters,
+			       struct uverbs_attr_bundle *attrs);
+	int (*destroy_counters)(struct ib_counters *counters);
+	int (*read_counters)(struct ib_counters *counters,
+			     struct ib_counters_read_attr *counters_read_attr,
+			     struct uverbs_attr_bundle *attrs);
+	int (*map_mr_sg_pi)(struct ib_mr *mr, struct scatterlist *data_sg,
+			    int data_sg_nents, unsigned int *data_sg_offset,
+			    struct scatterlist *meta_sg, int meta_sg_nents,
+			    unsigned int *meta_sg_offset);
+
+	/**
+	 * alloc_hw_stats - Allocate a struct rdma_hw_stats and fill in the
+	 *   driver initialized data.  The struct is kfree()'ed by the sysfs
+	 *   core when the device is removed.  A lifespan of -1 in the return
+	 *   struct tells the core to set a default lifespan.
+	 */
+	struct rdma_hw_stats *(*alloc_hw_stats)(struct ib_device *device,
+						u8 port_num);
+	/**
+	 * get_hw_stats - Fill in the counter value(s) in the stats struct.
+	 * @index - The index in the value array we wish to have updated, or
+	 *   num_counters if we want all stats updated
+	 * Return codes -
+	 *   < 0 - Error, no counters updated
+	 *   index - Updated the single counter pointed to by index
+	 *   num_counters - Updated all counters (will reset the timestamp
+	 *     and prevent further calls for lifespan milliseconds)
+	 * Drivers are allowed to update all counters in leiu of just the
+	 *   one given in index at their option
+	 */
+	int (*get_hw_stats)(struct ib_device *device,
+			    struct rdma_hw_stats *stats, u8 port, int index);
+	/*
+	 * This function is called once for each port when a ib device is
+	 * registered.
+	 */
+	int (*init_port)(struct ib_device *device, u8 port_num,
+			 struct kobject *port_sysfs);
+	/**
+	 * Allows rdma drivers to add their own restrack attributes.
+	 */
+	int (*fill_res_mr_entry)(struct sk_buff *msg, struct ib_mr *ibmr);
+	int (*fill_res_mr_entry_raw)(struct sk_buff *msg, struct ib_mr *ibmr);
+	int (*fill_res_cq_entry)(struct sk_buff *msg, struct ib_cq *ibcq);
+	int (*fill_res_cq_entry_raw)(struct sk_buff *msg, struct ib_cq *ibcq);
+	int (*fill_res_qp_entry)(struct sk_buff *msg, struct ib_qp *ibqp);
+	int (*fill_res_qp_entry_raw)(struct sk_buff *msg, struct ib_qp *ibqp);
+	int (*fill_res_cm_id_entry)(struct sk_buff *msg, struct rdma_cm_id *id);
+
+	/* Device lifecycle callbacks */
+	/*
+	 * Called after the device becomes registered, before clients are
+	 * attached
+	 */
+	int (*enable_driver)(struct ib_device *dev);
+	/*
+	 * This is called as part of ib_dealloc_device().
+	 */
+	void (*dealloc_driver)(struct ib_device *dev);
+
+	/* iWarp CM callbacks */
+	void (*iw_add_ref)(struct ib_qp *qp);
+	void (*iw_rem_ref)(struct ib_qp *qp);
+	struct ib_qp *(*iw_get_qp)(struct ib_device *device, int qpn);
+	int (*iw_connect)(struct iw_cm_id *cm_id,
+			  struct iw_cm_conn_param *conn_param);
+	int (*iw_accept)(struct iw_cm_id *cm_id,
+			 struct iw_cm_conn_param *conn_param);
+	int (*iw_reject)(struct iw_cm_id *cm_id, const void *pdata,
+			 u8 pdata_len);
+	int (*iw_create_listen)(struct iw_cm_id *cm_id, int backlog);
+	int (*iw_destroy_listen)(struct iw_cm_id *cm_id);
+	/**
+	 * counter_bind_qp - Bind a QP to a counter.
+	 * @counter - The counter to be bound. If counter->id is zero then
+	 *   the driver needs to allocate a new counter and set counter->id
+	 */
+	int (*counter_bind_qp)(struct rdma_counter *counter, struct ib_qp *qp);
+	/**
+	 * counter_unbind_qp - Unbind the qp from the dynamically-allocated
+	 *   counter and bind it onto the default one
+	 */
+	int (*counter_unbind_qp)(struct ib_qp *qp);
+	/**
+	 * counter_dealloc -De-allocate the hw counter
+	 */
+	int (*counter_dealloc)(struct rdma_counter *counter);
+	/**
+	 * counter_alloc_stats - Allocate a struct rdma_hw_stats and fill in
+	 * the driver initialized data.
+	 */
+	struct rdma_hw_stats *(*counter_alloc_stats)(
+		struct rdma_counter *counter);
+	/**
+	 * counter_update_stats - Query the stats value of this counter
+	 */
+	int (*counter_update_stats)(struct rdma_counter *counter);
+
+	/**
+	 * Allows rdma drivers to add their own restrack attributes
+	 * dumped via 'rdma stat' iproute2 command.
+	 */
+	int (*fill_stat_mr_entry)(struct sk_buff *msg, struct ib_mr *ibmr);
+
+	/* query driver for its ucontext properties */
+	int (*query_ucontext)(struct ib_ucontext *context,
+			      struct uverbs_attr_bundle *attrs);
+
+	DECLARE_RDMA_OBJ_SIZE(ib_ah);
+	DECLARE_RDMA_OBJ_SIZE(ib_counters);
+	DECLARE_RDMA_OBJ_SIZE(ib_cq);
+	DECLARE_RDMA_OBJ_SIZE(ib_mw);
+	DECLARE_RDMA_OBJ_SIZE(ib_pd);
+	DECLARE_RDMA_OBJ_SIZE(ib_rwq_ind_table);
+	DECLARE_RDMA_OBJ_SIZE(ib_srq);
+	DECLARE_RDMA_OBJ_SIZE(ib_ucontext);
+	DECLARE_RDMA_OBJ_SIZE(ib_xrcd);
+};
+
+struct ib_core_device {
+	/* device must be the first element in structure until,
+	 * union of ib_core_device and device exists in ib_device.
+	 */
+	struct device dev;
+	possible_net_t rdma_net;
+	struct kobject *ports_kobj;
+	struct list_head port_list;
+	struct ib_device *owner; /* reach back to owner ib_device */
+};
+
+struct rdma_restrack_root;
+struct ib_device {
+	/* Do not access @dma_device directly from ULP nor from HW drivers. */
+	struct device                *dma_device;
+	struct ib_device_ops	     ops;
+	char                          name[IB_DEVICE_NAME_MAX];
+	struct rcu_head rcu_head;
+
+	struct list_head              event_handler_list;
+	/* Protects event_handler_list */
+	struct rw_semaphore event_handler_rwsem;
+
+	/* Protects QP's event_handler calls and open_qp list */
+	spinlock_t qp_open_list_lock;
+
+	struct rw_semaphore	      client_data_rwsem;
+	struct xarray                 client_data;
+	struct mutex                  unregistration_lock;
+
+	/* Synchronize GID, Pkey cache entries, subnet prefix, LMC */
+	rwlock_t cache_lock;
+	/**
+	 * port_data is indexed by port number
+	 */
+	struct ib_port_data *port_data;
+
+	int			      num_comp_vectors;
+
+	union {
+		struct device		dev;
+		struct ib_core_device	coredev;
+	};
+
+	/* First group for device attributes,
+	 * Second group for driver provided attributes (optional).
+	 * It is NULL terminated array.
+	 */
+	const struct attribute_group	*groups[3];
+
+	u64			     uverbs_cmd_mask;
+	u64			     uverbs_ex_cmd_mask;
+
+	char			     node_desc[IB_DEVICE_NODE_DESC_MAX];
+	__be64			     node_guid;
+	u32			     local_dma_lkey;
+	u16                          is_switch:1;
+	/* Indicates kernel verbs support, should not be used in drivers */
+	u16                          kverbs_provider:1;
+	/* CQ adaptive moderation (RDMA DIM) */
+	u16                          use_cq_dim:1;
+	u8                           node_type;
+	u8                           phys_port_cnt;
+	struct ib_device_attr        attrs;
+	struct attribute_group	     *hw_stats_ag;
+	struct rdma_hw_stats         *hw_stats;
+
+#ifdef CONFIG_CGROUP_RDMA
+	struct rdmacg_device         cg_device;
+#endif
+
+	u32                          index;
+
+	spinlock_t                   cq_pools_lock;
+	struct list_head             cq_pools[IB_POLL_LAST_POOL_TYPE + 1];
+
+	struct rdma_restrack_root *res;
+
+	const struct uapi_definition   *driver_def;
+
+	/*
+	 * Positive refcount indicates that the device is currently
+	 * registered and cannot be unregistered.
+	 */
+	refcount_t refcount;
+	struct completion unreg_completion;
+	struct work_struct unregistration_work;
+
+	const struct rdma_link_ops *link_ops;
+
+	/* Protects compat_devs xarray modifications */
+	struct mutex compat_devs_mutex;
+	/* Maintains compat devices for each net namespace */
+	struct xarray compat_devs;
+
+	/* Used by iWarp CM */
+	char iw_ifname[IFNAMSIZ];
+	u32 iw_driver_flags;
+	u32 lag_flags;
+};
+
+struct ib_client_nl_info;
+struct ib_client {
+	const char *name;
+	int (*add)(struct ib_device *ibdev);
+	void (*remove)(struct ib_device *, void *client_data);
+	void (*rename)(struct ib_device *dev, void *client_data);
+	int (*get_nl_info)(struct ib_device *ibdev, void *client_data,
+			   struct ib_client_nl_info *res);
+	int (*get_global_nl_info)(struct ib_client_nl_info *res);
+
+	/* Returns the net_dev belonging to this ib_client and matching the
+	 * given parameters.
+	 * @dev:	 An RDMA device that the net_dev use for communication.
+	 * @port:	 A physical port number on the RDMA device.
+	 * @pkey:	 P_Key that the net_dev uses if applicable.
+	 * @gid:	 A GID that the net_dev uses to communicate.
+	 * @addr:	 An IP address the net_dev is configured with.
+	 * @client_data: The device's client data set by ib_set_client_data().
+	 *
+	 * An ib_client that implements a net_dev on top of RDMA devices
+	 * (such as IP over IB) should implement this callback, allowing the
+	 * rdma_cm module to find the right net_dev for a given request.
+	 *
+	 * The caller is responsible for calling dev_put on the returned
+	 * netdev. */
+	struct net_device *(*get_net_dev_by_params)(
+			struct ib_device *dev,
+			u8 port,
+			u16 pkey,
+			const union ib_gid *gid,
+			const struct sockaddr *addr,
+			void *client_data);
+
+	refcount_t uses;
+	struct completion uses_zero;
+	u32 client_id;
+
+	/* kverbs are not required by the client */
+	u8 no_kverbs_req:1;
+};
+
+/*
+ * IB block DMA iterator
+ *
+ * Iterates the DMA-mapped SGL in contiguous memory blocks aligned
+ * to a HW supported page size.
+ */
+struct ib_block_iter {
+	/* internal states */
+	struct scatterlist *__sg;	/* sg holding the current aligned block */
+	dma_addr_t __dma_addr;		/* unaligned DMA address of this block */
+	unsigned int __sg_nents;	/* number of SG entries */
+	unsigned int __sg_advance;	/* number of bytes to advance in sg in next step */
+	unsigned int __pg_bit;		/* alignment of current block */
+};
+
+struct ib_device *_ib_alloc_device(size_t size);
+#define ib_alloc_device(drv_struct, member)                                    \
+	container_of(_ib_alloc_device(sizeof(struct drv_struct) +              \
+				      BUILD_BUG_ON_ZERO(offsetof(              \
+					      struct drv_struct, member))),    \
+		     struct drv_struct, member)
+
+void ib_dealloc_device(struct ib_device *device);
+
+void ib_get_device_fw_str(struct ib_device *device, char *str);
+
+int ib_register_device(struct ib_device *device, const char *name,
+		       struct device *dma_device);
+void ib_unregister_device(struct ib_device *device);
+void ib_unregister_driver(enum rdma_driver_id driver_id);
+void ib_unregister_device_and_put(struct ib_device *device);
+void ib_unregister_device_queued(struct ib_device *ib_dev);
+
+int ib_register_client   (struct ib_client *client);
+void ib_unregister_client(struct ib_client *client);
+
+void __rdma_block_iter_start(struct ib_block_iter *biter,
+			     struct scatterlist *sglist,
+			     unsigned int nents,
+			     unsigned long pgsz);
+bool __rdma_block_iter_next(struct ib_block_iter *biter);
+
+/**
+ * rdma_block_iter_dma_address - get the aligned dma address of the current
+ * block held by the block iterator.
+ * @biter: block iterator holding the memory block
+ */
+static inline dma_addr_t
+rdma_block_iter_dma_address(struct ib_block_iter *biter)
+{
+	return biter->__dma_addr & ~(BIT_ULL(biter->__pg_bit) - 1);
+}
+
+/**
+ * rdma_for_each_block - iterate over contiguous memory blocks of the sg list
+ * @sglist: sglist to iterate over
+ * @biter: block iterator holding the memory block
+ * @nents: maximum number of sg entries to iterate over
+ * @pgsz: best HW supported page size to use
+ *
+ * Callers may use rdma_block_iter_dma_address() to get each
+ * blocks aligned DMA address.
+ */
+#define rdma_for_each_block(sglist, biter, nents, pgsz)		\
+	for (__rdma_block_iter_start(biter, sglist, nents,	\
+				     pgsz);			\
+	     __rdma_block_iter_next(biter);)
+
+/**
+ * ib_get_client_data - Get IB client context
+ * @device:Device to get context for
+ * @client:Client to get context for
+ *
+ * ib_get_client_data() returns the client context data set with
+ * ib_set_client_data(). This can only be called while the client is
+ * registered to the device, once the ib_client remove() callback returns this
+ * cannot be called.
+ */
+static inline void *ib_get_client_data(struct ib_device *device,
+				       struct ib_client *client)
+{
+	return xa_load(&device->client_data, client->client_id);
+}
+void  ib_set_client_data(struct ib_device *device, struct ib_client *client,
+			 void *data);
+void ib_set_device_ops(struct ib_device *device,
+		       const struct ib_device_ops *ops);
+
+int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
+		      unsigned long pfn, unsigned long size, pgprot_t prot,
+		      struct rdma_user_mmap_entry *entry);
+int rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext,
+				struct rdma_user_mmap_entry *entry,
+				size_t length);
+int rdma_user_mmap_entry_insert_range(struct ib_ucontext *ucontext,
+				      struct rdma_user_mmap_entry *entry,
+				      size_t length, u32 min_pgoff,
+				      u32 max_pgoff);
+
+struct rdma_user_mmap_entry *
+rdma_user_mmap_entry_get_pgoff(struct ib_ucontext *ucontext,
+			       unsigned long pgoff);
+struct rdma_user_mmap_entry *
+rdma_user_mmap_entry_get(struct ib_ucontext *ucontext,
+			 struct vm_area_struct *vma);
+void rdma_user_mmap_entry_put(struct rdma_user_mmap_entry *entry);
+
+void rdma_user_mmap_entry_remove(struct rdma_user_mmap_entry *entry);
+
+static inline int ib_copy_from_udata(void *dest, struct ib_udata *udata, size_t len)
+{
+	return copy_from_user(dest, udata->inbuf, len) ? -EFAULT : 0;
+}
+
+static inline int ib_copy_to_udata(struct ib_udata *udata, void *src, size_t len)
+{
+	return copy_to_user(udata->outbuf, src, len) ? -EFAULT : 0;
+}
+
+static inline bool ib_is_buffer_cleared(const void __user *p,
+					size_t len)
+{
+	bool ret;
+	u8 *buf;
+
+	if (len > USHRT_MAX)
+		return false;
+
+	buf = memdup_user(p, len);
+	if (IS_ERR(buf))
+		return false;
+
+	ret = !memchr_inv(buf, 0, len);
+	kfree(buf);
+	return ret;
+}
+
+static inline bool ib_is_udata_cleared(struct ib_udata *udata,
+				       size_t offset,
+				       size_t len)
+{
+	return ib_is_buffer_cleared(udata->inbuf + offset, len);
+}
+
+/**
+ * ib_is_destroy_retryable - Check whether the uobject destruction
+ * is retryable.
+ * @ret: The initial destruction return code
+ * @why: remove reason
+ * @uobj: The uobject that is destroyed
+ *
+ * This function is a helper function that IB layer and low-level drivers
+ * can use to consider whether the destruction of the given uobject is
+ * retry-able.
+ * It checks the original return code, if it wasn't success the destruction
+ * is retryable according to the ucontext state (i.e. cleanup_retryable) and
+ * the remove reason. (i.e. why).
+ * Must be called with the object locked for destroy.
+ */
+static inline bool ib_is_destroy_retryable(int ret, enum rdma_remove_reason why,
+					   struct ib_uobject *uobj)
+{
+	return ret && (why == RDMA_REMOVE_DESTROY ||
+		       uobj->context->cleanup_retryable);
+}
+
+/**
+ * ib_destroy_usecnt - Called during destruction to check the usecnt
+ * @usecnt: The usecnt atomic
+ * @why: remove reason
+ * @uobj: The uobject that is destroyed
+ *
+ * Non-zero usecnts will block destruction unless destruction was triggered by
+ * a ucontext cleanup.
+ */
+static inline int ib_destroy_usecnt(atomic_t *usecnt,
+				    enum rdma_remove_reason why,
+				    struct ib_uobject *uobj)
+{
+	if (atomic_read(usecnt) && ib_is_destroy_retryable(-EBUSY, why, uobj))
+		return -EBUSY;
+	return 0;
+}
+
+/**
+ * ib_modify_qp_is_ok - Check that the supplied attribute mask
+ * contains all required attributes and no attributes not allowed for
+ * the given QP state transition.
+ * @cur_state: Current QP state
+ * @next_state: Next QP state
+ * @type: QP type
+ * @mask: Mask of supplied QP attributes
+ *
+ * This function is a helper function that a low-level driver's
+ * modify_qp method can use to validate the consumer's input.  It
+ * checks that cur_state and next_state are valid QP states, that a
+ * transition from cur_state to next_state is allowed by the IB spec,
+ * and that the attribute mask supplied is allowed for the transition.
+ */
+bool ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
+			enum ib_qp_type type, enum ib_qp_attr_mask mask);
+
+void ib_register_event_handler(struct ib_event_handler *event_handler);
+void ib_unregister_event_handler(struct ib_event_handler *event_handler);
+void ib_dispatch_event(const struct ib_event *event);
+
+int ib_query_port(struct ib_device *device,
+		  u8 port_num, struct ib_port_attr *port_attr);
+
+enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device,
+					       u8 port_num);
+
+/**
+ * rdma_cap_ib_switch - Check if the device is IB switch
+ * @device: Device to check
+ *
+ * Device driver is responsible for setting is_switch bit on
+ * in ib_device structure at init time.
+ *
+ * Return: true if the device is IB switch.
+ */
+static inline bool rdma_cap_ib_switch(const struct ib_device *device)
+{
+	return device->is_switch;
+}
+
+/**
+ * rdma_start_port - Return the first valid port number for the device
+ * specified
+ *
+ * @device: Device to be checked
+ *
+ * Return start port number
+ */
+static inline u8 rdma_start_port(const struct ib_device *device)
+{
+	return rdma_cap_ib_switch(device) ? 0 : 1;
+}
+
+/**
+ * rdma_for_each_port - Iterate over all valid port numbers of the IB device
+ * @device - The struct ib_device * to iterate over
+ * @iter - The unsigned int to store the port number
+ */
+#define rdma_for_each_port(device, iter)                                       \
+	for (iter = rdma_start_port(device + BUILD_BUG_ON_ZERO(!__same_type(   \
+						     unsigned int, iter)));    \
+	     iter <= rdma_end_port(device); (iter)++)
+
+/**
+ * rdma_end_port - Return the last valid port number for the device
+ * specified
+ *
+ * @device: Device to be checked
+ *
+ * Return last port number
+ */
+static inline u8 rdma_end_port(const struct ib_device *device)
+{
+	return rdma_cap_ib_switch(device) ? 0 : device->phys_port_cnt;
+}
+
+static inline int rdma_is_port_valid(const struct ib_device *device,
+				     unsigned int port)
+{
+	return (port >= rdma_start_port(device) &&
+		port <= rdma_end_port(device));
+}
+
+static inline bool rdma_is_grh_required(const struct ib_device *device,
+					u8 port_num)
+{
+	return device->port_data[port_num].immutable.core_cap_flags &
+	       RDMA_CORE_PORT_IB_GRH_REQUIRED;
+}
+
+static inline bool rdma_protocol_ib(const struct ib_device *device, u8 port_num)
+{
+	return device->port_data[port_num].immutable.core_cap_flags &
+	       RDMA_CORE_CAP_PROT_IB;
+}
+
+static inline bool rdma_protocol_roce(const struct ib_device *device, u8 port_num)
+{
+	return device->port_data[port_num].immutable.core_cap_flags &
+	       (RDMA_CORE_CAP_PROT_ROCE | RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP);
+}
+
+static inline bool rdma_protocol_roce_udp_encap(const struct ib_device *device, u8 port_num)
+{
+	return device->port_data[port_num].immutable.core_cap_flags &
+	       RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP;
+}
+
+static inline bool rdma_protocol_roce_eth_encap(const struct ib_device *device, u8 port_num)
+{
+	return device->port_data[port_num].immutable.core_cap_flags &
+	       RDMA_CORE_CAP_PROT_ROCE;
+}
+
+static inline bool rdma_protocol_iwarp(const struct ib_device *device, u8 port_num)
+{
+	return device->port_data[port_num].immutable.core_cap_flags &
+	       RDMA_CORE_CAP_PROT_IWARP;
+}
+
+static inline bool rdma_ib_or_roce(const struct ib_device *device, u8 port_num)
+{
+	return rdma_protocol_ib(device, port_num) ||
+		rdma_protocol_roce(device, port_num);
+}
+
+static inline bool rdma_protocol_raw_packet(const struct ib_device *device, u8 port_num)
+{
+	return device->port_data[port_num].immutable.core_cap_flags &
+	       RDMA_CORE_CAP_PROT_RAW_PACKET;
+}
+
+static inline bool rdma_protocol_usnic(const struct ib_device *device, u8 port_num)
+{
+	return device->port_data[port_num].immutable.core_cap_flags &
+	       RDMA_CORE_CAP_PROT_USNIC;
+}
+
+/**
+ * rdma_cap_ib_mad - Check if the port of a device supports Infiniband
+ * Management Datagrams.
+ * @device: Device to check
+ * @port_num: Port number to check
+ *
+ * Management Datagrams (MAD) are a required part of the InfiniBand
+ * specification and are supported on all InfiniBand devices.  A slightly
+ * extended version are also supported on OPA interfaces.
+ *
+ * Return: true if the port supports sending/receiving of MAD packets.
+ */
+static inline bool rdma_cap_ib_mad(const struct ib_device *device, u8 port_num)
+{
+	return device->port_data[port_num].immutable.core_cap_flags &
+	       RDMA_CORE_CAP_IB_MAD;
+}
+
+/**
+ * rdma_cap_opa_mad - Check if the port of device provides support for OPA
+ * Management Datagrams.
+ * @device: Device to check
+ * @port_num: Port number to check
+ *
+ * Intel OmniPath devices extend and/or replace the InfiniBand Management
+ * datagrams with their own versions.  These OPA MADs share many but not all of
+ * the characteristics of InfiniBand MADs.
+ *
+ * OPA MADs differ in the following ways:
+ *
+ *    1) MADs are variable size up to 2K
+ *       IBTA defined MADs remain fixed at 256 bytes
+ *    2) OPA SMPs must carry valid PKeys
+ *    3) OPA SMP packets are a different format
+ *
+ * Return: true if the port supports OPA MAD packet formats.
+ */
+static inline bool rdma_cap_opa_mad(struct ib_device *device, u8 port_num)
+{
+	return device->port_data[port_num].immutable.core_cap_flags &
+		RDMA_CORE_CAP_OPA_MAD;
+}
+
+/**
+ * rdma_cap_ib_smi - Check if the port of a device provides an Infiniband
+ * Subnet Management Agent (SMA) on the Subnet Management Interface (SMI).
+ * @device: Device to check
+ * @port_num: Port number to check
+ *
+ * Each InfiniBand node is required to provide a Subnet Management Agent
+ * that the subnet manager can access.  Prior to the fabric being fully
+ * configured by the subnet manager, the SMA is accessed via a well known
+ * interface called the Subnet Management Interface (SMI).  This interface
+ * uses directed route packets to communicate with the SM to get around the
+ * chicken and egg problem of the SM needing to know what's on the fabric
+ * in order to configure the fabric, and needing to configure the fabric in
+ * order to send packets to the devices on the fabric.  These directed
+ * route packets do not need the fabric fully configured in order to reach
+ * their destination.  The SMI is the only method allowed to send
+ * directed route packets on an InfiniBand fabric.
+ *
+ * Return: true if the port provides an SMI.
+ */
+static inline bool rdma_cap_ib_smi(const struct ib_device *device, u8 port_num)
+{
+	return device->port_data[port_num].immutable.core_cap_flags &
+	       RDMA_CORE_CAP_IB_SMI;
+}
+
+/**
+ * rdma_cap_ib_cm - Check if the port of device has the capability Infiniband
+ * Communication Manager.
+ * @device: Device to check
+ * @port_num: Port number to check
+ *
+ * The InfiniBand Communication Manager is one of many pre-defined General
+ * Service Agents (GSA) that are accessed via the General Service
+ * Interface (GSI).  It's role is to facilitate establishment of connections
+ * between nodes as well as other management related tasks for established
+ * connections.
+ *
+ * Return: true if the port supports an IB CM (this does not guarantee that
+ * a CM is actually running however).
+ */
+static inline bool rdma_cap_ib_cm(const struct ib_device *device, u8 port_num)
+{
+	return device->port_data[port_num].immutable.core_cap_flags &
+	       RDMA_CORE_CAP_IB_CM;
+}
+
+/**
+ * rdma_cap_iw_cm - Check if the port of device has the capability IWARP
+ * Communication Manager.
+ * @device: Device to check
+ * @port_num: Port number to check
+ *
+ * Similar to above, but specific to iWARP connections which have a different
+ * managment protocol than InfiniBand.
+ *
+ * Return: true if the port supports an iWARP CM (this does not guarantee that
+ * a CM is actually running however).
+ */
+static inline bool rdma_cap_iw_cm(const struct ib_device *device, u8 port_num)
+{
+	return device->port_data[port_num].immutable.core_cap_flags &
+	       RDMA_CORE_CAP_IW_CM;
+}
+
+/**
+ * rdma_cap_ib_sa - Check if the port of device has the capability Infiniband
+ * Subnet Administration.
+ * @device: Device to check
+ * @port_num: Port number to check
+ *
+ * An InfiniBand Subnet Administration (SA) service is a pre-defined General
+ * Service Agent (GSA) provided by the Subnet Manager (SM).  On InfiniBand
+ * fabrics, devices should resolve routes to other hosts by contacting the
+ * SA to query the proper route.
+ *
+ * Return: true if the port should act as a client to the fabric Subnet
+ * Administration interface.  This does not imply that the SA service is
+ * running locally.
+ */
+static inline bool rdma_cap_ib_sa(const struct ib_device *device, u8 port_num)
+{
+	return device->port_data[port_num].immutable.core_cap_flags &
+	       RDMA_CORE_CAP_IB_SA;
+}
+
+/**
+ * rdma_cap_ib_mcast - Check if the port of device has the capability Infiniband
+ * Multicast.
+ * @device: Device to check
+ * @port_num: Port number to check
+ *
+ * InfiniBand multicast registration is more complex than normal IPv4 or
+ * IPv6 multicast registration.  Each Host Channel Adapter must register
+ * with the Subnet Manager when it wishes to join a multicast group.  It
+ * should do so only once regardless of how many queue pairs it subscribes
+ * to this group.  And it should leave the group only after all queue pairs
+ * attached to the group have been detached.
+ *
+ * Return: true if the port must undertake the additional adminstrative
+ * overhead of registering/unregistering with the SM and tracking of the
+ * total number of queue pairs attached to the multicast group.
+ */
+static inline bool rdma_cap_ib_mcast(const struct ib_device *device, u8 port_num)
+{
+	return rdma_cap_ib_sa(device, port_num);
+}
+
+/**
+ * rdma_cap_af_ib - Check if the port of device has the capability
+ * Native Infiniband Address.
+ * @device: Device to check
+ * @port_num: Port number to check
+ *
+ * InfiniBand addressing uses a port's GUID + Subnet Prefix to make a default
+ * GID.  RoCE uses a different mechanism, but still generates a GID via
+ * a prescribed mechanism and port specific data.
+ *
+ * Return: true if the port uses a GID address to identify devices on the
+ * network.
+ */
+static inline bool rdma_cap_af_ib(const struct ib_device *device, u8 port_num)
+{
+	return device->port_data[port_num].immutable.core_cap_flags &
+	       RDMA_CORE_CAP_AF_IB;
+}
+
+/**
+ * rdma_cap_eth_ah - Check if the port of device has the capability
+ * Ethernet Address Handle.
+ * @device: Device to check
+ * @port_num: Port number to check
+ *
+ * RoCE is InfiniBand over Ethernet, and it uses a well defined technique
+ * to fabricate GIDs over Ethernet/IP specific addresses native to the
+ * port.  Normally, packet headers are generated by the sending host
+ * adapter, but when sending connectionless datagrams, we must manually
+ * inject the proper headers for the fabric we are communicating over.
+ *
+ * Return: true if we are running as a RoCE port and must force the
+ * addition of a Global Route Header built from our Ethernet Address
+ * Handle into our header list for connectionless packets.
+ */
+static inline bool rdma_cap_eth_ah(const struct ib_device *device, u8 port_num)
+{
+	return device->port_data[port_num].immutable.core_cap_flags &
+	       RDMA_CORE_CAP_ETH_AH;
+}
+
+/**
+ * rdma_cap_opa_ah - Check if the port of device supports
+ * OPA Address handles
+ * @device: Device to check
+ * @port_num: Port number to check
+ *
+ * Return: true if we are running on an OPA device which supports
+ * the extended OPA addressing.
+ */
+static inline bool rdma_cap_opa_ah(struct ib_device *device, u8 port_num)
+{
+	return (device->port_data[port_num].immutable.core_cap_flags &
+		RDMA_CORE_CAP_OPA_AH) == RDMA_CORE_CAP_OPA_AH;
+}
+
+/**
+ * rdma_max_mad_size - Return the max MAD size required by this RDMA Port.
+ *
+ * @device: Device
+ * @port_num: Port number
+ *
+ * This MAD size includes the MAD headers and MAD payload.  No other headers
+ * are included.
+ *
+ * Return the max MAD size required by the Port.  Will return 0 if the port
+ * does not support MADs
+ */
+static inline size_t rdma_max_mad_size(const struct ib_device *device, u8 port_num)
+{
+	return device->port_data[port_num].immutable.max_mad_size;
+}
+
+/**
+ * rdma_cap_roce_gid_table - Check if the port of device uses roce_gid_table
+ * @device: Device to check
+ * @port_num: Port number to check
+ *
+ * RoCE GID table mechanism manages the various GIDs for a device.
+ *
+ * NOTE: if allocating the port's GID table has failed, this call will still
+ * return true, but any RoCE GID table API will fail.
+ *
+ * Return: true if the port uses RoCE GID table mechanism in order to manage
+ * its GIDs.
+ */
+static inline bool rdma_cap_roce_gid_table(const struct ib_device *device,
+					   u8 port_num)
+{
+	return rdma_protocol_roce(device, port_num) &&
+		device->ops.add_gid && device->ops.del_gid;
+}
+
+/*
+ * Check if the device supports READ W/ INVALIDATE.
+ */
+static inline bool rdma_cap_read_inv(struct ib_device *dev, u32 port_num)
+{
+	/*
+	 * iWarp drivers must support READ W/ INVALIDATE.  No other protocol
+	 * has support for it yet.
+	 */
+	return rdma_protocol_iwarp(dev, port_num);
+}
+
+/**
+ * rdma_core_cap_opa_port - Return whether the RDMA Port is OPA or not.
+ * @device: Device
+ * @port_num: 1 based Port number
+ *
+ * Return true if port is an Intel OPA port , false if not
+ */
+static inline bool rdma_core_cap_opa_port(struct ib_device *device,
+					  u32 port_num)
+{
+	return (device->port_data[port_num].immutable.core_cap_flags &
+		RDMA_CORE_PORT_INTEL_OPA) == RDMA_CORE_PORT_INTEL_OPA;
+}
+
+/**
+ * rdma_mtu_enum_to_int - Return the mtu of the port as an integer value.
+ * @device: Device
+ * @port_num: Port number
+ * @mtu: enum value of MTU
+ *
+ * Return the MTU size supported by the port as an integer value. Will return
+ * -1 if enum value of mtu is not supported.
+ */
+static inline int rdma_mtu_enum_to_int(struct ib_device *device, u8 port,
+				       int mtu)
+{
+	if (rdma_core_cap_opa_port(device, port))
+		return opa_mtu_enum_to_int((enum opa_mtu)mtu);
+	else
+		return ib_mtu_enum_to_int((enum ib_mtu)mtu);
+}
+
+/**
+ * rdma_mtu_from_attr - Return the mtu of the port from the port attribute.
+ * @device: Device
+ * @port_num: Port number
+ * @attr: port attribute
+ *
+ * Return the MTU size supported by the port as an integer value.
+ */
+static inline int rdma_mtu_from_attr(struct ib_device *device, u8 port,
+				     struct ib_port_attr *attr)
+{
+	if (rdma_core_cap_opa_port(device, port))
+		return attr->phys_mtu;
+	else
+		return ib_mtu_enum_to_int(attr->max_mtu);
+}
+
+int ib_set_vf_link_state(struct ib_device *device, int vf, u8 port,
+			 int state);
+int ib_get_vf_config(struct ib_device *device, int vf, u8 port,
+		     struct ifla_vf_info *info);
+int ib_get_vf_stats(struct ib_device *device, int vf, u8 port,
+		    struct ifla_vf_stats *stats);
+int ib_get_vf_guid(struct ib_device *device, int vf, u8 port,
+		    struct ifla_vf_guid *node_guid,
+		    struct ifla_vf_guid *port_guid);
+int ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid,
+		   int type);
+
+int ib_query_pkey(struct ib_device *device,
+		  u8 port_num, u16 index, u16 *pkey);
+
+int ib_modify_device(struct ib_device *device,
+		     int device_modify_mask,
+		     struct ib_device_modify *device_modify);
+
+int ib_modify_port(struct ib_device *device,
+		   u8 port_num, int port_modify_mask,
+		   struct ib_port_modify *port_modify);
+
+int ib_find_gid(struct ib_device *device, union ib_gid *gid,
+		u8 *port_num, u16 *index);
+
+int ib_find_pkey(struct ib_device *device,
+		 u8 port_num, u16 pkey, u16 *index);
+
+enum ib_pd_flags {
+	/*
+	 * Create a memory registration for all memory in the system and place
+	 * the rkey for it into pd->unsafe_global_rkey.  This can be used by
+	 * ULPs to avoid the overhead of dynamic MRs.
+	 *
+	 * This flag is generally considered unsafe and must only be used in
+	 * extremly trusted environments.  Every use of it will log a warning
+	 * in the kernel log.
+	 */
+	IB_PD_UNSAFE_GLOBAL_RKEY	= 0x01,
+};
+
+struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
+		const char *caller);
+
+#define ib_alloc_pd(device, flags) \
+	__ib_alloc_pd((device), (flags), KBUILD_MODNAME)
+
+int ib_dealloc_pd_user(struct ib_pd *pd, struct ib_udata *udata);
+
+/**
+ * ib_dealloc_pd - Deallocate kernel PD
+ * @pd: The protection domain
+ *
+ * NOTE: for user PD use ib_dealloc_pd_user with valid udata!
+ */
+static inline void ib_dealloc_pd(struct ib_pd *pd)
+{
+	int ret = ib_dealloc_pd_user(pd, NULL);
+
+	WARN_ONCE(ret, "Destroy of kernel PD shouldn't fail");
+}
+
+enum rdma_create_ah_flags {
+	/* In a sleepable context */
+	RDMA_CREATE_AH_SLEEPABLE = BIT(0),
+};
+
+/**
+ * rdma_create_ah - Creates an address handle for the given address vector.
+ * @pd: The protection domain associated with the address handle.
+ * @ah_attr: The attributes of the address vector.
+ * @flags: Create address handle flags (see enum rdma_create_ah_flags).
+ *
+ * The address handle is used to reference a local or global destination
+ * in all UD QP post sends.
+ */
+struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr,
+			     u32 flags);
+
+/**
+ * rdma_create_user_ah - Creates an address handle for the given address vector.
+ * It resolves destination mac address for ah attribute of RoCE type.
+ * @pd: The protection domain associated with the address handle.
+ * @ah_attr: The attributes of the address vector.
+ * @udata: pointer to user's input output buffer information need by
+ *         provider driver.
+ *
+ * It returns 0 on success and returns appropriate error code on error.
+ * The address handle is used to reference a local or global destination
+ * in all UD QP post sends.
+ */
+struct ib_ah *rdma_create_user_ah(struct ib_pd *pd,
+				  struct rdma_ah_attr *ah_attr,
+				  struct ib_udata *udata);
+/**
+ * ib_get_gids_from_rdma_hdr - Get sgid and dgid from GRH or IPv4 header
+ *   work completion.
+ * @hdr: the L3 header to parse
+ * @net_type: type of header to parse
+ * @sgid: place to store source gid
+ * @dgid: place to store destination gid
+ */
+int ib_get_gids_from_rdma_hdr(const union rdma_network_hdr *hdr,
+			      enum rdma_network_type net_type,
+			      union ib_gid *sgid, union ib_gid *dgid);
+
+/**
+ * ib_get_rdma_header_version - Get the header version
+ * @hdr: the L3 header to parse
+ */
+int ib_get_rdma_header_version(const union rdma_network_hdr *hdr);
+
+/**
+ * ib_init_ah_attr_from_wc - Initializes address handle attributes from a
+ *   work completion.
+ * @device: Device on which the received message arrived.
+ * @port_num: Port on which the received message arrived.
+ * @wc: Work completion associated with the received message.
+ * @grh: References the received global route header.  This parameter is
+ *   ignored unless the work completion indicates that the GRH is valid.
+ * @ah_attr: Returned attributes that can be used when creating an address
+ *   handle for replying to the message.
+ * When ib_init_ah_attr_from_wc() returns success,
+ * (a) for IB link layer it optionally contains a reference to SGID attribute
+ * when GRH is present for IB link layer.
+ * (b) for RoCE link layer it contains a reference to SGID attribute.
+ * User must invoke rdma_cleanup_ah_attr_gid_attr() to release reference to SGID
+ * attributes which are initialized using ib_init_ah_attr_from_wc().
+ *
+ */
+int ib_init_ah_attr_from_wc(struct ib_device *device, u8 port_num,
+			    const struct ib_wc *wc, const struct ib_grh *grh,
+			    struct rdma_ah_attr *ah_attr);
+
+/**
+ * ib_create_ah_from_wc - Creates an address handle associated with the
+ *   sender of the specified work completion.
+ * @pd: The protection domain associated with the address handle.
+ * @wc: Work completion information associated with a received message.
+ * @grh: References the received global route header.  This parameter is
+ *   ignored unless the work completion indicates that the GRH is valid.
+ * @port_num: The outbound port number to associate with the address.
+ *
+ * The address handle is used to reference a local or global destination
+ * in all UD QP post sends.
+ */
+struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, const struct ib_wc *wc,
+				   const struct ib_grh *grh, u8 port_num);
+
+/**
+ * rdma_modify_ah - Modifies the address vector associated with an address
+ *   handle.
+ * @ah: The address handle to modify.
+ * @ah_attr: The new address vector attributes to associate with the
+ *   address handle.
+ */
+int rdma_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
+
+/**
+ * rdma_query_ah - Queries the address vector associated with an address
+ *   handle.
+ * @ah: The address handle to query.
+ * @ah_attr: The address vector attributes associated with the address
+ *   handle.
+ */
+int rdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
+
+enum rdma_destroy_ah_flags {
+	/* In a sleepable context */
+	RDMA_DESTROY_AH_SLEEPABLE = BIT(0),
+};
+
+/**
+ * rdma_destroy_ah_user - Destroys an address handle.
+ * @ah: The address handle to destroy.
+ * @flags: Destroy address handle flags (see enum rdma_destroy_ah_flags).
+ * @udata: Valid user data or NULL for kernel objects
+ */
+int rdma_destroy_ah_user(struct ib_ah *ah, u32 flags, struct ib_udata *udata);
+
+/**
+ * rdma_destroy_ah - Destroys an kernel address handle.
+ * @ah: The address handle to destroy.
+ * @flags: Destroy address handle flags (see enum rdma_destroy_ah_flags).
+ *
+ * NOTE: for user ah use rdma_destroy_ah_user with valid udata!
+ */
+static inline void rdma_destroy_ah(struct ib_ah *ah, u32 flags)
+{
+	int ret = rdma_destroy_ah_user(ah, flags, NULL);
+
+	WARN_ONCE(ret, "Destroy of kernel AH shouldn't fail");
+}
+
+struct ib_srq *ib_create_srq_user(struct ib_pd *pd,
+				  struct ib_srq_init_attr *srq_init_attr,
+				  struct ib_usrq_object *uobject,
+				  struct ib_udata *udata);
+static inline struct ib_srq *
+ib_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *srq_init_attr)
+{
+	if (!pd->device->ops.create_srq)
+		return ERR_PTR(-EOPNOTSUPP);
+
+	return ib_create_srq_user(pd, srq_init_attr, NULL, NULL);
+}
+
+/**
+ * ib_modify_srq - Modifies the attributes for the specified SRQ.
+ * @srq: The SRQ to modify.
+ * @srq_attr: On input, specifies the SRQ attributes to modify.  On output,
+ *   the current values of selected SRQ attributes are returned.
+ * @srq_attr_mask: A bit-mask used to specify which attributes of the SRQ
+ *   are being modified.
+ *
+ * The mask may contain IB_SRQ_MAX_WR to resize the SRQ and/or
+ * IB_SRQ_LIMIT to set the SRQ's limit and request notification when
+ * the number of receives queued drops below the limit.
+ */
+int ib_modify_srq(struct ib_srq *srq,
+		  struct ib_srq_attr *srq_attr,
+		  enum ib_srq_attr_mask srq_attr_mask);
+
+/**
+ * ib_query_srq - Returns the attribute list and current values for the
+ *   specified SRQ.
+ * @srq: The SRQ to query.
+ * @srq_attr: The attributes of the specified SRQ.
+ */
+int ib_query_srq(struct ib_srq *srq,
+		 struct ib_srq_attr *srq_attr);
+
+/**
+ * ib_destroy_srq_user - Destroys the specified SRQ.
+ * @srq: The SRQ to destroy.
+ * @udata: Valid user data or NULL for kernel objects
+ */
+int ib_destroy_srq_user(struct ib_srq *srq, struct ib_udata *udata);
+
+/**
+ * ib_destroy_srq - Destroys the specified kernel SRQ.
+ * @srq: The SRQ to destroy.
+ *
+ * NOTE: for user srq use ib_destroy_srq_user with valid udata!
+ */
+static inline void ib_destroy_srq(struct ib_srq *srq)
+{
+	int ret = ib_destroy_srq_user(srq, NULL);
+
+	WARN_ONCE(ret, "Destroy of kernel SRQ shouldn't fail");
+}
+
+/**
+ * ib_post_srq_recv - Posts a list of work requests to the specified SRQ.
+ * @srq: The SRQ to post the work request on.
+ * @recv_wr: A list of work requests to post on the receive queue.
+ * @bad_recv_wr: On an immediate failure, this parameter will reference
+ *   the work request that failed to be posted on the QP.
+ */
+static inline int ib_post_srq_recv(struct ib_srq *srq,
+				   const struct ib_recv_wr *recv_wr,
+				   const struct ib_recv_wr **bad_recv_wr)
+{
+	const struct ib_recv_wr *dummy;
+
+	return srq->device->ops.post_srq_recv(srq, recv_wr,
+					      bad_recv_wr ? : &dummy);
+}
+
+struct ib_qp *ib_create_qp(struct ib_pd *pd,
+			   struct ib_qp_init_attr *qp_init_attr);
+
+/**
+ * ib_modify_qp_with_udata - Modifies the attributes for the specified QP.
+ * @qp: The QP to modify.
+ * @attr: On input, specifies the QP attributes to modify.  On output,
+ *   the current values of selected QP attributes are returned.
+ * @attr_mask: A bit-mask used to specify which attributes of the QP
+ *   are being modified.
+ * @udata: pointer to user's input output buffer information
+ *   are being modified.
+ * It returns 0 on success and returns appropriate error code on error.
+ */
+int ib_modify_qp_with_udata(struct ib_qp *qp,
+			    struct ib_qp_attr *attr,
+			    int attr_mask,
+			    struct ib_udata *udata);
+
+/**
+ * ib_modify_qp - Modifies the attributes for the specified QP and then
+ *   transitions the QP to the given state.
+ * @qp: The QP to modify.
+ * @qp_attr: On input, specifies the QP attributes to modify.  On output,
+ *   the current values of selected QP attributes are returned.
+ * @qp_attr_mask: A bit-mask used to specify which attributes of the QP
+ *   are being modified.
+ */
+int ib_modify_qp(struct ib_qp *qp,
+		 struct ib_qp_attr *qp_attr,
+		 int qp_attr_mask);
+
+/**
+ * ib_query_qp - Returns the attribute list and current values for the
+ *   specified QP.
+ * @qp: The QP to query.
+ * @qp_attr: The attributes of the specified QP.
+ * @qp_attr_mask: A bit-mask used to select specific attributes to query.
+ * @qp_init_attr: Additional attributes of the selected QP.
+ *
+ * The qp_attr_mask may be used to limit the query to gathering only the
+ * selected attributes.
+ */
+int ib_query_qp(struct ib_qp *qp,
+		struct ib_qp_attr *qp_attr,
+		int qp_attr_mask,
+		struct ib_qp_init_attr *qp_init_attr);
+
+/**
+ * ib_destroy_qp - Destroys the specified QP.
+ * @qp: The QP to destroy.
+ * @udata: Valid udata or NULL for kernel objects
+ */
+int ib_destroy_qp_user(struct ib_qp *qp, struct ib_udata *udata);
+
+/**
+ * ib_destroy_qp - Destroys the specified kernel QP.
+ * @qp: The QP to destroy.
+ *
+ * NOTE: for user qp use ib_destroy_qp_user with valid udata!
+ */
+static inline int ib_destroy_qp(struct ib_qp *qp)
+{
+	return ib_destroy_qp_user(qp, NULL);
+}
+
+/**
+ * ib_open_qp - Obtain a reference to an existing sharable QP.
+ * @xrcd - XRC domain
+ * @qp_open_attr: Attributes identifying the QP to open.
+ *
+ * Returns a reference to a sharable QP.
+ */
+struct ib_qp *ib_open_qp(struct ib_xrcd *xrcd,
+			 struct ib_qp_open_attr *qp_open_attr);
+
+/**
+ * ib_close_qp - Release an external reference to a QP.
+ * @qp: The QP handle to release
+ *
+ * The opened QP handle is released by the caller.  The underlying
+ * shared QP is not destroyed until all internal references are released.
+ */
+int ib_close_qp(struct ib_qp *qp);
+
+/**
+ * ib_post_send - Posts a list of work requests to the send queue of
+ *   the specified QP.
+ * @qp: The QP to post the work request on.
+ * @send_wr: A list of work requests to post on the send queue.
+ * @bad_send_wr: On an immediate failure, this parameter will reference
+ *   the work request that failed to be posted on the QP.
+ *
+ * While IBA Vol. 1 section 11.4.1.1 specifies that if an immediate
+ * error is returned, the QP state shall not be affected,
+ * ib_post_send() will return an immediate error after queueing any
+ * earlier work requests in the list.
+ */
+static inline int ib_post_send(struct ib_qp *qp,
+			       const struct ib_send_wr *send_wr,
+			       const struct ib_send_wr **bad_send_wr)
+{
+	const struct ib_send_wr *dummy;
+
+	return qp->device->ops.post_send(qp, send_wr, bad_send_wr ? : &dummy);
+}
+
+/**
+ * ib_post_recv - Posts a list of work requests to the receive queue of
+ *   the specified QP.
+ * @qp: The QP to post the work request on.
+ * @recv_wr: A list of work requests to post on the receive queue.
+ * @bad_recv_wr: On an immediate failure, this parameter will reference
+ *   the work request that failed to be posted on the QP.
+ */
+static inline int ib_post_recv(struct ib_qp *qp,
+			       const struct ib_recv_wr *recv_wr,
+			       const struct ib_recv_wr **bad_recv_wr)
+{
+	const struct ib_recv_wr *dummy;
+
+	return qp->device->ops.post_recv(qp, recv_wr, bad_recv_wr ? : &dummy);
+}
+
+struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private, int nr_cqe,
+			    int comp_vector, enum ib_poll_context poll_ctx,
+			    const char *caller);
+static inline struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private,
+					int nr_cqe, int comp_vector,
+					enum ib_poll_context poll_ctx)
+{
+	return __ib_alloc_cq(dev, private, nr_cqe, comp_vector, poll_ctx,
+			     KBUILD_MODNAME);
+}
+
+struct ib_cq *__ib_alloc_cq_any(struct ib_device *dev, void *private,
+				int nr_cqe, enum ib_poll_context poll_ctx,
+				const char *caller);
+
+/**
+ * ib_alloc_cq_any: Allocate kernel CQ
+ * @dev: The IB device
+ * @private: Private data attached to the CQE
+ * @nr_cqe: Number of CQEs in the CQ
+ * @poll_ctx: Context used for polling the CQ
+ */
+static inline struct ib_cq *ib_alloc_cq_any(struct ib_device *dev,
+					    void *private, int nr_cqe,
+					    enum ib_poll_context poll_ctx)
+{
+	return __ib_alloc_cq_any(dev, private, nr_cqe, poll_ctx,
+				 KBUILD_MODNAME);
+}
+
+void ib_free_cq(struct ib_cq *cq);
+int ib_process_cq_direct(struct ib_cq *cq, int budget);
+
+/**
+ * ib_create_cq - Creates a CQ on the specified device.
+ * @device: The device on which to create the CQ.
+ * @comp_handler: A user-specified callback that is invoked when a
+ *   completion event occurs on the CQ.
+ * @event_handler: A user-specified callback that is invoked when an
+ *   asynchronous event not associated with a completion occurs on the CQ.
+ * @cq_context: Context associated with the CQ returned to the user via
+ *   the associated completion and event handlers.
+ * @cq_attr: The attributes the CQ should be created upon.
+ *
+ * Users can examine the cq structure to determine the actual CQ size.
+ */
+struct ib_cq *__ib_create_cq(struct ib_device *device,
+			     ib_comp_handler comp_handler,
+			     void (*event_handler)(struct ib_event *, void *),
+			     void *cq_context,
+			     const struct ib_cq_init_attr *cq_attr,
+			     const char *caller);
+#define ib_create_cq(device, cmp_hndlr, evt_hndlr, cq_ctxt, cq_attr) \
+	__ib_create_cq((device), (cmp_hndlr), (evt_hndlr), (cq_ctxt), (cq_attr), KBUILD_MODNAME)
+
+/**
+ * ib_resize_cq - Modifies the capacity of the CQ.
+ * @cq: The CQ to resize.
+ * @cqe: The minimum size of the CQ.
+ *
+ * Users can examine the cq structure to determine the actual CQ size.
+ */
+int ib_resize_cq(struct ib_cq *cq, int cqe);
+
+/**
+ * rdma_set_cq_moderation - Modifies moderation params of the CQ
+ * @cq: The CQ to modify.
+ * @cq_count: number of CQEs that will trigger an event
+ * @cq_period: max period of time in usec before triggering an event
+ *
+ */
+int rdma_set_cq_moderation(struct ib_cq *cq, u16 cq_count, u16 cq_period);
+
+/**
+ * ib_destroy_cq_user - Destroys the specified CQ.
+ * @cq: The CQ to destroy.
+ * @udata: Valid user data or NULL for kernel objects
+ */
+int ib_destroy_cq_user(struct ib_cq *cq, struct ib_udata *udata);
+
+/**
+ * ib_destroy_cq - Destroys the specified kernel CQ.
+ * @cq: The CQ to destroy.
+ *
+ * NOTE: for user cq use ib_destroy_cq_user with valid udata!
+ */
+static inline void ib_destroy_cq(struct ib_cq *cq)
+{
+	int ret = ib_destroy_cq_user(cq, NULL);
+
+	WARN_ONCE(ret, "Destroy of kernel CQ shouldn't fail");
+}
+
+/**
+ * ib_poll_cq - poll a CQ for completion(s)
+ * @cq:the CQ being polled
+ * @num_entries:maximum number of completions to return
+ * @wc:array of at least @num_entries &struct ib_wc where completions
+ *   will be returned
+ *
+ * Poll a CQ for (possibly multiple) completions.  If the return value
+ * is < 0, an error occurred.  If the return value is >= 0, it is the
+ * number of completions returned.  If the return value is
+ * non-negative and < num_entries, then the CQ was emptied.
+ */
+static inline int ib_poll_cq(struct ib_cq *cq, int num_entries,
+			     struct ib_wc *wc)
+{
+	return cq->device->ops.poll_cq(cq, num_entries, wc);
+}
+
+/**
+ * ib_req_notify_cq - Request completion notification on a CQ.
+ * @cq: The CQ to generate an event for.
+ * @flags:
+ *   Must contain exactly one of %IB_CQ_SOLICITED or %IB_CQ_NEXT_COMP
+ *   to request an event on the next solicited event or next work
+ *   completion at any type, respectively. %IB_CQ_REPORT_MISSED_EVENTS
+ *   may also be |ed in to request a hint about missed events, as
+ *   described below.
+ *
+ * Return Value:
+ *    < 0 means an error occurred while requesting notification
+ *   == 0 means notification was requested successfully, and if
+ *        IB_CQ_REPORT_MISSED_EVENTS was passed in, then no events
+ *        were missed and it is safe to wait for another event.  In
+ *        this case is it guaranteed that any work completions added
+ *        to the CQ since the last CQ poll will trigger a completion
+ *        notification event.
+ *    > 0 is only returned if IB_CQ_REPORT_MISSED_EVENTS was passed
+ *        in.  It means that the consumer must poll the CQ again to
+ *        make sure it is empty to avoid missing an event because of a
+ *        race between requesting notification and an entry being
+ *        added to the CQ.  This return value means it is possible
+ *        (but not guaranteed) that a work completion has been added
+ *        to the CQ since the last poll without triggering a
+ *        completion notification event.
+ */
+static inline int ib_req_notify_cq(struct ib_cq *cq,
+				   enum ib_cq_notify_flags flags)
+{
+	return cq->device->ops.req_notify_cq(cq, flags);
+}
+
+struct ib_cq *ib_cq_pool_get(struct ib_device *dev, unsigned int nr_cqe,
+			     int comp_vector_hint,
+			     enum ib_poll_context poll_ctx);
+
+void ib_cq_pool_put(struct ib_cq *cq, unsigned int nr_cqe);
+
+/**
+ * ib_req_ncomp_notif - Request completion notification when there are
+ *   at least the specified number of unreaped completions on the CQ.
+ * @cq: The CQ to generate an event for.
+ * @wc_cnt: The number of unreaped completions that should be on the
+ *   CQ before an event is generated.
+ */
+static inline int ib_req_ncomp_notif(struct ib_cq *cq, int wc_cnt)
+{
+	return cq->device->ops.req_ncomp_notif ?
+		cq->device->ops.req_ncomp_notif(cq, wc_cnt) :
+		-ENOSYS;
+}
+
+/*
+ * Drivers that don't need a DMA mapping at the RDMA layer, set dma_device to
+ * NULL. This causes the ib_dma* helpers to just stash the kernel virtual
+ * address into the dma address.
+ */
+static inline bool ib_uses_virt_dma(struct ib_device *dev)
+{
+	return IS_ENABLED(CONFIG_INFINIBAND_VIRT_DMA) && !dev->dma_device;
+}
+
+/**
+ * ib_dma_mapping_error - check a DMA addr for error
+ * @dev: The device for which the dma_addr was created
+ * @dma_addr: The DMA address to check
+ */
+static inline int ib_dma_mapping_error(struct ib_device *dev, u64 dma_addr)
+{
+	if (ib_uses_virt_dma(dev))
+		return 0;
+	return dma_mapping_error(dev->dma_device, dma_addr);
+}
+
+/**
+ * ib_dma_map_single - Map a kernel virtual address to DMA address
+ * @dev: The device for which the dma_addr is to be created
+ * @cpu_addr: The kernel virtual address
+ * @size: The size of the region in bytes
+ * @direction: The direction of the DMA
+ */
+static inline u64 ib_dma_map_single(struct ib_device *dev,
+				    void *cpu_addr, size_t size,
+				    enum dma_data_direction direction)
+{
+	if (ib_uses_virt_dma(dev))
+		return (uintptr_t)cpu_addr;
+	return dma_map_single(dev->dma_device, cpu_addr, size, direction);
+}
+
+/**
+ * ib_dma_unmap_single - Destroy a mapping created by ib_dma_map_single()
+ * @dev: The device for which the DMA address was created
+ * @addr: The DMA address
+ * @size: The size of the region in bytes
+ * @direction: The direction of the DMA
+ */
+static inline void ib_dma_unmap_single(struct ib_device *dev,
+				       u64 addr, size_t size,
+				       enum dma_data_direction direction)
+{
+	if (!ib_uses_virt_dma(dev))
+		dma_unmap_single(dev->dma_device, addr, size, direction);
+}
+
+/**
+ * ib_dma_map_page - Map a physical page to DMA address
+ * @dev: The device for which the dma_addr is to be created
+ * @page: The page to be mapped
+ * @offset: The offset within the page
+ * @size: The size of the region in bytes
+ * @direction: The direction of the DMA
+ */
+static inline u64 ib_dma_map_page(struct ib_device *dev,
+				  struct page *page,
+				  unsigned long offset,
+				  size_t size,
+					 enum dma_data_direction direction)
+{
+	if (ib_uses_virt_dma(dev))
+		return (uintptr_t)(page_address(page) + offset);
+	return dma_map_page(dev->dma_device, page, offset, size, direction);
+}
+
+/**
+ * ib_dma_unmap_page - Destroy a mapping created by ib_dma_map_page()
+ * @dev: The device for which the DMA address was created
+ * @addr: The DMA address
+ * @size: The size of the region in bytes
+ * @direction: The direction of the DMA
+ */
+static inline void ib_dma_unmap_page(struct ib_device *dev,
+				     u64 addr, size_t size,
+				     enum dma_data_direction direction)
+{
+	if (!ib_uses_virt_dma(dev))
+		dma_unmap_page(dev->dma_device, addr, size, direction);
+}
+
+int ib_dma_virt_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents);
+static inline int ib_dma_map_sg_attrs(struct ib_device *dev,
+				      struct scatterlist *sg, int nents,
+				      enum dma_data_direction direction,
+				      unsigned long dma_attrs)
+{
+	if (ib_uses_virt_dma(dev))
+		return ib_dma_virt_map_sg(dev, sg, nents);
+	return dma_map_sg_attrs(dev->dma_device, sg, nents, direction,
+				dma_attrs);
+}
+
+static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev,
+					 struct scatterlist *sg, int nents,
+					 enum dma_data_direction direction,
+					 unsigned long dma_attrs)
+{
+	if (!ib_uses_virt_dma(dev))
+		dma_unmap_sg_attrs(dev->dma_device, sg, nents, direction,
+				   dma_attrs);
+}
+
+/**
+ * ib_dma_map_sg - Map a scatter/gather list to DMA addresses
+ * @dev: The device for which the DMA addresses are to be created
+ * @sg: The array of scatter/gather entries
+ * @nents: The number of scatter/gather entries
+ * @direction: The direction of the DMA
+ */
+static inline int ib_dma_map_sg(struct ib_device *dev,
+				struct scatterlist *sg, int nents,
+				enum dma_data_direction direction)
+{
+	return ib_dma_map_sg_attrs(dev, sg, nents, direction, 0);
+}
+
+/**
+ * ib_dma_unmap_sg - Unmap a scatter/gather list of DMA addresses
+ * @dev: The device for which the DMA addresses were created
+ * @sg: The array of scatter/gather entries
+ * @nents: The number of scatter/gather entries
+ * @direction: The direction of the DMA
+ */
+static inline void ib_dma_unmap_sg(struct ib_device *dev,
+				   struct scatterlist *sg, int nents,
+				   enum dma_data_direction direction)
+{
+	ib_dma_unmap_sg_attrs(dev, sg, nents, direction, 0);
+}
+
+/**
+ * ib_dma_max_seg_size - Return the size limit of a single DMA transfer
+ * @dev: The device to query
+ *
+ * The returned value represents a size in bytes.
+ */
+static inline unsigned int ib_dma_max_seg_size(struct ib_device *dev)
+{
+	if (ib_uses_virt_dma(dev))
+		return UINT_MAX;
+	return dma_get_max_seg_size(dev->dma_device);
+}
+
+/**
+ * ib_dma_sync_single_for_cpu - Prepare DMA region to be accessed by CPU
+ * @dev: The device for which the DMA address was created
+ * @addr: The DMA address
+ * @size: The size of the region in bytes
+ * @dir: The direction of the DMA
+ */
+static inline void ib_dma_sync_single_for_cpu(struct ib_device *dev,
+					      u64 addr,
+					      size_t size,
+					      enum dma_data_direction dir)
+{
+	if (!ib_uses_virt_dma(dev))
+		dma_sync_single_for_cpu(dev->dma_device, addr, size, dir);
+}
+
+/**
+ * ib_dma_sync_single_for_device - Prepare DMA region to be accessed by device
+ * @dev: The device for which the DMA address was created
+ * @addr: The DMA address
+ * @size: The size of the region in bytes
+ * @dir: The direction of the DMA
+ */
+static inline void ib_dma_sync_single_for_device(struct ib_device *dev,
+						 u64 addr,
+						 size_t size,
+						 enum dma_data_direction dir)
+{
+	if (!ib_uses_virt_dma(dev))
+		dma_sync_single_for_device(dev->dma_device, addr, size, dir);
+}
+
+/**
+ * ib_dma_alloc_coherent - Allocate memory and map it for DMA
+ * @dev: The device for which the DMA address is requested
+ * @size: The size of the region to allocate in bytes
+ * @dma_handle: A pointer for returning the DMA address of the region
+ * @flag: memory allocator flags
+ */
+static inline void *ib_dma_alloc_coherent(struct ib_device *dev,
+					   size_t size,
+					   dma_addr_t *dma_handle,
+					   gfp_t flag)
+{
+	return dma_alloc_coherent(dev->dma_device, size, dma_handle, flag);
+}
+
+/**
+ * ib_dma_free_coherent - Free memory allocated by ib_dma_alloc_coherent()
+ * @dev: The device for which the DMA addresses were allocated
+ * @size: The size of the region
+ * @cpu_addr: the address returned by ib_dma_alloc_coherent()
+ * @dma_handle: the DMA address returned by ib_dma_alloc_coherent()
+ */
+static inline void ib_dma_free_coherent(struct ib_device *dev,
+					size_t size, void *cpu_addr,
+					dma_addr_t dma_handle)
+{
+	dma_free_coherent(dev->dma_device, size, cpu_addr, dma_handle);
+}
+
+/* ib_reg_user_mr - register a memory region for virtual addresses from kernel
+ * space. This function should be called when 'current' is the owning MM.
+ */
+struct ib_mr *ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+			     u64 virt_addr, int mr_access_flags);
+
+/* ib_advise_mr -  give an advice about an address range in a memory region */
+int ib_advise_mr(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice,
+		 u32 flags, struct ib_sge *sg_list, u32 num_sge);
+/**
+ * ib_dereg_mr_user - Deregisters a memory region and removes it from the
+ *   HCA translation table.
+ * @mr: The memory region to deregister.
+ * @udata: Valid user data or NULL for kernel object
+ *
+ * This function can fail, if the memory region has memory windows bound to it.
+ */
+int ib_dereg_mr_user(struct ib_mr *mr, struct ib_udata *udata);
+
+/**
+ * ib_dereg_mr - Deregisters a kernel memory region and removes it from the
+ *   HCA translation table.
+ * @mr: The memory region to deregister.
+ *
+ * This function can fail, if the memory region has memory windows bound to it.
+ *
+ * NOTE: for user mr use ib_dereg_mr_user with valid udata!
+ */
+static inline int ib_dereg_mr(struct ib_mr *mr)
+{
+	return ib_dereg_mr_user(mr, NULL);
+}
+
+struct ib_mr *ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
+			  u32 max_num_sg);
+
+struct ib_mr *ib_alloc_mr_integrity(struct ib_pd *pd,
+				    u32 max_num_data_sg,
+				    u32 max_num_meta_sg);
+
+/**
+ * ib_update_fast_reg_key - updates the key portion of the fast_reg MR
+ *   R_Key and L_Key.
+ * @mr - struct ib_mr pointer to be updated.
+ * @newkey - new key to be used.
+ */
+static inline void ib_update_fast_reg_key(struct ib_mr *mr, u8 newkey)
+{
+	mr->lkey = (mr->lkey & 0xffffff00) | newkey;
+	mr->rkey = (mr->rkey & 0xffffff00) | newkey;
+}
+
+/**
+ * ib_inc_rkey - increments the key portion of the given rkey. Can be used
+ * for calculating a new rkey for type 2 memory windows.
+ * @rkey - the rkey to increment.
+ */
+static inline u32 ib_inc_rkey(u32 rkey)
+{
+	const u32 mask = 0x000000ff;
+	return ((rkey + 1) & mask) | (rkey & ~mask);
+}
+
+/**
+ * ib_attach_mcast - Attaches the specified QP to a multicast group.
+ * @qp: QP to attach to the multicast group.  The QP must be type
+ *   IB_QPT_UD.
+ * @gid: Multicast group GID.
+ * @lid: Multicast group LID in host byte order.
+ *
+ * In order to send and receive multicast packets, subnet
+ * administration must have created the multicast group and configured
+ * the fabric appropriately.  The port associated with the specified
+ * QP must also be a member of the multicast group.
+ */
+int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid);
+
+/**
+ * ib_detach_mcast - Detaches the specified QP from a multicast group.
+ * @qp: QP to detach from the multicast group.
+ * @gid: Multicast group GID.
+ * @lid: Multicast group LID in host byte order.
+ */
+int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid);
+
+struct ib_xrcd *ib_alloc_xrcd_user(struct ib_device *device,
+				   struct inode *inode, struct ib_udata *udata);
+int ib_dealloc_xrcd_user(struct ib_xrcd *xrcd, struct ib_udata *udata);
+
+static inline int ib_check_mr_access(int flags)
+{
+	/*
+	 * Local write permission is required if remote write or
+	 * remote atomic permission is also requested.
+	 */
+	if (flags & (IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_REMOTE_WRITE) &&
+	    !(flags & IB_ACCESS_LOCAL_WRITE))
+		return -EINVAL;
+
+	if (flags & ~IB_ACCESS_SUPPORTED)
+		return -EINVAL;
+
+	return 0;
+}
+
+static inline bool ib_access_writable(int access_flags)
+{
+	/*
+	 * We have writable memory backing the MR if any of the following
+	 * access flags are set.  "Local write" and "remote write" obviously
+	 * require write access.  "Remote atomic" can do things like fetch and
+	 * add, which will modify memory, and "MW bind" can change permissions
+	 * by binding a window.
+	 */
+	return access_flags &
+		(IB_ACCESS_LOCAL_WRITE   | IB_ACCESS_REMOTE_WRITE |
+		 IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_MW_BIND);
+}
+
+/**
+ * ib_check_mr_status: lightweight check of MR status.
+ *     This routine may provide status checks on a selected
+ *     ib_mr. first use is for signature status check.
+ *
+ * @mr: A memory region.
+ * @check_mask: Bitmask of which checks to perform from
+ *     ib_mr_status_check enumeration.
+ * @mr_status: The container of relevant status checks.
+ *     failed checks will be indicated in the status bitmask
+ *     and the relevant info shall be in the error item.
+ */
+int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
+		       struct ib_mr_status *mr_status);
+
+/**
+ * ib_device_try_get: Hold a registration lock
+ * device: The device to lock
+ *
+ * A device under an active registration lock cannot become unregistered. It
+ * is only possible to obtain a registration lock on a device that is fully
+ * registered, otherwise this function returns false.
+ *
+ * The registration lock is only necessary for actions which require the
+ * device to still be registered. Uses that only require the device pointer to
+ * be valid should use get_device(&ibdev->dev) to hold the memory.
+ *
+ */
+static inline bool ib_device_try_get(struct ib_device *dev)
+{
+	return refcount_inc_not_zero(&dev->refcount);
+}
+
+void ib_device_put(struct ib_device *device);
+struct ib_device *ib_device_get_by_netdev(struct net_device *ndev,
+					  enum rdma_driver_id driver_id);
+struct ib_device *ib_device_get_by_name(const char *name,
+					enum rdma_driver_id driver_id);
+struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, u8 port,
+					    u16 pkey, const union ib_gid *gid,
+					    const struct sockaddr *addr);
+int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev,
+			 unsigned int port);
+struct net_device *ib_device_netdev(struct ib_device *dev, u8 port);
+
+struct ib_wq *ib_create_wq(struct ib_pd *pd,
+			   struct ib_wq_init_attr *init_attr);
+int ib_destroy_wq_user(struct ib_wq *wq, struct ib_udata *udata);
+int ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *attr,
+		 u32 wq_attr_mask);
+
+int ib_map_mr_sg(struct ib_mr *mr, struct scatterlist *sg, int sg_nents,
+		 unsigned int *sg_offset, unsigned int page_size);
+int ib_map_mr_sg_pi(struct ib_mr *mr, struct scatterlist *data_sg,
+		    int data_sg_nents, unsigned int *data_sg_offset,
+		    struct scatterlist *meta_sg, int meta_sg_nents,
+		    unsigned int *meta_sg_offset, unsigned int page_size);
+
+static inline int
+ib_map_mr_sg_zbva(struct ib_mr *mr, struct scatterlist *sg, int sg_nents,
+		  unsigned int *sg_offset, unsigned int page_size)
+{
+	int n;
+
+	n = ib_map_mr_sg(mr, sg, sg_nents, sg_offset, page_size);
+	mr->iova = 0;
+
+	return n;
+}
+
+int ib_sg_to_pages(struct ib_mr *mr, struct scatterlist *sgl, int sg_nents,
+		unsigned int *sg_offset, int (*set_page)(struct ib_mr *, u64));
+
+void ib_drain_rq(struct ib_qp *qp);
+void ib_drain_sq(struct ib_qp *qp);
+void ib_drain_qp(struct ib_qp *qp);
+
+int ib_get_eth_speed(struct ib_device *dev, u8 port_num, u16 *speed, u8 *width);
+
+static inline u8 *rdma_ah_retrieve_dmac(struct rdma_ah_attr *attr)
+{
+	if (attr->type == RDMA_AH_ATTR_TYPE_ROCE)
+		return attr->roce.dmac;
+	return NULL;
+}
+
+static inline void rdma_ah_set_dlid(struct rdma_ah_attr *attr, u32 dlid)
+{
+	if (attr->type == RDMA_AH_ATTR_TYPE_IB)
+		attr->ib.dlid = (u16)dlid;
+	else if (attr->type == RDMA_AH_ATTR_TYPE_OPA)
+		attr->opa.dlid = dlid;
+}
+
+static inline u32 rdma_ah_get_dlid(const struct rdma_ah_attr *attr)
+{
+	if (attr->type == RDMA_AH_ATTR_TYPE_IB)
+		return attr->ib.dlid;
+	else if (attr->type == RDMA_AH_ATTR_TYPE_OPA)
+		return attr->opa.dlid;
+	return 0;
+}
+
+static inline void rdma_ah_set_sl(struct rdma_ah_attr *attr, u8 sl)
+{
+	attr->sl = sl;
+}
+
+static inline u8 rdma_ah_get_sl(const struct rdma_ah_attr *attr)
+{
+	return attr->sl;
+}
+
+static inline void rdma_ah_set_path_bits(struct rdma_ah_attr *attr,
+					 u8 src_path_bits)
+{
+	if (attr->type == RDMA_AH_ATTR_TYPE_IB)
+		attr->ib.src_path_bits = src_path_bits;
+	else if (attr->type == RDMA_AH_ATTR_TYPE_OPA)
+		attr->opa.src_path_bits = src_path_bits;
+}
+
+static inline u8 rdma_ah_get_path_bits(const struct rdma_ah_attr *attr)
+{
+	if (attr->type == RDMA_AH_ATTR_TYPE_IB)
+		return attr->ib.src_path_bits;
+	else if (attr->type == RDMA_AH_ATTR_TYPE_OPA)
+		return attr->opa.src_path_bits;
+	return 0;
+}
+
+static inline void rdma_ah_set_make_grd(struct rdma_ah_attr *attr,
+					bool make_grd)
+{
+	if (attr->type == RDMA_AH_ATTR_TYPE_OPA)
+		attr->opa.make_grd = make_grd;
+}
+
+static inline bool rdma_ah_get_make_grd(const struct rdma_ah_attr *attr)
+{
+	if (attr->type == RDMA_AH_ATTR_TYPE_OPA)
+		return attr->opa.make_grd;
+	return false;
+}
+
+static inline void rdma_ah_set_port_num(struct rdma_ah_attr *attr, u8 port_num)
+{
+	attr->port_num = port_num;
+}
+
+static inline u8 rdma_ah_get_port_num(const struct rdma_ah_attr *attr)
+{
+	return attr->port_num;
+}
+
+static inline void rdma_ah_set_static_rate(struct rdma_ah_attr *attr,
+					   u8 static_rate)
+{
+	attr->static_rate = static_rate;
+}
+
+static inline u8 rdma_ah_get_static_rate(const struct rdma_ah_attr *attr)
+{
+	return attr->static_rate;
+}
+
+static inline void rdma_ah_set_ah_flags(struct rdma_ah_attr *attr,
+					enum ib_ah_flags flag)
+{
+	attr->ah_flags = flag;
+}
+
+static inline enum ib_ah_flags
+		rdma_ah_get_ah_flags(const struct rdma_ah_attr *attr)
+{
+	return attr->ah_flags;
+}
+
+static inline const struct ib_global_route
+		*rdma_ah_read_grh(const struct rdma_ah_attr *attr)
+{
+	return &attr->grh;
+}
+
+/*To retrieve and modify the grh */
+static inline struct ib_global_route
+		*rdma_ah_retrieve_grh(struct rdma_ah_attr *attr)
+{
+	return &attr->grh;
+}
+
+static inline void rdma_ah_set_dgid_raw(struct rdma_ah_attr *attr, void *dgid)
+{
+	struct ib_global_route *grh = rdma_ah_retrieve_grh(attr);
+
+	memcpy(grh->dgid.raw, dgid, sizeof(grh->dgid));
+}
+
+static inline void rdma_ah_set_subnet_prefix(struct rdma_ah_attr *attr,
+					     __be64 prefix)
+{
+	struct ib_global_route *grh = rdma_ah_retrieve_grh(attr);
+
+	grh->dgid.global.subnet_prefix = prefix;
+}
+
+static inline void rdma_ah_set_interface_id(struct rdma_ah_attr *attr,
+					    __be64 if_id)
+{
+	struct ib_global_route *grh = rdma_ah_retrieve_grh(attr);
+
+	grh->dgid.global.interface_id = if_id;
+}
+
+static inline void rdma_ah_set_grh(struct rdma_ah_attr *attr,
+				   union ib_gid *dgid, u32 flow_label,
+				   u8 sgid_index, u8 hop_limit,
+				   u8 traffic_class)
+{
+	struct ib_global_route *grh = rdma_ah_retrieve_grh(attr);
+
+	attr->ah_flags = IB_AH_GRH;
+	if (dgid)
+		grh->dgid = *dgid;
+	grh->flow_label = flow_label;
+	grh->sgid_index = sgid_index;
+	grh->hop_limit = hop_limit;
+	grh->traffic_class = traffic_class;
+	grh->sgid_attr = NULL;
+}
+
+void rdma_destroy_ah_attr(struct rdma_ah_attr *ah_attr);
+void rdma_move_grh_sgid_attr(struct rdma_ah_attr *attr, union ib_gid *dgid,
+			     u32 flow_label, u8 hop_limit, u8 traffic_class,
+			     const struct ib_gid_attr *sgid_attr);
+void rdma_copy_ah_attr(struct rdma_ah_attr *dest,
+		       const struct rdma_ah_attr *src);
+void rdma_replace_ah_attr(struct rdma_ah_attr *old,
+			  const struct rdma_ah_attr *new);
+void rdma_move_ah_attr(struct rdma_ah_attr *dest, struct rdma_ah_attr *src);
+
+/**
+ * rdma_ah_find_type - Return address handle type.
+ *
+ * @dev: Device to be checked
+ * @port_num: Port number
+ */
+static inline enum rdma_ah_attr_type rdma_ah_find_type(struct ib_device *dev,
+						       u8 port_num)
+{
+	if (rdma_protocol_roce(dev, port_num))
+		return RDMA_AH_ATTR_TYPE_ROCE;
+	if (rdma_protocol_ib(dev, port_num)) {
+		if (rdma_cap_opa_ah(dev, port_num))
+			return RDMA_AH_ATTR_TYPE_OPA;
+		return RDMA_AH_ATTR_TYPE_IB;
+	}
+
+	return RDMA_AH_ATTR_TYPE_UNDEFINED;
+}
+
+/**
+ * ib_lid_cpu16 - Return lid in 16bit CPU encoding.
+ *     In the current implementation the only way to get
+ *     get the 32bit lid is from other sources for OPA.
+ *     For IB, lids will always be 16bits so cast the
+ *     value accordingly.
+ *
+ * @lid: A 32bit LID
+ */
+static inline u16 ib_lid_cpu16(u32 lid)
+{
+	WARN_ON_ONCE(lid & 0xFFFF0000);
+	return (u16)lid;
+}
+
+/**
+ * ib_lid_be16 - Return lid in 16bit BE encoding.
+ *
+ * @lid: A 32bit LID
+ */
+static inline __be16 ib_lid_be16(u32 lid)
+{
+	WARN_ON_ONCE(lid & 0xFFFF0000);
+	return cpu_to_be16((u16)lid);
+}
+
+/**
+ * ib_get_vector_affinity - Get the affinity mappings of a given completion
+ *   vector
+ * @device:         the rdma device
+ * @comp_vector:    index of completion vector
+ *
+ * Returns NULL on failure, otherwise a corresponding cpu map of the
+ * completion vector (returns all-cpus map if the device driver doesn't
+ * implement get_vector_affinity).
+ */
+static inline const struct cpumask *
+ib_get_vector_affinity(struct ib_device *device, int comp_vector)
+{
+	if (comp_vector < 0 || comp_vector >= device->num_comp_vectors ||
+	    !device->ops.get_vector_affinity)
+		return NULL;
+
+	return device->ops.get_vector_affinity(device, comp_vector);
+
+}
+
+/**
+ * rdma_roce_rescan_device - Rescan all of the network devices in the system
+ * and add their gids, as needed, to the relevant RoCE devices.
+ *
+ * @device:         the rdma device
+ */
+void rdma_roce_rescan_device(struct ib_device *ibdev);
+
+struct ib_ucontext *ib_uverbs_get_ucontext_file(struct ib_uverbs_file *ufile);
+
+int uverbs_destroy_def_handler(struct uverbs_attr_bundle *attrs);
+
+struct net_device *rdma_alloc_netdev(struct ib_device *device, u8 port_num,
+				     enum rdma_netdev_t type, const char *name,
+				     unsigned char name_assign_type,
+				     void (*setup)(struct net_device *));
+
+int rdma_init_netdev(struct ib_device *device, u8 port_num,
+		     enum rdma_netdev_t type, const char *name,
+		     unsigned char name_assign_type,
+		     void (*setup)(struct net_device *),
+		     struct net_device *netdev);
+
+/**
+ * rdma_set_device_sysfs_group - Set device attributes group to have
+ *				 driver specific sysfs entries at
+ *				 for infiniband class.
+ *
+ * @device:	device pointer for which attributes to be created
+ * @group:	Pointer to group which should be added when device
+ *		is registered with sysfs.
+ * rdma_set_device_sysfs_group() allows existing drivers to expose one
+ * group per device to have sysfs attributes.
+ *
+ * NOTE: New drivers should not make use of this API; instead new device
+ * parameter should be exposed via netlink command. This API and mechanism
+ * exist only for existing drivers.
+ */
+static inline void
+rdma_set_device_sysfs_group(struct ib_device *dev,
+			    const struct attribute_group *group)
+{
+	dev->groups[1] = group;
+}
+
+/**
+ * rdma_device_to_ibdev - Get ib_device pointer from device pointer
+ *
+ * @device:	device pointer for which ib_device pointer to retrieve
+ *
+ * rdma_device_to_ibdev() retrieves ib_device pointer from device.
+ *
+ */
+static inline struct ib_device *rdma_device_to_ibdev(struct device *device)
+{
+	struct ib_core_device *coredev =
+		container_of(device, struct ib_core_device, dev);
+
+	return coredev->owner;
+}
+
+/**
+ * ibdev_to_node - return the NUMA node for a given ib_device
+ * @dev:	device to get the NUMA node for.
+ */
+static inline int ibdev_to_node(struct ib_device *ibdev)
+{
+	struct device *parent = ibdev->dev.parent;
+
+	if (!parent)
+		return NUMA_NO_NODE;
+	return dev_to_node(parent);
+}
+
+/**
+ * rdma_device_to_drv_device - Helper macro to reach back to driver's
+ *			       ib_device holder structure from device pointer.
+ *
+ * NOTE: New drivers should not make use of this API; This API is only for
+ * existing drivers who have exposed sysfs entries using
+ * rdma_set_device_sysfs_group().
+ */
+#define rdma_device_to_drv_device(dev, drv_dev_struct, ibdev_member)           \
+	container_of(rdma_device_to_ibdev(dev), drv_dev_struct, ibdev_member)
+
+bool rdma_dev_access_netns(const struct ib_device *device,
+			   const struct net *net);
+
+#define IB_ROCE_UDP_ENCAP_VALID_PORT_MIN (0xC000)
+#define IB_ROCE_UDP_ENCAP_VALID_PORT_MAX (0xFFFF)
+#define IB_GRH_FLOWLABEL_MASK (0x000FFFFF)
+
+/**
+ * rdma_flow_label_to_udp_sport - generate a RoCE v2 UDP src port value based
+ *                               on the flow_label
+ *
+ * This function will convert the 20 bit flow_label input to a valid RoCE v2
+ * UDP src port 14 bit value. All RoCE V2 drivers should use this same
+ * convention.
+ */
+static inline u16 rdma_flow_label_to_udp_sport(u32 fl)
+{
+	u32 fl_low = fl & 0x03fff, fl_high = fl & 0xFC000;
+
+	fl_low ^= fl_high >> 14;
+	return (u16)(fl_low | IB_ROCE_UDP_ENCAP_VALID_PORT_MIN);
+}
+
+/**
+ * rdma_calc_flow_label - generate a RDMA symmetric flow label value based on
+ *                        local and remote qpn values
+ *
+ * This function folded the multiplication results of two qpns, 24 bit each,
+ * fields, and converts it to a 20 bit results.
+ *
+ * This function will create symmetric flow_label value based on the local
+ * and remote qpn values. this will allow both the requester and responder
+ * to calculate the same flow_label for a given connection.
+ *
+ * This helper function should be used by driver in case the upper layer
+ * provide a zero flow_label value. This is to improve entropy of RDMA
+ * traffic in the network.
+ */
+static inline u32 rdma_calc_flow_label(u32 lqpn, u32 rqpn)
+{
+	u64 v = (u64)lqpn * rqpn;
+
+	v ^= v >> 20;
+	v ^= v >> 40;
+
+	return (u32)(v & IB_GRH_FLOWLABEL_MASK);
+}
+#endif /* IB_VERBS_H */
diff --git a/include/rdma/iba.h b/include/rdma/iba.h
new file mode 100644
index 000000000..6a1115b02
--- /dev/null
+++ b/include/rdma/iba.h
@@ -0,0 +1,146 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2020, Mellanox Technologies inc.  All rights reserved.
+ */
+#ifndef _IBA_DEFS_H_
+#define _IBA_DEFS_H_
+
+#include <linux/kernel.h>
+#include <linux/bitfield.h>
+#include <asm/unaligned.h>
+
+static inline u32 _iba_get8(const u8 *ptr)
+{
+	return *ptr;
+}
+
+static inline void _iba_set8(u8 *ptr, u32 mask, u32 prep_value)
+{
+	*ptr = (*ptr & ~mask) | prep_value;
+}
+
+static inline u16 _iba_get16(const __be16 *ptr)
+{
+	return be16_to_cpu(*ptr);
+}
+
+static inline void _iba_set16(__be16 *ptr, u16 mask, u16 prep_value)
+{
+	*ptr = cpu_to_be16((be16_to_cpu(*ptr) & ~mask) | prep_value);
+}
+
+static inline u32 _iba_get32(const __be32 *ptr)
+{
+	return be32_to_cpu(*ptr);
+}
+
+static inline void _iba_set32(__be32 *ptr, u32 mask, u32 prep_value)
+{
+	*ptr = cpu_to_be32((be32_to_cpu(*ptr) & ~mask) | prep_value);
+}
+
+static inline u64 _iba_get64(const __be64 *ptr)
+{
+	/*
+	 * The mads are constructed so that 32 bit and smaller are naturally
+	 * aligned, everything larger has a max alignment of 4 bytes.
+	 */
+	return be64_to_cpu(get_unaligned(ptr));
+}
+
+static inline void _iba_set64(__be64 *ptr, u64 mask, u64 prep_value)
+{
+	put_unaligned(cpu_to_be64((_iba_get64(ptr) & ~mask) | prep_value), ptr);
+}
+
+#define _IBA_SET(field_struct, field_offset, field_mask, num_bits, ptr, value) \
+	({                                                                     \
+		field_struct *_ptr = ptr;                                      \
+		_iba_set##num_bits((void *)_ptr + (field_offset), field_mask,  \
+				   FIELD_PREP(field_mask, value));             \
+	})
+#define IBA_SET(field, ptr, value) _IBA_SET(field, ptr, value)
+
+#define _IBA_GET_MEM_PTR(field_struct, field_offset, type, num_bits, ptr)      \
+	({                                                                     \
+		field_struct *_ptr = ptr;                                      \
+		(type *)((void *)_ptr + (field_offset));                       \
+	})
+#define IBA_GET_MEM_PTR(field, ptr) _IBA_GET_MEM_PTR(field, ptr)
+
+/* FIXME: A set should always set the entire field, meaning we should zero the trailing bytes */
+#define _IBA_SET_MEM(field_struct, field_offset, type, num_bits, ptr, in,      \
+		     bytes)                                                    \
+	({                                                                     \
+		const type *_in_ptr = in;                                      \
+		WARN_ON(bytes * 8 > num_bits);                                 \
+		if (in && bytes)                                               \
+			memcpy(_IBA_GET_MEM_PTR(field_struct, field_offset,    \
+						type, num_bits, ptr),          \
+			       _in_ptr, bytes);                                \
+	})
+#define IBA_SET_MEM(field, ptr, in, bytes) _IBA_SET_MEM(field, ptr, in, bytes)
+
+#define _IBA_GET(field_struct, field_offset, field_mask, num_bits, ptr)        \
+	({                                                                     \
+		const field_struct *_ptr = ptr;                                \
+		(u##num_bits) FIELD_GET(                                       \
+			field_mask, _iba_get##num_bits((const void *)_ptr +    \
+						       (field_offset)));       \
+	})
+#define IBA_GET(field, ptr) _IBA_GET(field, ptr)
+
+#define _IBA_GET_MEM(field_struct, field_offset, type, num_bits, ptr, out,     \
+		     bytes)                                                    \
+	({                                                                     \
+		type *_out_ptr = out;                                          \
+		WARN_ON(bytes * 8 > num_bits);                                 \
+		if (out && bytes)                                              \
+			memcpy(_out_ptr,                                       \
+			       _IBA_GET_MEM_PTR(field_struct, field_offset,    \
+						type, num_bits, ptr),          \
+			       bytes);                                         \
+	})
+#define IBA_GET_MEM(field, ptr, out, bytes) _IBA_GET_MEM(field, ptr, out, bytes)
+
+/*
+ * The generated list becomes the parameters to the macros, the order is:
+ *  - struct this applies to
+ *  - starting offset of the max
+ *  - GENMASK or GENMASK_ULL in CPU order
+ *  - The width of data the mask operations should work on, in bits
+ */
+
+/*
+ * Extraction using a tabular description like table 106. bit_offset is from
+ * the Byte[Bit] notation.
+ */
+#define IBA_FIELD_BLOC(field_struct, byte_offset, bit_offset, num_bits)        \
+	field_struct, byte_offset,                                             \
+		GENMASK(7 - (bit_offset), 7 - (bit_offset) - (num_bits - 1)),  \
+		8
+#define IBA_FIELD8_LOC(field_struct, byte_offset, num_bits)                    \
+	IBA_FIELD_BLOC(field_struct, byte_offset, 0, num_bits)
+
+#define IBA_FIELD16_LOC(field_struct, byte_offset, num_bits)                   \
+	field_struct, (byte_offset)&0xFFFE,                                    \
+		GENMASK(15 - (((byte_offset) % 2) * 8),                        \
+			15 - (((byte_offset) % 2) * 8) - (num_bits - 1)),      \
+		16
+
+#define IBA_FIELD32_LOC(field_struct, byte_offset, num_bits)                   \
+	field_struct, (byte_offset)&0xFFFC,                                    \
+		GENMASK(31 - (((byte_offset) % 4) * 8),                        \
+			31 - (((byte_offset) % 4) * 8) - (num_bits - 1)),      \
+		32
+
+#define IBA_FIELD64_LOC(field_struct, byte_offset)                             \
+	field_struct, byte_offset, GENMASK_ULL(63, 0), 64
+/*
+ * In IBTA spec, everything that is more than 64bits is multiple
+ * of bytes without leftover bits.
+ */
+#define IBA_FIELD_MLOC(field_struct, byte_offset, num_bits, type)              \
+	field_struct, byte_offset, type, num_bits
+
+#endif /* _IBA_DEFS_H_ */
diff --git a/include/rdma/ibta_vol1_c12.h b/include/rdma/ibta_vol1_c12.h
new file mode 100644
index 000000000..960c86bec
--- /dev/null
+++ b/include/rdma/ibta_vol1_c12.h
@@ -0,0 +1,219 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2020, Mellanox Technologies inc. All rights reserved.
+ *
+ * This file is IBTA volume 1, chapter 12 declarations:
+ *  CHAPTER 12: COMMUNICATION MANAGEMENT
+ */
+#ifndef _IBTA_VOL1_C12_H_
+#define _IBTA_VOL1_C12_H_
+
+#include <rdma/iba.h>
+
+#define CM_FIELD_BLOC(field_struct, byte_offset, bits_offset, width)           \
+	IBA_FIELD_BLOC(field_struct,                                           \
+		       (byte_offset + sizeof(struct ib_mad_hdr)), bits_offset, \
+		       width)
+#define CM_FIELD8_LOC(field_struct, byte_offset, width)                        \
+	IBA_FIELD8_LOC(field_struct,                                           \
+		       (byte_offset + sizeof(struct ib_mad_hdr)), width)
+#define CM_FIELD16_LOC(field_struct, byte_offset, width)                       \
+	IBA_FIELD16_LOC(field_struct,                                          \
+			(byte_offset + sizeof(struct ib_mad_hdr)), width)
+#define CM_FIELD32_LOC(field_struct, byte_offset, width)                       \
+	IBA_FIELD32_LOC(field_struct,                                          \
+			(byte_offset + sizeof(struct ib_mad_hdr)), width)
+#define CM_FIELD64_LOC(field_struct, byte_offset)                              \
+	IBA_FIELD64_LOC(field_struct, (byte_offset + sizeof(struct ib_mad_hdr)))
+#define CM_FIELD_MLOC(field_struct, byte_offset, width, type)                  \
+	IBA_FIELD_MLOC(field_struct,                                           \
+		       (byte_offset + sizeof(struct ib_mad_hdr)), width, type)
+#define CM_STRUCT(field_struct, total_len)                                     \
+	field_struct                                                           \
+	{                                                                      \
+		struct ib_mad_hdr hdr;                                         \
+		u32 _data[(total_len) / 32 +                                   \
+			  BUILD_BUG_ON_ZERO((total_len) % 32 != 0)];           \
+	}
+
+/* Table 106 REQ Message Contents */
+#define CM_REQ_LOCAL_COMM_ID CM_FIELD32_LOC(struct cm_req_msg, 0, 32)
+#define CM_REQ_VENDOR_ID CM_FIELD32_LOC(struct cm_req_msg, 5, 24)
+#define CM_REQ_SERVICE_ID CM_FIELD64_LOC(struct cm_req_msg, 8)
+#define CM_REQ_LOCAL_CA_GUID CM_FIELD64_LOC(struct cm_req_msg, 16)
+#define CM_REQ_LOCAL_Q_KEY CM_FIELD32_LOC(struct cm_req_msg, 28, 32)
+#define CM_REQ_LOCAL_QPN CM_FIELD32_LOC(struct cm_req_msg, 32, 24)
+#define CM_REQ_RESPONDER_RESOURCES CM_FIELD8_LOC(struct cm_req_msg, 35, 8)
+#define CM_REQ_LOCAL_EECN CM_FIELD32_LOC(struct cm_req_msg, 36, 24)
+#define CM_REQ_INITIATOR_DEPTH CM_FIELD8_LOC(struct cm_req_msg, 39, 8)
+#define CM_REQ_REMOTE_EECN CM_FIELD32_LOC(struct cm_req_msg, 40, 24)
+#define CM_REQ_REMOTE_CM_RESPONSE_TIMEOUT                                      \
+	CM_FIELD8_LOC(struct cm_req_msg, 43, 5)
+#define CM_REQ_TRANSPORT_SERVICE_TYPE CM_FIELD_BLOC(struct cm_req_msg, 43, 5, 2)
+#define CM_REQ_END_TO_END_FLOW_CONTROL                                         \
+	CM_FIELD_BLOC(struct cm_req_msg, 43, 7, 1)
+#define CM_REQ_STARTING_PSN CM_FIELD32_LOC(struct cm_req_msg, 44, 24)
+#define CM_REQ_LOCAL_CM_RESPONSE_TIMEOUT CM_FIELD8_LOC(struct cm_req_msg, 47, 5)
+#define CM_REQ_RETRY_COUNT CM_FIELD_BLOC(struct cm_req_msg, 47, 5, 3)
+#define CM_REQ_PARTITION_KEY CM_FIELD16_LOC(struct cm_req_msg, 48, 16)
+#define CM_REQ_PATH_PACKET_PAYLOAD_MTU CM_FIELD8_LOC(struct cm_req_msg, 50, 4)
+#define CM_REQ_RDC_EXISTS CM_FIELD_BLOC(struct cm_req_msg, 50, 4, 1)
+#define CM_REQ_RNR_RETRY_COUNT CM_FIELD_BLOC(struct cm_req_msg, 50, 5, 3)
+#define CM_REQ_MAX_CM_RETRIES CM_FIELD8_LOC(struct cm_req_msg, 51, 4)
+#define CM_REQ_SRQ CM_FIELD_BLOC(struct cm_req_msg, 51, 4, 1)
+#define CM_REQ_EXTENDED_TRANSPORT_TYPE                                         \
+	CM_FIELD_BLOC(struct cm_req_msg, 51, 5, 3)
+#define CM_REQ_PRIMARY_LOCAL_PORT_LID CM_FIELD16_LOC(struct cm_req_msg, 52, 16)
+#define CM_REQ_PRIMARY_REMOTE_PORT_LID CM_FIELD16_LOC(struct cm_req_msg, 54, 16)
+#define CM_REQ_PRIMARY_LOCAL_PORT_GID                                          \
+	CM_FIELD_MLOC(struct cm_req_msg, 56, 128, union ib_gid)
+#define CM_REQ_PRIMARY_REMOTE_PORT_GID                                         \
+	CM_FIELD_MLOC(struct cm_req_msg, 72, 128, union ib_gid)
+#define CM_REQ_PRIMARY_FLOW_LABEL CM_FIELD32_LOC(struct cm_req_msg, 88, 20)
+#define CM_REQ_PRIMARY_PACKET_RATE CM_FIELD_BLOC(struct cm_req_msg, 91, 2, 6)
+#define CM_REQ_PRIMARY_TRAFFIC_CLASS CM_FIELD8_LOC(struct cm_req_msg, 92, 8)
+#define CM_REQ_PRIMARY_HOP_LIMIT CM_FIELD8_LOC(struct cm_req_msg, 93, 8)
+#define CM_REQ_PRIMARY_SL CM_FIELD8_LOC(struct cm_req_msg, 94, 4)
+#define CM_REQ_PRIMARY_SUBNET_LOCAL CM_FIELD_BLOC(struct cm_req_msg, 94, 4, 1)
+#define CM_REQ_PRIMARY_LOCAL_ACK_TIMEOUT CM_FIELD8_LOC(struct cm_req_msg, 95, 5)
+#define CM_REQ_ALTERNATE_LOCAL_PORT_LID                                        \
+	CM_FIELD16_LOC(struct cm_req_msg, 96, 16)
+#define CM_REQ_ALTERNATE_REMOTE_PORT_LID                                       \
+	CM_FIELD16_LOC(struct cm_req_msg, 98, 16)
+#define CM_REQ_ALTERNATE_LOCAL_PORT_GID                                        \
+	CM_FIELD_MLOC(struct cm_req_msg, 100, 128, union ib_gid)
+#define CM_REQ_ALTERNATE_REMOTE_PORT_GID                                       \
+	CM_FIELD_MLOC(struct cm_req_msg, 116, 128, union ib_gid)
+#define CM_REQ_ALTERNATE_FLOW_LABEL CM_FIELD32_LOC(struct cm_req_msg, 132, 20)
+#define CM_REQ_ALTERNATE_PACKET_RATE CM_FIELD_BLOC(struct cm_req_msg, 135, 2, 6)
+#define CM_REQ_ALTERNATE_TRAFFIC_CLASS CM_FIELD8_LOC(struct cm_req_msg, 136, 8)
+#define CM_REQ_ALTERNATE_HOP_LIMIT CM_FIELD8_LOC(struct cm_req_msg, 137, 8)
+#define CM_REQ_ALTERNATE_SL CM_FIELD8_LOC(struct cm_req_msg, 138, 4)
+#define CM_REQ_ALTERNATE_SUBNET_LOCAL                                          \
+	CM_FIELD_BLOC(struct cm_req_msg, 138, 4, 1)
+#define CM_REQ_ALTERNATE_LOCAL_ACK_TIMEOUT                                     \
+	CM_FIELD8_LOC(struct cm_req_msg, 139, 5)
+#define CM_REQ_SAP_SUPPORTED CM_FIELD_BLOC(struct cm_req_msg, 139, 5, 1)
+#define CM_REQ_PRIVATE_DATA CM_FIELD_MLOC(struct cm_req_msg, 140, 736, void)
+CM_STRUCT(struct cm_req_msg, 140 * 8 + 736);
+
+/* Table 107 MRA Message Contents */
+#define CM_MRA_LOCAL_COMM_ID CM_FIELD32_LOC(struct cm_mra_msg, 0, 32)
+#define CM_MRA_REMOTE_COMM_ID CM_FIELD32_LOC(struct cm_mra_msg, 4, 32)
+#define CM_MRA_MESSAGE_MRAED CM_FIELD8_LOC(struct cm_mra_msg, 8, 2)
+#define CM_MRA_SERVICE_TIMEOUT CM_FIELD8_LOC(struct cm_mra_msg, 9, 5)
+#define CM_MRA_PRIVATE_DATA CM_FIELD_MLOC(struct cm_mra_msg, 10, 1776, void)
+CM_STRUCT(struct cm_mra_msg, 10 * 8 + 1776);
+
+/* Table 108 REJ Message Contents */
+#define CM_REJ_LOCAL_COMM_ID CM_FIELD32_LOC(struct cm_rej_msg, 0, 32)
+#define CM_REJ_REMOTE_COMM_ID CM_FIELD32_LOC(struct cm_rej_msg, 4, 32)
+#define CM_REJ_MESSAGE_REJECTED CM_FIELD8_LOC(struct cm_rej_msg, 8, 2)
+#define CM_REJ_REJECTED_INFO_LENGTH CM_FIELD8_LOC(struct cm_rej_msg, 9, 7)
+#define CM_REJ_REASON CM_FIELD16_LOC(struct cm_rej_msg, 10, 16)
+#define CM_REJ_ARI CM_FIELD_MLOC(struct cm_rej_msg, 12, 576, void)
+#define CM_REJ_PRIVATE_DATA CM_FIELD_MLOC(struct cm_rej_msg, 84, 1184, void)
+CM_STRUCT(struct cm_rej_msg, 84 * 8 + 1184);
+
+/* Table 110 REP Message Contents */
+#define CM_REP_LOCAL_COMM_ID CM_FIELD32_LOC(struct cm_rep_msg, 0, 32)
+#define CM_REP_REMOTE_COMM_ID CM_FIELD32_LOC(struct cm_rep_msg, 4, 32)
+#define CM_REP_LOCAL_Q_KEY CM_FIELD32_LOC(struct cm_rep_msg, 8, 32)
+#define CM_REP_LOCAL_QPN CM_FIELD32_LOC(struct cm_rep_msg, 12, 24)
+#define CM_REP_VENDOR_ID_H CM_FIELD8_LOC(struct cm_rep_msg, 15, 8)
+#define CM_REP_LOCAL_EE_CONTEXT_NUMBER CM_FIELD32_LOC(struct cm_rep_msg, 16, 24)
+#define CM_REP_VENDOR_ID_M CM_FIELD8_LOC(struct cm_rep_msg, 19, 8)
+#define CM_REP_STARTING_PSN CM_FIELD32_LOC(struct cm_rep_msg, 20, 24)
+#define CM_REP_VENDOR_ID_L CM_FIELD8_LOC(struct cm_rep_msg, 23, 8)
+#define CM_REP_RESPONDER_RESOURCES CM_FIELD8_LOC(struct cm_rep_msg, 24, 8)
+#define CM_REP_INITIATOR_DEPTH CM_FIELD8_LOC(struct cm_rep_msg, 25, 8)
+#define CM_REP_TARGET_ACK_DELAY CM_FIELD8_LOC(struct cm_rep_msg, 26, 5)
+#define CM_REP_FAILOVER_ACCEPTED CM_FIELD_BLOC(struct cm_rep_msg, 26, 5, 2)
+#define CM_REP_END_TO_END_FLOW_CONTROL                                         \
+	CM_FIELD_BLOC(struct cm_rep_msg, 26, 7, 1)
+#define CM_REP_RNR_RETRY_COUNT CM_FIELD8_LOC(struct cm_rep_msg, 27, 3)
+#define CM_REP_SRQ CM_FIELD_BLOC(struct cm_rep_msg, 27, 3, 1)
+#define CM_REP_LOCAL_CA_GUID CM_FIELD64_LOC(struct cm_rep_msg, 28)
+#define CM_REP_PRIVATE_DATA CM_FIELD_MLOC(struct cm_rep_msg, 36, 1568, void)
+CM_STRUCT(struct cm_rep_msg, 36 * 8 + 1568);
+
+/* Table 111 RTU Message Contents */
+#define CM_RTU_LOCAL_COMM_ID CM_FIELD32_LOC(struct cm_rtu_msg, 0, 32)
+#define CM_RTU_REMOTE_COMM_ID CM_FIELD32_LOC(struct cm_rtu_msg, 4, 32)
+#define CM_RTU_PRIVATE_DATA CM_FIELD_MLOC(struct cm_rtu_msg, 8, 1792, void)
+CM_STRUCT(struct cm_rtu_msg, 8 * 8 + 1792);
+
+/* Table 112 DREQ Message Contents */
+#define CM_DREQ_LOCAL_COMM_ID CM_FIELD32_LOC(struct cm_dreq_msg, 0, 32)
+#define CM_DREQ_REMOTE_COMM_ID CM_FIELD32_LOC(struct cm_dreq_msg, 4, 32)
+#define CM_DREQ_REMOTE_QPN_EECN CM_FIELD32_LOC(struct cm_dreq_msg, 8, 24)
+#define CM_DREQ_PRIVATE_DATA CM_FIELD_MLOC(struct cm_dreq_msg, 12, 1760, void)
+CM_STRUCT(struct cm_dreq_msg, 12 * 8 + 1760);
+
+/* Table 113 DREP Message Contents */
+#define CM_DREP_LOCAL_COMM_ID CM_FIELD32_LOC(struct cm_drep_msg, 0, 32)
+#define CM_DREP_REMOTE_COMM_ID CM_FIELD32_LOC(struct cm_drep_msg, 4, 32)
+#define CM_DREP_PRIVATE_DATA CM_FIELD_MLOC(struct cm_drep_msg, 8, 1792, void)
+CM_STRUCT(struct cm_drep_msg, 8 * 8 + 1792);
+
+/* Table 115 LAP Message Contents */
+#define CM_LAP_LOCAL_COMM_ID CM_FIELD32_LOC(struct cm_lap_msg, 0, 32)
+#define CM_LAP_REMOTE_COMM_ID CM_FIELD32_LOC(struct cm_lap_msg, 4, 32)
+#define CM_LAP_REMOTE_QPN_EECN CM_FIELD32_LOC(struct cm_lap_msg, 12, 24)
+#define CM_LAP_REMOTE_CM_RESPONSE_TIMEOUT                                      \
+	CM_FIELD8_LOC(struct cm_lap_msg, 15, 5)
+#define CM_LAP_ALTERNATE_LOCAL_PORT_LID                                        \
+	CM_FIELD16_LOC(struct cm_lap_msg, 20, 16)
+#define CM_LAP_ALTERNATE_REMOTE_PORT_LID                                       \
+	CM_FIELD16_LOC(struct cm_lap_msg, 22, 16)
+#define CM_LAP_ALTERNATE_LOCAL_PORT_GID                                        \
+	CM_FIELD_MLOC(struct cm_lap_msg, 24, 128, union ib_gid)
+#define CM_LAP_ALTERNATE_REMOTE_PORT_GID                                       \
+	CM_FIELD_MLOC(struct cm_lap_msg, 40, 128, union ib_gid)
+#define CM_LAP_ALTERNATE_FLOW_LABEL CM_FIELD32_LOC(struct cm_lap_msg, 56, 20)
+#define CM_LAP_ALTERNATE_TRAFFIC_CLASS CM_FIELD8_LOC(struct cm_lap_msg, 59, 8)
+#define CM_LAP_ALTERNATE_HOP_LIMIT CM_FIELD8_LOC(struct cm_lap_msg, 60, 8)
+#define CM_LAP_ALTERNATE_PACKET_RATE CM_FIELD_BLOC(struct cm_lap_msg, 61, 2, 6)
+#define CM_LAP_ALTERNATE_SL CM_FIELD8_LOC(struct cm_lap_msg, 62, 4)
+#define CM_LAP_ALTERNATE_SUBNET_LOCAL CM_FIELD_BLOC(struct cm_lap_msg, 62, 4, 1)
+#define CM_LAP_ALTERNATE_LOCAL_ACK_TIMEOUT                                     \
+	CM_FIELD8_LOC(struct cm_lap_msg, 63, 5)
+#define CM_LAP_PRIVATE_DATA CM_FIELD_MLOC(struct cm_lap_msg, 64, 1344, void)
+CM_STRUCT(struct cm_lap_msg, 64 * 8 + 1344);
+
+/* Table 116 APR Message Contents */
+#define CM_APR_LOCAL_COMM_ID CM_FIELD32_LOC(struct cm_apr_msg, 0, 32)
+#define CM_APR_REMOTE_COMM_ID CM_FIELD32_LOC(struct cm_apr_msg, 4, 32)
+#define CM_APR_ADDITIONAL_INFORMATION_LENGTH                                   \
+	CM_FIELD8_LOC(struct cm_apr_msg, 8, 8)
+#define CM_APR_AR_STATUS CM_FIELD8_LOC(struct cm_apr_msg, 9, 8)
+#define CM_APR_ADDITIONAL_INFORMATION                                          \
+	CM_FIELD_MLOC(struct cm_apr_msg, 12, 576, void)
+#define CM_APR_PRIVATE_DATA CM_FIELD_MLOC(struct cm_apr_msg, 84, 1184, void)
+CM_STRUCT(struct cm_apr_msg, 84 * 8 + 1184);
+
+/* Table 119 SIDR_REQ Message Contents */
+#define CM_SIDR_REQ_REQUESTID CM_FIELD32_LOC(struct cm_sidr_req_msg, 0, 32)
+#define CM_SIDR_REQ_PARTITION_KEY CM_FIELD16_LOC(struct cm_sidr_req_msg, 4, 16)
+#define CM_SIDR_REQ_SERVICEID CM_FIELD64_LOC(struct cm_sidr_req_msg, 8)
+#define CM_SIDR_REQ_PRIVATE_DATA                                               \
+	CM_FIELD_MLOC(struct cm_sidr_req_msg, 16, 1728, void)
+CM_STRUCT(struct cm_sidr_req_msg, 16 * 8 + 1728);
+
+/* Table 120 SIDR_REP Message Contents */
+#define CM_SIDR_REP_REQUESTID CM_FIELD32_LOC(struct cm_sidr_rep_msg, 0, 32)
+#define CM_SIDR_REP_STATUS CM_FIELD8_LOC(struct cm_sidr_rep_msg, 4, 8)
+#define CM_SIDR_REP_ADDITIONAL_INFORMATION_LENGTH                              \
+	CM_FIELD8_LOC(struct cm_sidr_rep_msg, 5, 8)
+#define CM_SIDR_REP_VENDOR_ID_H CM_FIELD16_LOC(struct cm_sidr_rep_msg, 6, 16)
+#define CM_SIDR_REP_QPN CM_FIELD32_LOC(struct cm_sidr_rep_msg, 8, 24)
+#define CM_SIDR_REP_VENDOR_ID_L CM_FIELD8_LOC(struct cm_sidr_rep_msg, 11, 8)
+#define CM_SIDR_REP_SERVICEID CM_FIELD64_LOC(struct cm_sidr_rep_msg, 12)
+#define CM_SIDR_REP_Q_KEY CM_FIELD32_LOC(struct cm_sidr_rep_msg, 20, 32)
+#define CM_SIDR_REP_ADDITIONAL_INFORMATION                                     \
+	CM_FIELD_MLOC(struct cm_sidr_rep_msg, 24, 576, void)
+#define CM_SIDR_REP_PRIVATE_DATA                                               \
+	CM_FIELD_MLOC(struct cm_sidr_rep_msg, 96, 1088, void)
+CM_STRUCT(struct cm_sidr_rep_msg, 96 * 8 + 1088);
+
+#endif /* _IBTA_VOL1_C12_H_ */
diff --git a/include/rdma/iw_cm.h b/include/rdma/iw_cm.h
new file mode 100644
index 000000000..91975400e
--- /dev/null
+++ b/include/rdma/iw_cm.h
@@ -0,0 +1,225 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2005 Network Appliance, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ */
+
+#ifndef IW_CM_H
+#define IW_CM_H
+
+#include <linux/in.h>
+#include <rdma/ib_cm.h>
+
+struct iw_cm_id;
+
+enum iw_cm_event_type {
+	IW_CM_EVENT_CONNECT_REQUEST = 1, /* connect request received */
+	IW_CM_EVENT_CONNECT_REPLY,	 /* reply from active connect request */
+	IW_CM_EVENT_ESTABLISHED,	 /* passive side accept successful */
+	IW_CM_EVENT_DISCONNECT,		 /* orderly shutdown */
+	IW_CM_EVENT_CLOSE		 /* close complete */
+};
+
+struct iw_cm_event {
+	enum iw_cm_event_type event;
+	int			 status;
+	struct sockaddr_storage local_addr;
+	struct sockaddr_storage remote_addr;
+	void *private_data;
+	void *provider_data;
+	u8 private_data_len;
+	u8 ord;
+	u8 ird;
+};
+
+/**
+ * iw_cm_handler - Function to be called by the IW CM when delivering events
+ * to the client.
+ *
+ * @cm_id: The IW CM identifier associated with the event.
+ * @event: Pointer to the event structure.
+ */
+typedef int (*iw_cm_handler)(struct iw_cm_id *cm_id,
+			     struct iw_cm_event *event);
+
+/**
+ * iw_event_handler - Function called by the provider when delivering provider
+ * events to the IW CM.  Returns either 0 indicating the event was processed
+ * or -errno if the event could not be processed.
+ *
+ * @cm_id: The IW CM identifier associated with the event.
+ * @event: Pointer to the event structure.
+ */
+typedef int (*iw_event_handler)(struct iw_cm_id *cm_id,
+				 struct iw_cm_event *event);
+
+struct iw_cm_id {
+	iw_cm_handler		cm_handler;      /* client callback function */
+	void		        *context;	 /* client cb context */
+	struct ib_device	*device;
+	struct sockaddr_storage local_addr;      /* local addr */
+	struct sockaddr_storage	remote_addr;
+	struct sockaddr_storage m_local_addr;	 /* nmapped local addr */
+	struct sockaddr_storage	m_remote_addr;	 /* nmapped rem addr */
+	void			*provider_data;	 /* provider private data */
+	iw_event_handler        event_handler;   /* cb for provider
+						    events */
+	/* Used by provider to add and remove refs on IW cm_id */
+	void (*add_ref)(struct iw_cm_id *);
+	void (*rem_ref)(struct iw_cm_id *);
+	u8  tos;
+	bool tos_set:1;
+	bool mapped:1;
+};
+
+struct iw_cm_conn_param {
+	const void *private_data;
+	u16 private_data_len;
+	u32 ord;
+	u32 ird;
+	u32 qpn;
+};
+
+enum iw_flags {
+
+	/*
+	 * This flag allows the iwcm and iwpmd to still advertise
+	 * mappings but the real and mapped port numbers are the
+	 * same.  Further, iwpmd will not bind any user socket to
+	 * reserve the port.  This is required for soft iwarp
+	 * to play in the port mapped iwarp space.
+	 */
+	IW_F_NO_PORT_MAP = (1 << 0),
+};
+
+/**
+ * iw_create_cm_id - Create an IW CM identifier.
+ *
+ * @device: The IB device on which to create the IW CM identier.
+ * @event_handler: User callback invoked to report events associated with the
+ *   returned IW CM identifier.
+ * @context: User specified context associated with the id.
+ */
+struct iw_cm_id *iw_create_cm_id(struct ib_device *device,
+				 iw_cm_handler cm_handler, void *context);
+
+/**
+ * iw_destroy_cm_id - Destroy an IW CM identifier.
+ *
+ * @cm_id: The previously created IW CM identifier to destroy.
+ *
+ * The client can assume that no events will be delivered for the CM ID after
+ * this function returns.
+ */
+void iw_destroy_cm_id(struct iw_cm_id *cm_id);
+
+/**
+ * iw_cm_bind_qp - Unbind the specified IW CM identifier and QP
+ *
+ * @cm_id: The IW CM idenfier to unbind from the QP.
+ * @qp: The QP
+ *
+ * This is called by the provider when destroying the QP to ensure
+ * that any references held by the IWCM are released. It may also
+ * be called by the IWCM when destroying a CM_ID to that any
+ * references held by the provider are released.
+ */
+void iw_cm_unbind_qp(struct iw_cm_id *cm_id, struct ib_qp *qp);
+
+/**
+ * iw_cm_get_qp - Return the ib_qp associated with a QPN
+ *
+ * @ib_device: The IB device
+ * @qpn: The queue pair number
+ */
+struct ib_qp *iw_cm_get_qp(struct ib_device *device, int qpn);
+
+/**
+ * iw_cm_listen - Listen for incoming connection requests on the
+ * specified IW CM id.
+ *
+ * @cm_id: The IW CM identifier.
+ * @backlog: The maximum number of outstanding un-accepted inbound listen
+ *   requests to queue.
+ *
+ * The source address and port number are specified in the IW CM identifier
+ * structure.
+ */
+int iw_cm_listen(struct iw_cm_id *cm_id, int backlog);
+
+/**
+ * iw_cm_accept - Called to accept an incoming connect request.
+ *
+ * @cm_id: The IW CM identifier associated with the connection request.
+ * @iw_param: Pointer to a structure containing connection establishment
+ *   parameters.
+ *
+ * The specified cm_id will have been provided in the event data for a
+ * CONNECT_REQUEST event. Subsequent events related to this connection will be
+ * delivered to the specified IW CM identifier prior and may occur prior to
+ * the return of this function. If this function returns a non-zero value, the
+ * client can assume that no events will be delivered to the specified IW CM
+ * identifier.
+ */
+int iw_cm_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param);
+
+/**
+ * iw_cm_reject - Reject an incoming connection request.
+ *
+ * @cm_id: Connection identifier associated with the request.
+ * @private_daa: Pointer to data to deliver to the remote peer as part of the
+ *   reject message.
+ * @private_data_len: The number of bytes in the private_data parameter.
+ *
+ * The client can assume that no events will be delivered to the specified IW
+ * CM identifier following the return of this function. The private_data
+ * buffer is available for reuse when this function returns.
+ */
+int iw_cm_reject(struct iw_cm_id *cm_id, const void *private_data,
+		 u8 private_data_len);
+
+/**
+ * iw_cm_connect - Called to request a connection to a remote peer.
+ *
+ * @cm_id: The IW CM identifier for the connection.
+ * @iw_param: Pointer to a structure containing connection  establishment
+ *   parameters.
+ *
+ * Events may be delivered to the specified IW CM identifier prior to the
+ * return of this function. If this function returns a non-zero value, the
+ * client can assume that no events will be delivered to the specified IW CM
+ * identifier.
+ */
+int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param);
+
+/**
+ * iw_cm_disconnect - Close the specified connection.
+ *
+ * @cm_id: The IW CM identifier to close.
+ * @abrupt: If 0, the connection will be closed gracefully, otherwise, the
+ *   connection will be reset.
+ *
+ * The IW CM identifier is still active until the IW_CM_EVENT_CLOSE event is
+ * delivered.
+ */
+int iw_cm_disconnect(struct iw_cm_id *cm_id, int abrupt);
+
+/**
+ * iw_cm_init_qp_attr - Called to initialize the attributes of the QP
+ * associated with a IW CM identifier.
+ *
+ * @cm_id: The IW CM identifier associated with the QP
+ * @qp_attr: Pointer to the QP attributes structure.
+ * @qp_attr_mask: Pointer to a bit vector specifying which QP attributes are
+ *   valid.
+ */
+int iw_cm_init_qp_attr(struct iw_cm_id *cm_id, struct ib_qp_attr *qp_attr,
+		       int *qp_attr_mask);
+
+/**
+ * iwcm_reject_msg - return a pointer to a reject message string.
+ * @reason: Value returned in the REJECT event status field.
+ */
+const char *__attribute_const__ iwcm_reject_msg(int reason);
+
+#endif /* IW_CM_H */
diff --git a/include/rdma/iw_portmap.h b/include/rdma/iw_portmap.h
new file mode 100644
index 000000000..6dbc1a235
--- /dev/null
+++ b/include/rdma/iw_portmap.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2014 Intel Corporation. All rights reserved.
+ * Copyright (c) 2014 Chelsio, Inc. All rights reserved.
+ */
+
+#ifndef _IW_PORTMAP_H
+#define _IW_PORTMAP_H
+
+#include <linux/socket.h>
+#include <linux/netlink.h>
+
+#define IWPM_ULIBNAME_SIZE	32
+#define IWPM_DEVNAME_SIZE	32
+#define IWPM_IFNAME_SIZE	16
+#define IWPM_IPADDR_SIZE	16
+
+enum {
+	IWPM_INVALID_NLMSG_ERR = 10,
+	IWPM_CREATE_MAPPING_ERR,
+	IWPM_DUPLICATE_MAPPING_ERR,
+	IWPM_UNKNOWN_MAPPING_ERR,
+	IWPM_CLIENT_DEV_INFO_ERR,
+	IWPM_USER_LIB_INFO_ERR,
+	IWPM_REMOTE_QUERY_REJECT
+};
+
+struct iwpm_dev_data {
+	char dev_name[IWPM_DEVNAME_SIZE];
+	char if_name[IWPM_IFNAME_SIZE];
+};
+
+struct iwpm_sa_data {
+	struct sockaddr_storage loc_addr;
+	struct sockaddr_storage mapped_loc_addr;
+	struct sockaddr_storage rem_addr;
+	struct sockaddr_storage mapped_rem_addr;
+	u32 flags;
+};
+
+int iwpm_init(u8);
+int iwpm_exit(u8);
+int iwpm_valid_pid(void);
+int iwpm_register_pid(struct iwpm_dev_data *pm_msg, u8 nl_client);
+int iwpm_add_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client);
+int iwpm_add_and_query_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client);
+int iwpm_remove_mapping(struct sockaddr_storage *local_addr, u8 nl_client);
+int iwpm_register_pid_cb(struct sk_buff *, struct netlink_callback *);
+int iwpm_add_mapping_cb(struct sk_buff *, struct netlink_callback *);
+int iwpm_add_and_query_mapping_cb(struct sk_buff *, struct netlink_callback *);
+int iwpm_remote_info_cb(struct sk_buff *, struct netlink_callback *);
+int iwpm_mapping_error_cb(struct sk_buff *, struct netlink_callback *);
+int iwpm_mapping_info_cb(struct sk_buff *, struct netlink_callback *);
+int iwpm_ack_mapping_info_cb(struct sk_buff *, struct netlink_callback *);
+int iwpm_get_remote_info(struct sockaddr_storage *mapped_loc_addr,
+			struct sockaddr_storage *mapped_rem_addr,
+			struct sockaddr_storage *remote_addr, u8 nl_client);
+int iwpm_create_mapinfo(struct sockaddr_storage *local_addr,
+			struct sockaddr_storage *mapped_addr, u8 nl_client,
+			u32 map_flags);
+int iwpm_remove_mapinfo(struct sockaddr_storage *local_addr,
+			struct sockaddr_storage *mapped_addr);
+
+int iwpm_hello_cb(struct sk_buff *skb, struct netlink_callback *cb);
+#endif /* _IW_PORTMAP_H */
diff --git a/include/rdma/lag.h b/include/rdma/lag.h
new file mode 100644
index 000000000..7c06ec9b2
--- /dev/null
+++ b/include/rdma/lag.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2020 Mellanox Technologies. All rights reserved.
+ */
+
+#ifndef _RDMA_LAG_H_
+#define _RDMA_LAG_H_
+
+#include <net/lag.h>
+
+struct ib_device;
+struct rdma_ah_attr;
+
+enum rdma_lag_flags {
+	RDMA_LAG_FLAGS_HASH_ALL_SLAVES = 1 << 0
+};
+
+void rdma_lag_put_ah_roce_slave(struct net_device *xmit_slave);
+struct net_device *rdma_lag_get_ah_roce_slave(struct ib_device *device,
+					      struct rdma_ah_attr *ah_attr,
+					      gfp_t flags);
+
+#endif /* _RDMA_LAG_H_ */
diff --git a/include/rdma/mr_pool.h b/include/rdma/mr_pool.h
new file mode 100644
index 000000000..e77123bcb
--- /dev/null
+++ b/include/rdma/mr_pool.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2016 HGST, a Western Digital Company.
+ */
+#ifndef _RDMA_MR_POOL_H
+#define _RDMA_MR_POOL_H 1
+
+#include <rdma/ib_verbs.h>
+
+struct ib_mr *ib_mr_pool_get(struct ib_qp *qp, struct list_head *list);
+void ib_mr_pool_put(struct ib_qp *qp, struct list_head *list, struct ib_mr *mr);
+
+int ib_mr_pool_init(struct ib_qp *qp, struct list_head *list, int nr,
+		enum ib_mr_type type, u32 max_num_sg, u32 max_num_meta_sg);
+void ib_mr_pool_destroy(struct ib_qp *qp, struct list_head *list);
+
+#endif /* _RDMA_MR_POOL_H */
diff --git a/include/rdma/opa_addr.h b/include/rdma/opa_addr.h
new file mode 100644
index 000000000..c6bfa2640
--- /dev/null
+++ b/include/rdma/opa_addr.h
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ */
+
+#ifndef OPA_ADDR_H
+#define OPA_ADDR_H
+
+#include <rdma/opa_smi.h>
+
+#define	OPA_SPECIAL_OUI		(0x00066AULL)
+#define OPA_MAKE_ID(x)          (cpu_to_be64(OPA_SPECIAL_OUI << 40 | (x)))
+#define OPA_TO_IB_UCAST_LID(x) (((x) >= be16_to_cpu(IB_MULTICAST_LID_BASE)) \
+				? 0 : x)
+#define OPA_GID_INDEX		0x1
+/**
+ * 0xF8 - 4 bits of multicast range and 1 bit for collective range
+ * Example: For 24 bit LID space,
+ * Multicast range: 0xF00000 to 0xF7FFFF
+ * Collective range: 0xF80000 to 0xFFFFFE
+ */
+#define OPA_MCAST_NR 0x4 /* Number of top bits set */
+#define OPA_COLLECTIVE_NR 0x1 /* Number of bits after MCAST_NR */
+
+/**
+ * ib_is_opa_gid: Returns true if the top 24 bits of the gid
+ * contains the OPA_STL_OUI identifier. This identifies that
+ * the provided gid is a special purpose GID meant to carry
+ * extended LID information.
+ *
+ * @gid: The Global identifier
+ */
+static inline bool ib_is_opa_gid(const union ib_gid *gid)
+{
+	return ((be64_to_cpu(gid->global.interface_id) >> 40) ==
+		OPA_SPECIAL_OUI);
+}
+
+/**
+ * opa_get_lid_from_gid: Returns the last 32 bits of the gid.
+ * OPA devices use one of the gids in the gid table to also
+ * store the lid.
+ *
+ * @gid: The Global identifier
+ */
+static inline u32 opa_get_lid_from_gid(const union ib_gid *gid)
+{
+	return be64_to_cpu(gid->global.interface_id) & 0xFFFFFFFF;
+}
+
+/**
+ * opa_is_extended_lid: Returns true if dlid or slid are
+ * extended.
+ *
+ * @dlid: The DLID
+ * @slid: The SLID
+ */
+static inline bool opa_is_extended_lid(__be32 dlid, __be32 slid)
+{
+	if ((be32_to_cpu(dlid) >=
+	     be16_to_cpu(IB_MULTICAST_LID_BASE)) ||
+	    (be32_to_cpu(slid) >=
+	     be16_to_cpu(IB_MULTICAST_LID_BASE)))
+		return true;
+
+	return false;
+}
+
+/* Get multicast lid base */
+static inline u32 opa_get_mcast_base(u32 nr_top_bits)
+{
+	return (be32_to_cpu(OPA_LID_PERMISSIVE) << (32 - nr_top_bits));
+}
+
+/* Check for a valid unicast LID for non-SM traffic types */
+static inline bool rdma_is_valid_unicast_lid(struct rdma_ah_attr *attr)
+{
+	if (attr->type == RDMA_AH_ATTR_TYPE_IB) {
+		if (!rdma_ah_get_dlid(attr) ||
+		    rdma_ah_get_dlid(attr) >=
+		    be16_to_cpu(IB_MULTICAST_LID_BASE))
+			return false;
+	} else if (attr->type == RDMA_AH_ATTR_TYPE_OPA) {
+		if (!rdma_ah_get_dlid(attr) ||
+		    rdma_ah_get_dlid(attr) >=
+		    opa_get_mcast_base(OPA_MCAST_NR))
+			return false;
+	}
+	return true;
+}
+#endif /* OPA_ADDR_H */
diff --git a/include/rdma/opa_port_info.h b/include/rdma/opa_port_info.h
new file mode 100644
index 000000000..73bcac90a
--- /dev/null
+++ b/include/rdma/opa_port_info.h
@@ -0,0 +1,385 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2014-2020 Intel Corporation.  All rights reserved.
+ */
+
+#ifndef OPA_PORT_INFO_H
+#define OPA_PORT_INFO_H
+
+#include <rdma/opa_smi.h>
+
+#define OPA_PORT_LINK_MODE_NOP	0		/* No change */
+#define OPA_PORT_LINK_MODE_OPA	4		/* Port mode is OPA */
+
+#define OPA_PORT_PACKET_FORMAT_NOP	0		/* No change */
+#define OPA_PORT_PACKET_FORMAT_8B	1		/* Format 8B */
+#define OPA_PORT_PACKET_FORMAT_9B	2		/* Format 9B */
+#define OPA_PORT_PACKET_FORMAT_10B	4		/* Format 10B */
+#define OPA_PORT_PACKET_FORMAT_16B	8		/* Format 16B */
+
+#define OPA_PORT_LTP_CRC_MODE_NONE	0	/* No change */
+#define OPA_PORT_LTP_CRC_MODE_14	1	/* 14-bit LTP CRC mode (optional) */
+#define OPA_PORT_LTP_CRC_MODE_16	2	/* 16-bit LTP CRC mode */
+#define OPA_PORT_LTP_CRC_MODE_48	4	/* 48-bit LTP CRC mode (optional) */
+#define OPA_PORT_LTP_CRC_MODE_PER_LANE  8	/* 12/16-bit per lane LTP CRC mode */
+
+/* Link Down / Neighbor Link Down Reason; indicated as follows: */
+#define OPA_LINKDOWN_REASON_NONE				0	/* No specified reason */
+#define OPA_LINKDOWN_REASON_RCV_ERROR_0				1
+#define OPA_LINKDOWN_REASON_BAD_PKT_LEN				2
+#define OPA_LINKDOWN_REASON_PKT_TOO_LONG			3
+#define OPA_LINKDOWN_REASON_PKT_TOO_SHORT			4
+#define OPA_LINKDOWN_REASON_BAD_SLID				5
+#define OPA_LINKDOWN_REASON_BAD_DLID				6
+#define OPA_LINKDOWN_REASON_BAD_L2				7
+#define OPA_LINKDOWN_REASON_BAD_SC				8
+#define OPA_LINKDOWN_REASON_RCV_ERROR_8				9
+#define OPA_LINKDOWN_REASON_BAD_MID_TAIL			10
+#define OPA_LINKDOWN_REASON_RCV_ERROR_10			11
+#define OPA_LINKDOWN_REASON_PREEMPT_ERROR			12
+#define OPA_LINKDOWN_REASON_PREEMPT_VL15			13
+#define OPA_LINKDOWN_REASON_BAD_VL_MARKER			14
+#define OPA_LINKDOWN_REASON_RCV_ERROR_14			15
+#define OPA_LINKDOWN_REASON_RCV_ERROR_15			16
+#define OPA_LINKDOWN_REASON_BAD_HEAD_DIST			17
+#define OPA_LINKDOWN_REASON_BAD_TAIL_DIST			18
+#define OPA_LINKDOWN_REASON_BAD_CTRL_DIST			19
+#define OPA_LINKDOWN_REASON_BAD_CREDIT_ACK			20
+#define OPA_LINKDOWN_REASON_UNSUPPORTED_VL_MARKER		21
+#define OPA_LINKDOWN_REASON_BAD_PREEMPT				22
+#define OPA_LINKDOWN_REASON_BAD_CONTROL_FLIT			23
+#define OPA_LINKDOWN_REASON_EXCEED_MULTICAST_LIMIT		24
+#define OPA_LINKDOWN_REASON_RCV_ERROR_24			25
+#define OPA_LINKDOWN_REASON_RCV_ERROR_25			26
+#define OPA_LINKDOWN_REASON_RCV_ERROR_26			27
+#define OPA_LINKDOWN_REASON_RCV_ERROR_27			28
+#define OPA_LINKDOWN_REASON_RCV_ERROR_28			29
+#define OPA_LINKDOWN_REASON_RCV_ERROR_29			30
+#define OPA_LINKDOWN_REASON_RCV_ERROR_30			31
+#define OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN		32
+#define OPA_LINKDOWN_REASON_UNKNOWN				33
+/* 34 -reserved */
+#define OPA_LINKDOWN_REASON_REBOOT				35
+#define OPA_LINKDOWN_REASON_NEIGHBOR_UNKNOWN			36
+/* 37-38 reserved */
+#define OPA_LINKDOWN_REASON_FM_BOUNCE				39
+#define OPA_LINKDOWN_REASON_SPEED_POLICY			40
+#define OPA_LINKDOWN_REASON_WIDTH_POLICY			41
+/* 42-48 reserved */
+#define OPA_LINKDOWN_REASON_DISCONNECTED			49
+#define OPA_LINKDOWN_REASON_LOCAL_MEDIA_NOT_INSTALLED		50
+#define OPA_LINKDOWN_REASON_NOT_INSTALLED			51
+#define OPA_LINKDOWN_REASON_CHASSIS_CONFIG			52
+/* 53 reserved */
+#define OPA_LINKDOWN_REASON_END_TO_END_NOT_INSTALLED		54
+/* 55 reserved */
+#define OPA_LINKDOWN_REASON_POWER_POLICY			56
+#define OPA_LINKDOWN_REASON_LINKSPEED_POLICY			57
+#define OPA_LINKDOWN_REASON_LINKWIDTH_POLICY			58
+/* 59 reserved */
+#define OPA_LINKDOWN_REASON_SWITCH_MGMT				60
+#define OPA_LINKDOWN_REASON_SMA_DISABLED			61
+/* 62 reserved */
+#define OPA_LINKDOWN_REASON_TRANSIENT				63
+/* 64-255 reserved */
+
+/* OPA Link Init reason; indicated as follows: */
+/* 3-7; 11-15 reserved; 8-15 cleared on Polling->LinkUp */
+#define OPA_LINKINIT_REASON_NOP                 0
+#define OPA_LINKINIT_REASON_LINKUP              (1 << 4)
+#define OPA_LINKINIT_REASON_FLAPPING            (2 << 4)
+#define OPA_LINKINIT_REASON_CLEAR               (8 << 4)
+#define OPA_LINKINIT_OUTSIDE_POLICY             (8 << 4)
+#define OPA_LINKINIT_QUARANTINED                (9 << 4)
+#define OPA_LINKINIT_INSUFIC_CAPABILITY         (10 << 4)
+
+#define OPA_LINK_SPEED_NOP              0x0000  /*  Reserved (1-5 Gbps) */
+#define OPA_LINK_SPEED_12_5G            0x0001  /*  12.5 Gbps */
+#define OPA_LINK_SPEED_25G              0x0002  /*  25.78125?  Gbps (EDR) */
+
+#define OPA_LINK_WIDTH_1X            0x0001
+#define OPA_LINK_WIDTH_2X            0x0002
+#define OPA_LINK_WIDTH_3X            0x0004
+#define OPA_LINK_WIDTH_4X            0x0008
+
+#define OPA_CAP_MASK3_IsEthOnFabricSupported      (1 << 13)
+#define OPA_CAP_MASK3_IsSnoopSupported            (1 << 7)
+#define OPA_CAP_MASK3_IsAsyncSC2VLSupported       (1 << 6)
+#define OPA_CAP_MASK3_IsAddrRangeConfigSupported  (1 << 5)
+#define OPA_CAP_MASK3_IsPassThroughSupported      (1 << 4)
+#define OPA_CAP_MASK3_IsSharedSpaceSupported      (1 << 3)
+/* reserved (1 << 2) */
+#define OPA_CAP_MASK3_IsVLMarkerSupported         (1 << 1)
+#define OPA_CAP_MASK3_IsVLrSupported              (1 << 0)
+
+enum {
+	OPA_PORT_PHYS_CONF_DISCONNECTED = 0,
+	OPA_PORT_PHYS_CONF_STANDARD     = 1,
+	OPA_PORT_PHYS_CONF_FIXED        = 2,
+	OPA_PORT_PHYS_CONF_VARIABLE     = 3,
+	OPA_PORT_PHYS_CONF_SI_PHOTO     = 4
+};
+
+enum port_info_field_masks {
+	/* vl.cap */
+	OPA_PI_MASK_VL_CAP                        = 0x1F,
+	/* port_states.ledenable_offlinereason */
+	OPA_PI_MASK_OFFLINE_REASON                = 0x0F,
+	OPA_PI_MASK_LED_ENABLE                    = 0x40,
+	/* port_states.unsleepstate_downdefstate */
+	OPA_PI_MASK_UNSLEEP_STATE                 = 0xF0,
+	OPA_PI_MASK_DOWNDEF_STATE                 = 0x0F,
+	/* port_states.portphysstate_portstate */
+	OPA_PI_MASK_PORT_PHYSICAL_STATE           = 0xF0,
+	OPA_PI_MASK_PORT_STATE                    = 0x0F,
+	/* port_phys_conf */
+	OPA_PI_MASK_PORT_PHYSICAL_CONF            = 0x0F,
+	/* collectivemask_multicastmask */
+	OPA_PI_MASK_COLLECT_MASK                  = 0x38,
+	OPA_PI_MASK_MULTICAST_MASK                = 0x07,
+	/* mkeyprotect_lmc */
+	OPA_PI_MASK_MKEY_PROT_BIT                 = 0xC0,
+	OPA_PI_MASK_LMC                           = 0x0F,
+	/* smsl */
+	OPA_PI_MASK_SMSL                          = 0x1F,
+	/* partenforce_filterraw */
+	/* Filter Raw In/Out bits 1 and 2 were removed */
+	OPA_PI_MASK_LINKINIT_REASON               = 0xF0,
+	OPA_PI_MASK_PARTITION_ENFORCE_IN          = 0x08,
+	OPA_PI_MASK_PARTITION_ENFORCE_OUT         = 0x04,
+	/* operational_vls */
+	OPA_PI_MASK_OPERATIONAL_VL                = 0x1F,
+	/* sa_qp */
+	OPA_PI_MASK_SA_QP                         = 0x00FFFFFF,
+	/* sm_trap_qp */
+	OPA_PI_MASK_SM_TRAP_QP                    = 0x00FFFFFF,
+	/* localphy_overrun_errors */
+	OPA_PI_MASK_LOCAL_PHY_ERRORS              = 0xF0,
+	OPA_PI_MASK_OVERRUN_ERRORS                = 0x0F,
+	/* clientrereg_subnettimeout */
+	OPA_PI_MASK_CLIENT_REREGISTER             = 0x80,
+	OPA_PI_MASK_SUBNET_TIMEOUT                = 0x1F,
+	/* port_link_mode */
+	OPA_PI_MASK_PORT_LINK_SUPPORTED           = (0x001F << 10),
+	OPA_PI_MASK_PORT_LINK_ENABLED             = (0x001F <<  5),
+	OPA_PI_MASK_PORT_LINK_ACTIVE              = (0x001F <<  0),
+	/* port_link_crc_mode */
+	OPA_PI_MASK_PORT_LINK_CRC_SUPPORTED       = 0x0F00,
+	OPA_PI_MASK_PORT_LINK_CRC_ENABLED         = 0x00F0,
+	OPA_PI_MASK_PORT_LINK_CRC_ACTIVE          = 0x000F,
+	/* port_mode */
+	OPA_PI_MASK_PORT_MODE_SECURITY_CHECK      = 0x0001,
+	OPA_PI_MASK_PORT_MODE_16B_TRAP_QUERY      = 0x0002,
+	OPA_PI_MASK_PORT_MODE_PKEY_CONVERT        = 0x0004,
+	OPA_PI_MASK_PORT_MODE_SC2SC_MAPPING       = 0x0008,
+	OPA_PI_MASK_PORT_MODE_VL_MARKER           = 0x0010,
+	OPA_PI_MASK_PORT_PASS_THROUGH             = 0x0020,
+	OPA_PI_MASK_PORT_ACTIVE_OPTOMIZE          = 0x0040,
+	/* flit_control.interleave */
+	OPA_PI_MASK_INTERLEAVE_DIST_SUP           = (0x0003 << 12),
+	OPA_PI_MASK_INTERLEAVE_DIST_ENABLE        = (0x0003 << 10),
+	OPA_PI_MASK_INTERLEAVE_MAX_NEST_TX        = (0x001F <<  5),
+	OPA_PI_MASK_INTERLEAVE_MAX_NEST_RX        = (0x001F <<  0),
+
+	/* port_error_action */
+	OPA_PI_MASK_EX_BUFFER_OVERRUN                  = 0x80000000,
+		/* 7 bits reserved */
+	OPA_PI_MASK_FM_CFG_ERR_EXCEED_MULTICAST_LIMIT  = 0x00800000,
+	OPA_PI_MASK_FM_CFG_BAD_CONTROL_FLIT            = 0x00400000,
+	OPA_PI_MASK_FM_CFG_BAD_PREEMPT                 = 0x00200000,
+	OPA_PI_MASK_FM_CFG_UNSUPPORTED_VL_MARKER       = 0x00100000,
+	OPA_PI_MASK_FM_CFG_BAD_CRDT_ACK                = 0x00080000,
+	OPA_PI_MASK_FM_CFG_BAD_CTRL_DIST               = 0x00040000,
+	OPA_PI_MASK_FM_CFG_BAD_TAIL_DIST               = 0x00020000,
+	OPA_PI_MASK_FM_CFG_BAD_HEAD_DIST               = 0x00010000,
+		/* 2 bits reserved */
+	OPA_PI_MASK_PORT_RCV_BAD_VL_MARKER             = 0x00002000,
+	OPA_PI_MASK_PORT_RCV_PREEMPT_VL15              = 0x00001000,
+	OPA_PI_MASK_PORT_RCV_PREEMPT_ERROR             = 0x00000800,
+		/* 1 bit reserved */
+	OPA_PI_MASK_PORT_RCV_BAD_MidTail               = 0x00000200,
+		/* 1 bit reserved */
+	OPA_PI_MASK_PORT_RCV_BAD_SC                    = 0x00000080,
+	OPA_PI_MASK_PORT_RCV_BAD_L2                    = 0x00000040,
+	OPA_PI_MASK_PORT_RCV_BAD_DLID                  = 0x00000020,
+	OPA_PI_MASK_PORT_RCV_BAD_SLID                  = 0x00000010,
+	OPA_PI_MASK_PORT_RCV_PKTLEN_TOOSHORT           = 0x00000008,
+	OPA_PI_MASK_PORT_RCV_PKTLEN_TOOLONG            = 0x00000004,
+	OPA_PI_MASK_PORT_RCV_BAD_PKTLEN                = 0x00000002,
+	OPA_PI_MASK_PORT_RCV_BAD_LT                    = 0x00000001,
+
+	/* pass_through.res_drctl */
+	OPA_PI_MASK_PASS_THROUGH_DR_CONTROL       = 0x01,
+
+	/* buffer_units */
+	OPA_PI_MASK_BUF_UNIT_VL15_INIT            = (0x00000FFF  << 11),
+	OPA_PI_MASK_BUF_UNIT_VL15_CREDIT_RATE     = (0x0000001F  <<  6),
+	OPA_PI_MASK_BUF_UNIT_CREDIT_ACK           = (0x00000003  <<  3),
+	OPA_PI_MASK_BUF_UNIT_BUF_ALLOC            = (0x00000003  <<  0),
+
+	/* neigh_mtu.pvlx_to_mtu */
+	OPA_PI_MASK_NEIGH_MTU_PVL0                = 0xF0,
+	OPA_PI_MASK_NEIGH_MTU_PVL1                = 0x0F,
+
+	/* neigh_mtu.vlstall_hoq_life */
+	OPA_PI_MASK_VL_STALL                      = (0x03 << 5),
+	OPA_PI_MASK_HOQ_LIFE                      = (0x1F << 0),
+
+	/* port_neigh_mode */
+	OPA_PI_MASK_NEIGH_MGMT_ALLOWED            = (0x01 << 3),
+	OPA_PI_MASK_NEIGH_FW_AUTH_BYPASS          = (0x01 << 2),
+	OPA_PI_MASK_NEIGH_NODE_TYPE               = (0x03 << 0),
+
+	/* resptime_value */
+	OPA_PI_MASK_RESPONSE_TIME_VALUE           = 0x1F,
+
+	/* mtucap */
+	OPA_PI_MASK_MTU_CAP                       = 0x0F,
+};
+
+struct opa_port_states {
+	u8     reserved;
+	u8     ledenable_offlinereason;   /* 1 res, 1 bit, 6 bits */
+	u8     reserved2;
+	u8     portphysstate_portstate;   /* 4 bits, 4 bits */
+};
+
+struct opa_port_state_info {
+	struct opa_port_states port_states;
+	__be16 link_width_downgrade_tx_active;
+	__be16 link_width_downgrade_rx_active;
+};
+
+struct opa_port_info {
+	__be32 lid;
+	__be32 flow_control_mask;
+
+	struct {
+		u8     res;                       /* was inittype */
+		u8     cap;                       /* 3 res, 5 bits */
+		__be16 high_limit;
+		__be16 preempt_limit;
+		u8     arb_high_cap;
+		u8     arb_low_cap;
+	} vl;
+
+	struct opa_port_states  port_states;
+	u8     port_phys_conf;                    /* 4 res, 4 bits */
+	u8     collectivemask_multicastmask;      /* 2 res, 3, 3 */
+	u8     mkeyprotect_lmc;                   /* 2 bits, 2 res, 4 bits */
+	u8     smsl;                              /* 3 res, 5 bits */
+
+	u8     partenforce_filterraw;             /* bit fields */
+	u8     operational_vls;                    /* 3 res, 5 bits */
+	__be16 pkey_8b;
+	__be16 pkey_10b;
+	__be16 mkey_violations;
+
+	__be16 pkey_violations;
+	__be16 qkey_violations;
+	__be32 sm_trap_qp;                        /* 8 bits, 24 bits */
+
+	__be32 sa_qp;                             /* 8 bits, 24 bits */
+	u8     neigh_port_num;
+	u8     link_down_reason;
+	u8     neigh_link_down_reason;
+	u8     clientrereg_subnettimeout;	  /* 1 bit, 2 bits, 5 */
+
+	struct {
+		__be16 supported;
+		__be16 enabled;
+		__be16 active;
+	} link_speed;
+	struct {
+		__be16 supported;
+		__be16 enabled;
+		__be16 active;
+	} link_width;
+	struct {
+		__be16 supported;
+		__be16 enabled;
+		__be16 tx_active;
+		__be16 rx_active;
+	} link_width_downgrade;
+	__be16 port_link_mode;                  /* 1 res, 5 bits, 5 bits, 5 bits */
+	__be16 port_ltp_crc_mode;               /* 4 res, 4 bits, 4 bits, 4 bits */
+
+	__be16 port_mode;                       /* 9 res, bit fields */
+	struct {
+		__be16 supported;
+		__be16 enabled;
+	} port_packet_format;
+	struct {
+		__be16 interleave;  /* 2 res, 2,2,5,5 */
+		struct {
+			__be16 min_initial;
+			__be16 min_tail;
+			u8     large_pkt_limit;
+			u8     small_pkt_limit;
+			u8     max_small_pkt_limit;
+			u8     preemption_limit;
+		} preemption;
+	} flit_control;
+
+	__be32 reserved4;
+	__be32 port_error_action; /* bit field */
+
+	struct {
+		u8 egress_port;
+		u8 res_drctl;                    /* 7 res, 1 */
+	} pass_through;
+	__be16 mkey_lease_period;
+	__be32 buffer_units;                     /* 9 res, 12, 5, 3, 3 */
+
+	__be32 reserved5;
+	__be32 sm_lid;
+
+	__be64 mkey;
+
+	__be64 subnet_prefix;
+
+	struct {
+		u8 pvlx_to_mtu[OPA_MAX_VLS/2]; /* 4 bits, 4 bits */
+	} neigh_mtu;
+
+	struct {
+		u8 vlstall_hoqlife;             /* 3 bits, 5 bits */
+	} xmit_q[OPA_MAX_VLS];
+
+	struct {
+		u8 addr[16];
+	} ipaddr_ipv6;
+
+	struct {
+		u8 addr[4];
+	} ipaddr_ipv4;
+
+	u32    reserved6;
+	u32    reserved7;
+	u32    reserved8;
+
+	__be64 neigh_node_guid;
+
+	__be32 ib_cap_mask;
+	__be16 reserved9;                    /* was ib_cap_mask2 */
+	__be16 opa_cap_mask;
+
+	__be32 reserved10;                   /* was link_roundtrip_latency */
+	__be16 overall_buffer_space;
+	__be16 reserved11;                   /* was max_credit_hint */
+
+	__be16 diag_code;
+	struct {
+		u8 buffer;
+		u8 wire;
+	} replay_depth;
+	u8     port_neigh_mode;
+	u8     mtucap;                          /* 4 res, 4 bits */
+
+	u8     resptimevalue;		        /* 3 res, 5 bits */
+	u8     local_port_num;
+	u8     reserved12;
+	u8     reserved13;                       /* was guid_cap */
+} __packed;
+
+#endif /* OPA_PORT_INFO_H */
diff --git a/include/rdma/opa_smi.h b/include/rdma/opa_smi.h
new file mode 100644
index 000000000..a9f1b5700
--- /dev/null
+++ b/include/rdma/opa_smi.h
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2014 Intel Corporation.  All rights reserved.
+ */
+
+#ifndef OPA_SMI_H
+#define OPA_SMI_H
+
+#include <rdma/ib_mad.h>
+#include <rdma/ib_smi.h>
+
+#define OPA_SMP_LID_DATA_SIZE			2016
+#define OPA_SMP_DR_DATA_SIZE			1872
+#define OPA_SMP_MAX_PATH_HOPS			64
+
+#define OPA_MAX_VLS				32
+#define OPA_MAX_SLS				32
+#define OPA_MAX_SCS				32
+
+#define OPA_LID_PERMISSIVE			cpu_to_be32(0xFFFFFFFF)
+
+struct opa_smp {
+	u8	base_version;
+	u8	mgmt_class;
+	u8	class_version;
+	u8	method;
+	__be16	status;
+	u8	hop_ptr;
+	u8	hop_cnt;
+	__be64	tid;
+	__be16	attr_id;
+	__be16	resv;
+	__be32	attr_mod;
+	__be64	mkey;
+	union {
+		struct {
+			uint8_t data[OPA_SMP_LID_DATA_SIZE];
+		} lid;
+		struct {
+			__be32	dr_slid;
+			__be32	dr_dlid;
+			u8	initial_path[OPA_SMP_MAX_PATH_HOPS];
+			u8	return_path[OPA_SMP_MAX_PATH_HOPS];
+			u8	reserved[8];
+			u8	data[OPA_SMP_DR_DATA_SIZE];
+		} dr;
+	} route;
+} __packed;
+
+
+/* Subnet management attributes */
+/* ... */
+#define OPA_ATTRIB_ID_NODE_DESCRIPTION		cpu_to_be16(0x0010)
+#define OPA_ATTRIB_ID_NODE_INFO			cpu_to_be16(0x0011)
+#define OPA_ATTRIB_ID_PORT_INFO			cpu_to_be16(0x0015)
+#define OPA_ATTRIB_ID_PARTITION_TABLE		cpu_to_be16(0x0016)
+#define OPA_ATTRIB_ID_SL_TO_SC_MAP		cpu_to_be16(0x0017)
+#define OPA_ATTRIB_ID_VL_ARBITRATION		cpu_to_be16(0x0018)
+#define OPA_ATTRIB_ID_SM_INFO			cpu_to_be16(0x0020)
+#define OPA_ATTRIB_ID_CABLE_INFO		cpu_to_be16(0x0032)
+#define OPA_ATTRIB_ID_AGGREGATE			cpu_to_be16(0x0080)
+#define OPA_ATTRIB_ID_SC_TO_SL_MAP		cpu_to_be16(0x0082)
+#define OPA_ATTRIB_ID_SC_TO_VLR_MAP		cpu_to_be16(0x0083)
+#define OPA_ATTRIB_ID_SC_TO_VLT_MAP		cpu_to_be16(0x0084)
+#define OPA_ATTRIB_ID_SC_TO_VLNT_MAP		cpu_to_be16(0x0085)
+/* ... */
+#define OPA_ATTRIB_ID_PORT_STATE_INFO		cpu_to_be16(0x0087)
+/* ... */
+#define OPA_ATTRIB_ID_BUFFER_CONTROL_TABLE	cpu_to_be16(0x008A)
+/* ... */
+
+struct opa_node_description {
+	u8 data[64];
+} __packed;
+
+struct opa_node_info {
+	u8      base_version;
+	u8      class_version;
+	u8      node_type;
+	u8      num_ports;
+	__be32  reserved;
+	__be64  system_image_guid;
+	__be64  node_guid;
+	__be64  port_guid;
+	__be16  partition_cap;
+	__be16  device_id;
+	__be32  revision;
+	u8      local_port_num;
+	u8      vendor_id[3];   /* network byte order */
+} __packed;
+
+#define OPA_PARTITION_TABLE_BLK_SIZE 32
+
+static inline u8
+opa_get_smp_direction(struct opa_smp *smp)
+{
+	return ib_get_smp_direction((struct ib_smp *)smp);
+}
+
+static inline u8 *opa_get_smp_data(struct opa_smp *smp)
+{
+	if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
+		return smp->route.dr.data;
+
+	return smp->route.lid.data;
+}
+
+static inline size_t opa_get_smp_data_size(struct opa_smp *smp)
+{
+	if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
+		return sizeof(smp->route.dr.data);
+
+	return sizeof(smp->route.lid.data);
+}
+
+static inline size_t opa_get_smp_header_size(struct opa_smp *smp)
+{
+	if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
+		return sizeof(*smp) - sizeof(smp->route.dr.data);
+
+	return sizeof(*smp) - sizeof(smp->route.lid.data);
+}
+
+#endif /* OPA_SMI_H */
diff --git a/include/rdma/opa_vnic.h b/include/rdma/opa_vnic.h
new file mode 100644
index 000000000..cbe3c2811
--- /dev/null
+++ b/include/rdma/opa_vnic.h
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+/*
+ * Copyright(c) 2017 - 2020 Intel Corporation.
+ */
+
+#ifndef _OPA_VNIC_H
+#define _OPA_VNIC_H
+
+/*
+ * This file contains Intel Omni-Path (OPA) Virtual Network Interface
+ * Controller (VNIC) specific declarations.
+ */
+
+#include <rdma/ib_verbs.h>
+
+/* 16 header bytes + 2 reserved bytes */
+#define OPA_VNIC_L2_HDR_LEN   (16 + 2)
+
+#define OPA_VNIC_L4_HDR_LEN   2
+
+#define OPA_VNIC_HDR_LEN      (OPA_VNIC_L2_HDR_LEN + \
+			       OPA_VNIC_L4_HDR_LEN)
+
+#define OPA_VNIC_L4_ETHR  0x78
+
+#define OPA_VNIC_ICRC_LEN   4
+#define OPA_VNIC_TAIL_LEN   1
+#define OPA_VNIC_ICRC_TAIL_LEN  (OPA_VNIC_ICRC_LEN + OPA_VNIC_TAIL_LEN)
+
+#define OPA_VNIC_SKB_MDATA_LEN         4
+#define OPA_VNIC_SKB_MDATA_ENCAP_ERR   0x1
+
+/* opa vnic rdma netdev's private data structure */
+struct opa_vnic_rdma_netdev {
+	struct rdma_netdev rn;  /* keep this first */
+	/* followed by device private data */
+	char *dev_priv[];
+};
+
+static inline void *opa_vnic_priv(const struct net_device *dev)
+{
+	struct rdma_netdev *rn = netdev_priv(dev);
+
+	return rn->clnt_priv;
+}
+
+static inline void *opa_vnic_dev_priv(const struct net_device *dev)
+{
+	struct opa_vnic_rdma_netdev *oparn = netdev_priv(dev);
+
+	return oparn->dev_priv;
+}
+
+/* opa_vnic skb meta data structrue */
+struct opa_vnic_skb_mdata {
+	u8 vl;
+	u8 entropy;
+	u8 flags;
+	u8 rsvd;
+} __packed;
+
+/* OPA VNIC group statistics */
+struct opa_vnic_grp_stats {
+	u64 unicast;
+	u64 mcastbcast;
+	u64 untagged;
+	u64 vlan;
+	u64 s_64;
+	u64 s_65_127;
+	u64 s_128_255;
+	u64 s_256_511;
+	u64 s_512_1023;
+	u64 s_1024_1518;
+	u64 s_1519_max;
+};
+
+struct opa_vnic_stats {
+	/* standard netdev statistics */
+	struct rtnl_link_stats64 netstats;
+
+	/* OPA VNIC statistics */
+	struct opa_vnic_grp_stats tx_grp;
+	struct opa_vnic_grp_stats rx_grp;
+	u64 tx_dlid_zero;
+	u64 tx_drop_state;
+	u64 rx_drop_state;
+	u64 rx_runt;
+	u64 rx_oversize;
+};
+
+static inline bool rdma_cap_opa_vnic(struct ib_device *device)
+{
+	return !!(device->attrs.device_cap_flags &
+		  IB_DEVICE_RDMA_NETDEV_OPA);
+}
+
+#endif /* _OPA_VNIC_H */
diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
new file mode 100644
index 000000000..32a67af18
--- /dev/null
+++ b/include/rdma/rdma_cm.h
@@ -0,0 +1,380 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
+ * Copyright (c) 2005 Intel Corporation.  All rights reserved.
+ */
+
+#ifndef RDMA_CM_H
+#define RDMA_CM_H
+
+#include <linux/socket.h>
+#include <linux/in6.h>
+#include <rdma/ib_addr.h>
+#include <rdma/ib_sa.h>
+#include <uapi/rdma/rdma_user_cm.h>
+
+/*
+ * Upon receiving a device removal event, users must destroy the associated
+ * RDMA identifier and release all resources allocated with the device.
+ */
+enum rdma_cm_event_type {
+	RDMA_CM_EVENT_ADDR_RESOLVED,
+	RDMA_CM_EVENT_ADDR_ERROR,
+	RDMA_CM_EVENT_ROUTE_RESOLVED,
+	RDMA_CM_EVENT_ROUTE_ERROR,
+	RDMA_CM_EVENT_CONNECT_REQUEST,
+	RDMA_CM_EVENT_CONNECT_RESPONSE,
+	RDMA_CM_EVENT_CONNECT_ERROR,
+	RDMA_CM_EVENT_UNREACHABLE,
+	RDMA_CM_EVENT_REJECTED,
+	RDMA_CM_EVENT_ESTABLISHED,
+	RDMA_CM_EVENT_DISCONNECTED,
+	RDMA_CM_EVENT_DEVICE_REMOVAL,
+	RDMA_CM_EVENT_MULTICAST_JOIN,
+	RDMA_CM_EVENT_MULTICAST_ERROR,
+	RDMA_CM_EVENT_ADDR_CHANGE,
+	RDMA_CM_EVENT_TIMEWAIT_EXIT
+};
+
+const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event);
+
+#define RDMA_IB_IP_PS_MASK   0xFFFFFFFFFFFF0000ULL
+#define RDMA_IB_IP_PS_TCP    0x0000000001060000ULL
+#define RDMA_IB_IP_PS_UDP    0x0000000001110000ULL
+#define RDMA_IB_IP_PS_IB     0x00000000013F0000ULL
+
+struct rdma_addr {
+	struct sockaddr_storage src_addr;
+	struct sockaddr_storage dst_addr;
+	struct rdma_dev_addr dev_addr;
+};
+
+struct rdma_route {
+	struct rdma_addr addr;
+	struct sa_path_rec *path_rec;
+	int num_paths;
+};
+
+struct rdma_conn_param {
+	const void *private_data;
+	u8 private_data_len;
+	u8 responder_resources;
+	u8 initiator_depth;
+	u8 flow_control;
+	u8 retry_count;		/* ignored when accepting */
+	u8 rnr_retry_count;
+	/* Fields below ignored if a QP is created on the rdma_cm_id. */
+	u8 srq;
+	u32 qp_num;
+	u32 qkey;
+};
+
+struct rdma_ud_param {
+	const void *private_data;
+	u8 private_data_len;
+	struct rdma_ah_attr ah_attr;
+	u32 qp_num;
+	u32 qkey;
+};
+
+struct rdma_cm_event {
+	enum rdma_cm_event_type	 event;
+	int			 status;
+	union {
+		struct rdma_conn_param	conn;
+		struct rdma_ud_param	ud;
+	} param;
+	struct rdma_ucm_ece ece;
+};
+
+struct rdma_cm_id;
+
+/**
+ * rdma_cm_event_handler - Callback used to report user events.
+ *
+ * Notes: Users may not call rdma_destroy_id from this callback to destroy
+ *   the passed in id, or a corresponding listen id.  Returning a
+ *   non-zero value from the callback will destroy the passed in id.
+ */
+typedef int (*rdma_cm_event_handler)(struct rdma_cm_id *id,
+				     struct rdma_cm_event *event);
+
+struct rdma_cm_id {
+	struct ib_device	*device;
+	void			*context;
+	struct ib_qp		*qp;
+	rdma_cm_event_handler	 event_handler;
+	struct rdma_route	 route;
+	enum rdma_ucm_port_space ps;
+	enum ib_qp_type		 qp_type;
+	u8			 port_num;
+};
+
+struct rdma_cm_id *
+__rdma_create_kernel_id(struct net *net, rdma_cm_event_handler event_handler,
+			void *context, enum rdma_ucm_port_space ps,
+			enum ib_qp_type qp_type, const char *caller);
+struct rdma_cm_id *rdma_create_user_id(rdma_cm_event_handler event_handler,
+				       void *context,
+				       enum rdma_ucm_port_space ps,
+				       enum ib_qp_type qp_type);
+
+/**
+ * rdma_create_id - Create an RDMA identifier.
+ *
+ * @net: The network namespace in which to create the new id.
+ * @event_handler: User callback invoked to report events associated with the
+ *   returned rdma_id.
+ * @context: User specified context associated with the id.
+ * @ps: RDMA port space.
+ * @qp_type: type of queue pair associated with the id.
+ *
+ * Returns a new rdma_cm_id. The id holds a reference on the network
+ * namespace until it is destroyed.
+ *
+ * The event handler callback serializes on the id's mutex and is
+ * allowed to sleep.
+ */
+#define rdma_create_id(net, event_handler, context, ps, qp_type)               \
+	__rdma_create_kernel_id(net, event_handler, context, ps, qp_type,      \
+				KBUILD_MODNAME)
+
+/**
+  * rdma_destroy_id - Destroys an RDMA identifier.
+  *
+  * @id: RDMA identifier.
+  *
+  * Note: calling this function has the effect of canceling in-flight
+  * asynchronous operations associated with the id.
+  */
+void rdma_destroy_id(struct rdma_cm_id *id);
+
+/**
+ * rdma_bind_addr - Bind an RDMA identifier to a source address and
+ *   associated RDMA device, if needed.
+ *
+ * @id: RDMA identifier.
+ * @addr: Local address information.  Wildcard values are permitted.
+ *
+ * This associates a source address with the RDMA identifier before calling
+ * rdma_listen.  If a specific local address is given, the RDMA identifier will
+ * be bound to a local RDMA device.
+ */
+int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr);
+
+/**
+ * rdma_resolve_addr - Resolve destination and optional source addresses
+ *   from IP addresses to an RDMA address.  If successful, the specified
+ *   rdma_cm_id will be bound to a local device.
+ *
+ * @id: RDMA identifier.
+ * @src_addr: Source address information.  This parameter may be NULL.
+ * @dst_addr: Destination address information.
+ * @timeout_ms: Time to wait for resolution to complete.
+ */
+int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
+		      const struct sockaddr *dst_addr,
+		      unsigned long timeout_ms);
+
+/**
+ * rdma_resolve_route - Resolve the RDMA address bound to the RDMA identifier
+ *   into route information needed to establish a connection.
+ *
+ * This is called on the client side of a connection.
+ * Users must have first called rdma_resolve_addr to resolve a dst_addr
+ * into an RDMA address before calling this routine.
+ */
+int rdma_resolve_route(struct rdma_cm_id *id, unsigned long timeout_ms);
+
+/**
+ * rdma_create_qp - Allocate a QP and associate it with the specified RDMA
+ * identifier.
+ *
+ * QPs allocated to an rdma_cm_id will automatically be transitioned by the CMA
+ * through their states.
+ */
+int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
+		   struct ib_qp_init_attr *qp_init_attr);
+
+/**
+ * rdma_destroy_qp - Deallocate the QP associated with the specified RDMA
+ * identifier.
+ *
+ * Users must destroy any QP associated with an RDMA identifier before
+ * destroying the RDMA ID.
+ */
+void rdma_destroy_qp(struct rdma_cm_id *id);
+
+/**
+ * rdma_init_qp_attr - Initializes the QP attributes for use in transitioning
+ *   to a specified QP state.
+ * @id: Communication identifier associated with the QP attributes to
+ *   initialize.
+ * @qp_attr: On input, specifies the desired QP state.  On output, the
+ *   mandatory and desired optional attributes will be set in order to
+ *   modify the QP to the specified state.
+ * @qp_attr_mask: The QP attribute mask that may be used to transition the
+ *   QP to the specified state.
+ *
+ * Users must set the @qp_attr->qp_state to the desired QP state.  This call
+ * will set all required attributes for the given transition, along with
+ * known optional attributes.  Users may override the attributes returned from
+ * this call before calling ib_modify_qp.
+ *
+ * Users that wish to have their QP automatically transitioned through its
+ * states can associate a QP with the rdma_cm_id by calling rdma_create_qp().
+ */
+int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
+		       int *qp_attr_mask);
+
+int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param);
+int rdma_connect_locked(struct rdma_cm_id *id,
+			struct rdma_conn_param *conn_param);
+
+int rdma_connect_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
+		     struct rdma_ucm_ece *ece);
+
+/**
+ * rdma_listen - This function is called by the passive side to
+ *   listen for incoming connection requests.
+ *
+ * Users must have bound the rdma_cm_id to a local address by calling
+ * rdma_bind_addr before calling this routine.
+ */
+int rdma_listen(struct rdma_cm_id *id, int backlog);
+
+int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param);
+
+void rdma_lock_handler(struct rdma_cm_id *id);
+void rdma_unlock_handler(struct rdma_cm_id *id);
+int rdma_accept_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
+		    struct rdma_ucm_ece *ece);
+
+/**
+ * rdma_notify - Notifies the RDMA CM of an asynchronous event that has
+ * occurred on the connection.
+ * @id: Connection identifier to transition to established.
+ * @event: Asynchronous event.
+ *
+ * This routine should be invoked by users to notify the CM of relevant
+ * communication events.  Events that should be reported to the CM and
+ * when to report them are:
+ *
+ * IB_EVENT_COMM_EST - Used when a message is received on a connected
+ *    QP before an RTU has been received.
+ */
+int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event);
+
+/**
+ * rdma_reject - Called to reject a connection request or response.
+ */
+int rdma_reject(struct rdma_cm_id *id, const void *private_data,
+		u8 private_data_len, u8 reason);
+
+/**
+ * rdma_disconnect - This function disconnects the associated QP and
+ *   transitions it into the error state.
+ */
+int rdma_disconnect(struct rdma_cm_id *id);
+
+/**
+ * rdma_join_multicast - Join the multicast group specified by the given
+ *   address.
+ * @id: Communication identifier associated with the request.
+ * @addr: Multicast address identifying the group to join.
+ * @join_state: Multicast JoinState bitmap requested by port.
+ *		Bitmap is based on IB_SA_MCMEMBER_REC_JOIN_STATE bits.
+ * @context: User-defined context associated with the join request, returned
+ * to the user through the private_data pointer in multicast events.
+ */
+int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
+			u8 join_state, void *context);
+
+/**
+ * rdma_leave_multicast - Leave the multicast group specified by the given
+ *   address.
+ */
+void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr);
+
+/**
+ * rdma_set_service_type - Set the type of service associated with a
+ *   connection identifier.
+ * @id: Communication identifier to associated with service type.
+ * @tos: Type of service.
+ *
+ * The type of service is interpretted as a differentiated service
+ * field (RFC 2474).  The service type should be specified before
+ * performing route resolution, as existing communication on the
+ * connection identifier may be unaffected.  The type of service
+ * requested may not be supported by the network to all destinations.
+ */
+void rdma_set_service_type(struct rdma_cm_id *id, int tos);
+
+/**
+ * rdma_set_reuseaddr - Allow the reuse of local addresses when binding
+ *    the rdma_cm_id.
+ * @id: Communication identifier to configure.
+ * @reuse: Value indicating if the bound address is reusable.
+ *
+ * Reuse must be set before an address is bound to the id.
+ */
+int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse);
+
+/**
+ * rdma_set_afonly - Specify that listens are restricted to the
+ *    bound address family only.
+ * @id: Communication identifer to configure.
+ * @afonly: Value indicating if listens are restricted.
+ *
+ * Must be set before identifier is in the listening state.
+ */
+int rdma_set_afonly(struct rdma_cm_id *id, int afonly);
+
+int rdma_set_ack_timeout(struct rdma_cm_id *id, u8 timeout);
+ /**
+ * rdma_get_service_id - Return the IB service ID for a specified address.
+ * @id: Communication identifier associated with the address.
+ * @addr: Address for the service ID.
+ */
+__be64 rdma_get_service_id(struct rdma_cm_id *id, struct sockaddr *addr);
+
+/**
+ * rdma_reject_msg - return a pointer to a reject message string.
+ * @id: Communication identifier that received the REJECT event.
+ * @reason: Value returned in the REJECT event status field.
+ */
+const char *__attribute_const__ rdma_reject_msg(struct rdma_cm_id *id,
+						int reason);
+/**
+ * rdma_consumer_reject_data - return the consumer reject private data and
+ *			       length, if any.
+ * @id: Communication identifier that received the REJECT event.
+ * @ev: RDMA CM reject event.
+ * @data_len: Pointer to the resulting length of the consumer data.
+ */
+const void *rdma_consumer_reject_data(struct rdma_cm_id *id,
+				      struct rdma_cm_event *ev, u8 *data_len);
+
+/**
+ * rdma_read_gids - Return the SGID and DGID used for establishing
+ *                  connection. This can be used after rdma_resolve_addr()
+ *                  on client side. This can be use on new connection
+ *                  on server side. This is applicable to IB, RoCE, iWarp.
+ *                  If cm_id is not bound yet to the RDMA device, it doesn't
+ *                  copy and SGID or DGID to the given pointers.
+ * @id: Communication identifier whose GIDs are queried.
+ * @sgid: Pointer to SGID where SGID will be returned. It is optional.
+ * @dgid: Pointer to DGID where DGID will be returned. It is optional.
+ * Note: This API should not be used by any new ULPs or new code.
+ * Instead, users interested in querying GIDs should refer to path record
+ * of the rdma_cm_id to query the GIDs.
+ * This API is provided for compatibility for existing users.
+ */
+
+void rdma_read_gids(struct rdma_cm_id *cm_id, union ib_gid *sgid,
+		    union ib_gid *dgid);
+
+struct iw_cm_id *rdma_iw_cm_id(struct rdma_cm_id *cm_id);
+struct rdma_cm_id *rdma_res_to_id(struct rdma_restrack_entry *res);
+
+#endif /* RDMA_CM_H */
diff --git a/include/rdma/rdma_cm_ib.h b/include/rdma/rdma_cm_ib.h
new file mode 100644
index 000000000..8354e7de7
--- /dev/null
+++ b/include/rdma/rdma_cm_ib.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2006 Intel Corporation.  All rights reserved.
+ */
+
+#ifndef RDMA_CM_IB_H
+#define RDMA_CM_IB_H
+
+#include <rdma/rdma_cm.h>
+
+/**
+ * rdma_set_ib_path - Manually sets the path record used to establish a
+ *   connection.
+ * @id: Connection identifier associated with the request.
+ * @path_rec: Reference to the path record
+ *
+ * This call permits a user to specify routing information for rdma_cm_id's
+ * bound to InfiniBand devices. It is called on the client side of a
+ * connection and replaces the call to rdma_resolve_route.
+ */
+int rdma_set_ib_path(struct rdma_cm_id *id,
+		     struct sa_path_rec *path_rec);
+
+/* Global qkey for UDP QPs and multicast groups. */
+#define RDMA_UDP_QKEY 0x01234567
+
+#endif /* RDMA_CM_IB_H */
diff --git a/include/rdma/rdma_counter.h b/include/rdma/rdma_counter.h
new file mode 100644
index 000000000..eb99856e8
--- /dev/null
+++ b/include/rdma/rdma_counter.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2019 Mellanox Technologies. All rights reserved.
+ */
+
+#ifndef _RDMA_COUNTER_H_
+#define _RDMA_COUNTER_H_
+
+#include <linux/mutex.h>
+#include <linux/pid_namespace.h>
+
+#include <rdma/restrack.h>
+#include <rdma/rdma_netlink.h>
+
+struct ib_device;
+struct ib_qp;
+
+struct auto_mode_param {
+	int qp_type;
+};
+
+struct rdma_counter_mode {
+	enum rdma_nl_counter_mode mode;
+	enum rdma_nl_counter_mask mask;
+	struct auto_mode_param param;
+};
+
+struct rdma_port_counter {
+	struct rdma_counter_mode mode;
+	struct rdma_hw_stats *hstats;
+	unsigned int num_counters;
+	struct mutex lock;
+};
+
+struct rdma_counter {
+	struct rdma_restrack_entry	res;
+	struct ib_device		*device;
+	uint32_t			id;
+	struct kref			kref;
+	struct rdma_counter_mode	mode;
+	struct mutex			lock;
+	struct rdma_hw_stats		*stats;
+	u8				port;
+};
+
+void rdma_counter_init(struct ib_device *dev);
+void rdma_counter_release(struct ib_device *dev);
+int rdma_counter_set_auto_mode(struct ib_device *dev, u8 port,
+			       bool on, enum rdma_nl_counter_mask mask);
+int rdma_counter_bind_qp_auto(struct ib_qp *qp, u8 port);
+int rdma_counter_unbind_qp(struct ib_qp *qp, bool force);
+
+int rdma_counter_query_stats(struct rdma_counter *counter);
+u64 rdma_counter_get_hwstat_value(struct ib_device *dev, u8 port, u32 index);
+int rdma_counter_bind_qpn(struct ib_device *dev, u8 port,
+			  u32 qp_num, u32 counter_id);
+int rdma_counter_bind_qpn_alloc(struct ib_device *dev, u8 port,
+				u32 qp_num, u32 *counter_id);
+int rdma_counter_unbind_qpn(struct ib_device *dev, u8 port,
+			    u32 qp_num, u32 counter_id);
+int rdma_counter_get_mode(struct ib_device *dev, u8 port,
+			  enum rdma_nl_counter_mode *mode,
+			  enum rdma_nl_counter_mask *mask);
+
+#endif /* _RDMA_COUNTER_H_ */
diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h
new file mode 100644
index 000000000..c2a79aeee
--- /dev/null
+++ b/include/rdma/rdma_netlink.h
@@ -0,0 +1,125 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _RDMA_NETLINK_H
+#define _RDMA_NETLINK_H
+
+#include <linux/netlink.h>
+#include <uapi/rdma/rdma_netlink.h>
+
+enum {
+	RDMA_NLDEV_ATTR_EMPTY_STRING = 1,
+	RDMA_NLDEV_ATTR_ENTRY_STRLEN = 16,
+	RDMA_NLDEV_ATTR_CHARDEV_TYPE_SIZE = 32,
+};
+
+struct rdma_nl_cbs {
+	int (*doit)(struct sk_buff *skb, struct nlmsghdr *nlh,
+		    struct netlink_ext_ack *extack);
+	int (*dump)(struct sk_buff *skb, struct netlink_callback *nlcb);
+	u8 flags;
+};
+
+enum rdma_nl_flags {
+	/* Require CAP_NET_ADMIN */
+	RDMA_NL_ADMIN_PERM	= 1 << 0,
+};
+
+/* Define this module as providing netlink services for NETLINK_RDMA, with
+ * index _index.  Since the client indexes were setup in a uapi header as an
+ * enum and we do no want to change that, the user must supply the expanded
+ * constant as well and the compiler checks they are the same.
+ */
+#define MODULE_ALIAS_RDMA_NETLINK(_index, _val)                                \
+	static inline void __maybe_unused __chk_##_index(void)                 \
+	{                                                                      \
+		BUILD_BUG_ON(_index != _val);                                  \
+	}                                                                      \
+	MODULE_ALIAS("rdma-netlink-subsys-" __stringify(_val))
+
+/**
+ * Register client in RDMA netlink.
+ * @index: Index of the added client
+ * @cb_table: A table for op->callback
+ */
+void rdma_nl_register(unsigned int index,
+		      const struct rdma_nl_cbs cb_table[]);
+
+/**
+ * Remove a client from IB netlink.
+ * @index: Index of the removed IB client.
+ */
+void rdma_nl_unregister(unsigned int index);
+
+/**
+ * Put a new message in a supplied skb.
+ * @skb: The netlink skb.
+ * @nlh: Pointer to put the header of the new netlink message.
+ * @seq: The message sequence number.
+ * @len: The requested message length to allocate.
+ * @client: Calling IB netlink client.
+ * @op: message content op.
+ * Returns the allocated buffer on success and NULL on failure.
+ */
+void *ibnl_put_msg(struct sk_buff *skb, struct nlmsghdr **nlh, int seq,
+		   int len, int client, int op, int flags);
+/**
+ * Put a new attribute in a supplied skb.
+ * @skb: The netlink skb.
+ * @nlh: Header of the netlink message to append the attribute to.
+ * @len: The length of the attribute data.
+ * @data: The attribute data to put.
+ * @type: The attribute type.
+ * Returns the 0 and a negative error code on failure.
+ */
+int ibnl_put_attr(struct sk_buff *skb, struct nlmsghdr *nlh,
+		  int len, void *data, int type);
+
+/**
+ * Send the supplied skb to a specific userspace PID.
+ * @net: Net namespace in which to send the skb
+ * @skb: The netlink skb
+ * @pid: Userspace netlink process ID
+ * Returns 0 on success or a negative error code.
+ */
+int rdma_nl_unicast(struct net *net, struct sk_buff *skb, u32 pid);
+
+/**
+ * Send, with wait/1 retry, the supplied skb to a specific userspace PID.
+ * @net: Net namespace in which to send the skb
+ * @skb: The netlink skb
+ * @pid: Userspace netlink process ID
+ * Returns 0 on success or a negative error code.
+ */
+int rdma_nl_unicast_wait(struct net *net, struct sk_buff *skb, __u32 pid);
+
+/**
+ * Send the supplied skb to a netlink group.
+ * @net: Net namespace in which to send the skb
+ * @skb: The netlink skb
+ * @group: Netlink group ID
+ * @flags: allocation flags
+ * Returns 0 on success or a negative error code.
+ */
+int rdma_nl_multicast(struct net *net, struct sk_buff *skb,
+		      unsigned int group, gfp_t flags);
+
+/**
+ * Check if there are any listeners to the netlink group
+ * @group: the netlink group ID
+ * Returns true on success or false if no listeners.
+ */
+bool rdma_nl_chk_listeners(unsigned int group);
+
+struct rdma_link_ops {
+	struct list_head list;
+	const char *type;
+	int (*newlink)(const char *ibdev_name, struct net_device *ndev);
+};
+
+void rdma_link_register(struct rdma_link_ops *ops);
+void rdma_link_unregister(struct rdma_link_ops *ops);
+
+#define MODULE_ALIAS_RDMA_LINK(type) MODULE_ALIAS("rdma-link-" type)
+#define MODULE_ALIAS_RDMA_CLIENT(type) MODULE_ALIAS("rdma-client-" type)
+
+#endif /* _RDMA_NETLINK_H */
diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h
new file mode 100644
index 000000000..9fd217b24
--- /dev/null
+++ b/include/rdma/rdma_vt.h
@@ -0,0 +1,532 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+/*
+ * Copyright(c) 2016 - 2019 Intel Corporation.
+ */
+
+#ifndef DEF_RDMA_VT_H
+#define DEF_RDMA_VT_H
+
+/*
+ * Structure that low level drivers will populate in order to register with the
+ * rdmavt layer.
+ */
+
+#include <linux/spinlock.h>
+#include <linux/list.h>
+#include <linux/hash.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_mad.h>
+#include <rdma/rdmavt_mr.h>
+
+#define RVT_MAX_PKEY_VALUES 16
+
+#define RVT_MAX_TRAP_LEN 100 /* Limit pending trap list */
+#define RVT_MAX_TRAP_LISTS 5 /*((IB_NOTICE_TYPE_INFO & 0x0F) + 1)*/
+#define RVT_TRAP_TIMEOUT 4096 /* 4.096 usec */
+
+struct trap_list {
+	u32 list_len;
+	struct list_head list;
+};
+
+struct rvt_qp;
+struct rvt_qpn_table;
+struct rvt_ibport {
+	struct rvt_qp __rcu *qp[2];
+	struct ib_mad_agent *send_agent;	/* agent for SMI (traps) */
+	struct rb_root mcast_tree;
+	spinlock_t lock;		/* protect changes in this struct */
+
+	/* non-zero when timer is set */
+	unsigned long mkey_lease_timeout;
+	unsigned long trap_timeout;
+	__be64 gid_prefix;      /* in network order */
+	__be64 mkey;
+	u64 tid;
+	u32 port_cap_flags;
+	u16 port_cap3_flags;
+	u32 pma_sample_start;
+	u32 pma_sample_interval;
+	__be16 pma_counter_select[5];
+	u16 pma_tag;
+	u16 mkey_lease_period;
+	u32 sm_lid;
+	u8 sm_sl;
+	u8 mkeyprot;
+	u8 subnet_timeout;
+	u8 vl_high_limit;
+
+	/*
+	 * Driver is expected to keep these up to date. These
+	 * counters are informational only and not required to be
+	 * completely accurate.
+	 */
+	u64 n_rc_resends;
+	u64 n_seq_naks;
+	u64 n_rdma_seq;
+	u64 n_rnr_naks;
+	u64 n_other_naks;
+	u64 n_loop_pkts;
+	u64 n_pkt_drops;
+	u64 n_vl15_dropped;
+	u64 n_rc_timeouts;
+	u64 n_dmawait;
+	u64 n_unaligned;
+	u64 n_rc_dupreq;
+	u64 n_rc_seqnak;
+	u64 n_rc_crwaits;
+	u16 pkey_violations;
+	u16 qkey_violations;
+	u16 mkey_violations;
+
+	/* Hot-path per CPU counters to avoid cacheline trading to update */
+	u64 z_rc_acks;
+	u64 z_rc_qacks;
+	u64 z_rc_delayed_comp;
+	u64 __percpu *rc_acks;
+	u64 __percpu *rc_qacks;
+	u64 __percpu *rc_delayed_comp;
+
+	void *priv; /* driver private data */
+
+	/*
+	 * The pkey table is allocated and maintained by the driver. Drivers
+	 * need to have access to this before registering with rdmav. However
+	 * rdmavt will need access to it so drivers need to proviee this during
+	 * the attach port API call.
+	 */
+	u16 *pkey_table;
+
+	struct rvt_ah *sm_ah;
+
+	/*
+	 * Keep a list of traps that have not been repressed.  They will be
+	 * resent based on trap_timer.
+	 */
+	struct trap_list trap_lists[RVT_MAX_TRAP_LISTS];
+	struct timer_list trap_timer;
+};
+
+#define RVT_CQN_MAX 16 /* maximum length of cq name */
+
+#define RVT_SGE_COPY_MEMCPY	0
+#define RVT_SGE_COPY_CACHELESS	1
+#define RVT_SGE_COPY_ADAPTIVE	2
+
+/*
+ * Things that are driver specific, module parameters in hfi1 and qib
+ */
+struct rvt_driver_params {
+	struct ib_device_attr props;
+
+	/*
+	 * Anything driver specific that is not covered by props
+	 * For instance special module parameters. Goes here.
+	 */
+	unsigned int lkey_table_size;
+	unsigned int qp_table_size;
+	unsigned int sge_copy_mode;
+	unsigned int wss_threshold;
+	unsigned int wss_clean_period;
+	int qpn_start;
+	int qpn_inc;
+	int qpn_res_start;
+	int qpn_res_end;
+	int nports;
+	int npkeys;
+	int node;
+	int psn_mask;
+	int psn_shift;
+	int psn_modify_mask;
+	u32 core_cap_flags;
+	u32 max_mad_size;
+	u8 qos_shift;
+	u8 max_rdma_atomic;
+	u8 extra_rdma_atomic;
+	u8 reserved_operations;
+};
+
+/* User context */
+struct rvt_ucontext {
+	struct ib_ucontext ibucontext;
+};
+
+/* Protection domain */
+struct rvt_pd {
+	struct ib_pd ibpd;
+	bool user;
+};
+
+/* Address handle */
+struct rvt_ah {
+	struct ib_ah ibah;
+	struct rdma_ah_attr attr;
+	u8 vl;
+	u8 log_pmtu;
+};
+
+/*
+ * This structure is used by rvt_mmap() to validate an offset
+ * when an mmap() request is made.  The vm_area_struct then uses
+ * this as its vm_private_data.
+ */
+struct rvt_mmap_info {
+	struct list_head pending_mmaps;
+	struct ib_ucontext *context;
+	void *obj;
+	__u64 offset;
+	struct kref ref;
+	u32 size;
+};
+
+/* memory working set size */
+struct rvt_wss {
+	unsigned long *entries;
+	atomic_t total_count;
+	atomic_t clean_counter;
+	atomic_t clean_entry;
+
+	int threshold;
+	int num_entries;
+	long pages_mask;
+	unsigned int clean_period;
+};
+
+struct rvt_dev_info;
+struct rvt_swqe;
+struct rvt_driver_provided {
+	/*
+	 * Which functions are required depends on which verbs rdmavt is
+	 * providing and which verbs the driver is overriding. See
+	 * check_support() for details.
+	 */
+
+	/* hot path calldowns in a single cacheline */
+
+	/*
+	 * Give the driver a notice that there is send work to do. It is up to
+	 * the driver to generally push the packets out, this just queues the
+	 * work with the driver. There are two variants here. The no_lock
+	 * version requires the s_lock not to be held. The other assumes the
+	 * s_lock is held.
+	 */
+	bool (*schedule_send)(struct rvt_qp *qp);
+	bool (*schedule_send_no_lock)(struct rvt_qp *qp);
+
+	/*
+	 * Driver specific work request setup and checking.
+	 * This function is allowed to perform any setup, checks, or
+	 * adjustments required to the SWQE in order to be usable by
+	 * underlying protocols. This includes private data structure
+	 * allocations.
+	 */
+	int (*setup_wqe)(struct rvt_qp *qp, struct rvt_swqe *wqe,
+			 bool *call_send);
+
+	/*
+	 * Sometimes rdmavt needs to kick the driver's send progress. That is
+	 * done by this call back.
+	 */
+	void (*do_send)(struct rvt_qp *qp);
+
+	/*
+	 * Returns a pointer to the undelying hardware's PCI device. This is
+	 * used to display information as to what hardware is being referenced
+	 * in an output message
+	 */
+	struct pci_dev * (*get_pci_dev)(struct rvt_dev_info *rdi);
+
+	/*
+	 * Allocate a private queue pair data structure for driver specific
+	 * information which is opaque to rdmavt.  Errors are returned via
+	 * ERR_PTR(err).  The driver is free to return NULL or a valid
+	 * pointer.
+	 */
+	void * (*qp_priv_alloc)(struct rvt_dev_info *rdi, struct rvt_qp *qp);
+
+	/*
+	 * Init a struture allocated with qp_priv_alloc(). This should be
+	 * called after all qp fields have been initialized in rdmavt.
+	 */
+	int (*qp_priv_init)(struct rvt_dev_info *rdi, struct rvt_qp *qp,
+			    struct ib_qp_init_attr *init_attr);
+
+	/*
+	 * Free the driver's private qp structure.
+	 */
+	void (*qp_priv_free)(struct rvt_dev_info *rdi, struct rvt_qp *qp);
+
+	/*
+	 * Inform the driver the particular qp in quesiton has been reset so
+	 * that it can clean up anything it needs to.
+	 */
+	void (*notify_qp_reset)(struct rvt_qp *qp);
+
+	/*
+	 * Get a path mtu from the driver based on qp attributes.
+	 */
+	int (*get_pmtu_from_attr)(struct rvt_dev_info *rdi, struct rvt_qp *qp,
+				  struct ib_qp_attr *attr);
+
+	/*
+	 * Notify driver that it needs to flush any outstanding IO requests that
+	 * are waiting on a qp.
+	 */
+	void (*flush_qp_waiters)(struct rvt_qp *qp);
+
+	/*
+	 * Notify driver to stop its queue of sending packets. Nothing else
+	 * should be posted to the queue pair after this has been called.
+	 */
+	void (*stop_send_queue)(struct rvt_qp *qp);
+
+	/*
+	 * Have the drivr drain any in progress operations
+	 */
+	void (*quiesce_qp)(struct rvt_qp *qp);
+
+	/*
+	 * Inform the driver a qp has went to error state.
+	 */
+	void (*notify_error_qp)(struct rvt_qp *qp);
+
+	/*
+	 * Get an MTU for a qp.
+	 */
+	u32 (*mtu_from_qp)(struct rvt_dev_info *rdi, struct rvt_qp *qp,
+			   u32 pmtu);
+	/*
+	 * Convert an mtu to a path mtu
+	 */
+	int (*mtu_to_path_mtu)(u32 mtu);
+
+	/*
+	 * Get the guid of a port in big endian byte order
+	 */
+	int (*get_guid_be)(struct rvt_dev_info *rdi, struct rvt_ibport *rvp,
+			   int guid_index, __be64 *guid);
+
+	/*
+	 * Query driver for the state of the port.
+	 */
+	int (*query_port_state)(struct rvt_dev_info *rdi, u8 port_num,
+				struct ib_port_attr *props);
+
+	/*
+	 * Tell driver to shutdown a port
+	 */
+	int (*shut_down_port)(struct rvt_dev_info *rdi, u8 port_num);
+
+	/* Tell driver to send a trap for changed  port capabilities */
+	void (*cap_mask_chg)(struct rvt_dev_info *rdi, u8 port_num);
+
+	/*
+	 * The following functions can be safely ignored completely. Any use of
+	 * these is checked for NULL before blindly calling. Rdmavt should also
+	 * be functional if drivers omit these.
+	 */
+
+	/* Called to inform the driver that all qps should now be freed. */
+	unsigned (*free_all_qps)(struct rvt_dev_info *rdi);
+
+	/* Driver specific AH validation */
+	int (*check_ah)(struct ib_device *, struct rdma_ah_attr *);
+
+	/* Inform the driver a new AH has been created */
+	void (*notify_new_ah)(struct ib_device *, struct rdma_ah_attr *,
+			      struct rvt_ah *);
+
+	/* Let the driver pick the next queue pair number*/
+	int (*alloc_qpn)(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt,
+			 enum ib_qp_type type, u8 port_num);
+
+	/* Determine if its safe or allowed to modify the qp */
+	int (*check_modify_qp)(struct rvt_qp *qp, struct ib_qp_attr *attr,
+			       int attr_mask, struct ib_udata *udata);
+
+	/* Driver specific QP modification/notification-of */
+	void (*modify_qp)(struct rvt_qp *qp, struct ib_qp_attr *attr,
+			  int attr_mask, struct ib_udata *udata);
+
+	/* Notify driver a mad agent has been created */
+	void (*notify_create_mad_agent)(struct rvt_dev_info *rdi, int port_idx);
+
+	/* Notify driver a mad agent has been removed */
+	void (*notify_free_mad_agent)(struct rvt_dev_info *rdi, int port_idx);
+
+	/* Notify driver to restart rc */
+	void (*notify_restart_rc)(struct rvt_qp *qp, u32 psn, int wait);
+
+	/* Get and return CPU to pin CQ processing thread */
+	int (*comp_vect_cpu_lookup)(struct rvt_dev_info *rdi, int comp_vect);
+};
+
+struct rvt_dev_info {
+	struct ib_device ibdev; /* Keep this first. Nothing above here */
+
+	/*
+	 * Prior to calling for registration the driver will be responsible for
+	 * allocating space for this structure.
+	 *
+	 * The driver will also be responsible for filling in certain members of
+	 * dparms.props. The driver needs to fill in dparms exactly as it would
+	 * want values reported to a ULP. This will be returned to the caller
+	 * in rdmavt's device. The driver should also therefore refrain from
+	 * modifying this directly after registration with rdmavt.
+	 */
+
+	/* Driver specific properties */
+	struct rvt_driver_params dparms;
+
+	/* post send table */
+	const struct rvt_operation_params *post_parms;
+
+	/* opcode translation table */
+	const enum ib_wc_opcode *wc_opcode;
+
+	/* Driver specific helper functions */
+	struct rvt_driver_provided driver_f;
+
+	struct rvt_mregion __rcu *dma_mr;
+	struct rvt_lkey_table lkey_table;
+
+	/* Internal use */
+	int n_pds_allocated;
+	spinlock_t n_pds_lock; /* Protect pd allocated count */
+
+	int n_ahs_allocated;
+	spinlock_t n_ahs_lock; /* Protect ah allocated count */
+
+	u32 n_srqs_allocated;
+	spinlock_t n_srqs_lock; /* Protect srqs allocated count */
+
+	int flags;
+	struct rvt_ibport **ports;
+
+	/* QP */
+	struct rvt_qp_ibdev *qp_dev;
+	u32 n_qps_allocated;    /* number of QPs allocated for device */
+	u32 n_rc_qps;		/* number of RC QPs allocated for device */
+	u32 busy_jiffies;	/* timeout scaling based on RC QP count */
+	spinlock_t n_qps_lock;	/* protect qps, rc qps and busy jiffy counts */
+
+	/* memory maps */
+	struct list_head pending_mmaps;
+	spinlock_t mmap_offset_lock; /* protect mmap_offset */
+	u32 mmap_offset;
+	spinlock_t pending_lock; /* protect pending mmap list */
+
+	/* CQ */
+	u32 n_cqs_allocated;    /* number of CQs allocated for device */
+	spinlock_t n_cqs_lock; /* protect count of in use cqs */
+
+	/* Multicast */
+	u32 n_mcast_grps_allocated; /* number of mcast groups allocated */
+	spinlock_t n_mcast_grps_lock;
+
+	/* Memory Working Set Size */
+	struct rvt_wss *wss;
+};
+
+/**
+ * rvt_set_ibdev_name - Craft an IB device name from client info
+ * @rdi: pointer to the client rvt_dev_info structure
+ * @name: client specific name
+ * @unit: client specific unit number.
+ */
+static inline void rvt_set_ibdev_name(struct rvt_dev_info *rdi,
+				      const char *fmt, const char *name,
+				      const int unit)
+{
+	/*
+	 * FIXME: rvt and its users want to touch the ibdev before
+	 * registration and have things like the name work. We don't have the
+	 * infrastructure in the core to support this directly today, hack it
+	 * to work by setting the name manually here.
+	 */
+	dev_set_name(&rdi->ibdev.dev, fmt, name, unit);
+	strlcpy(rdi->ibdev.name, dev_name(&rdi->ibdev.dev), IB_DEVICE_NAME_MAX);
+}
+
+/**
+ * rvt_get_ibdev_name - return the IB name
+ * @rdi: rdmavt device
+ *
+ * Return the registered name of the device.
+ */
+static inline const char *rvt_get_ibdev_name(const struct rvt_dev_info *rdi)
+{
+	return dev_name(&rdi->ibdev.dev);
+}
+
+static inline struct rvt_pd *ibpd_to_rvtpd(struct ib_pd *ibpd)
+{
+	return container_of(ibpd, struct rvt_pd, ibpd);
+}
+
+static inline struct rvt_ah *ibah_to_rvtah(struct ib_ah *ibah)
+{
+	return container_of(ibah, struct rvt_ah, ibah);
+}
+
+static inline struct rvt_dev_info *ib_to_rvt(struct ib_device *ibdev)
+{
+	return  container_of(ibdev, struct rvt_dev_info, ibdev);
+}
+
+static inline unsigned rvt_get_npkeys(struct rvt_dev_info *rdi)
+{
+	/*
+	 * All ports have same number of pkeys.
+	 */
+	return rdi->dparms.npkeys;
+}
+
+/*
+ * Return the max atomic suitable for determining
+ * the size of the ack ring buffer in a QP.
+ */
+static inline unsigned int rvt_max_atomic(struct rvt_dev_info *rdi)
+{
+	return rdi->dparms.max_rdma_atomic +
+		rdi->dparms.extra_rdma_atomic + 1;
+}
+
+static inline unsigned int rvt_size_atomic(struct rvt_dev_info *rdi)
+{
+	return rdi->dparms.max_rdma_atomic +
+		rdi->dparms.extra_rdma_atomic;
+}
+
+/*
+ * Return the indexed PKEY from the port PKEY table.
+ */
+static inline u16 rvt_get_pkey(struct rvt_dev_info *rdi,
+			       int port_index,
+			       unsigned index)
+{
+	if (index >= rvt_get_npkeys(rdi))
+		return 0;
+	else
+		return rdi->ports[port_index]->pkey_table[index];
+}
+
+struct rvt_dev_info *rvt_alloc_device(size_t size, int nports);
+void rvt_dealloc_device(struct rvt_dev_info *rdi);
+int rvt_register_device(struct rvt_dev_info *rvd);
+void rvt_unregister_device(struct rvt_dev_info *rvd);
+int rvt_check_ah(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr);
+int rvt_init_port(struct rvt_dev_info *rdi, struct rvt_ibport *port,
+		  int port_index, u16 *pkey_table);
+int rvt_fast_reg_mr(struct rvt_qp *qp, struct ib_mr *ibmr, u32 key,
+		    int access);
+int rvt_invalidate_rkey(struct rvt_qp *qp, u32 rkey);
+int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge,
+		u32 len, u64 vaddr, u32 rkey, int acc);
+int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
+		struct rvt_sge *isge, struct rvt_sge *last_sge,
+		struct ib_sge *sge, int acc);
+struct rvt_mcast *rvt_mcast_find(struct rvt_ibport *ibp, union ib_gid *mgid,
+				 u16 lid);
+
+#endif          /* DEF_RDMA_VT_H */
diff --git a/include/rdma/rdmavt_cq.h b/include/rdma/rdmavt_cq.h
new file mode 100644
index 000000000..1fe2bb5a6
--- /dev/null
+++ b/include/rdma/rdmavt_cq.h
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+/*
+ * Copyright(c) 2016 - 2018 Intel Corporation.
+ */
+
+#ifndef DEF_RDMAVT_INCCQ_H
+#define DEF_RDMAVT_INCCQ_H
+
+#include <linux/kthread.h>
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_verbs.h>
+
+/*
+ * Define an ib_cq_notify value that is not valid so we know when CQ
+ * notifications are armed.
+ */
+#define RVT_CQ_NONE      (IB_CQ_NEXT_COMP + 1)
+
+/*
+ * Define read macro that apply smp_load_acquire memory barrier
+ * when reading indice of circular buffer that mmaped to user space.
+ */
+#define RDMA_READ_UAPI_ATOMIC(member) smp_load_acquire(&(member).val)
+
+/*
+ * Define write macro that uses smp_store_release memory barrier
+ * when writing indice of circular buffer that mmaped to user space.
+ */
+#define RDMA_WRITE_UAPI_ATOMIC(member, x) smp_store_release(&(member).val, x)
+#include <rdma/rvt-abi.h>
+
+/*
+ * This structure is used to contain the head pointer, tail pointer,
+ * and completion queue entries as a single memory allocation so
+ * it can be mmap'ed into user space.
+ */
+struct rvt_k_cq_wc {
+	u32 head;               /* index of next entry to fill */
+	u32 tail;               /* index of next ib_poll_cq() entry */
+	struct ib_wc kqueue[];
+};
+
+/*
+ * The completion queue structure.
+ */
+struct rvt_cq {
+	struct ib_cq ibcq;
+	struct work_struct comptask;
+	spinlock_t lock; /* protect changes in this struct */
+	u8 notify;
+	u8 triggered;
+	u8 cq_full;
+	int comp_vector_cpu;
+	struct rvt_dev_info *rdi;
+	struct rvt_cq_wc *queue;
+	struct rvt_mmap_info *ip;
+	struct rvt_k_cq_wc *kqueue;
+};
+
+static inline struct rvt_cq *ibcq_to_rvtcq(struct ib_cq *ibcq)
+{
+	return container_of(ibcq, struct rvt_cq, ibcq);
+}
+
+bool rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited);
+
+#endif          /* DEF_RDMAVT_INCCQH */
diff --git a/include/rdma/rdmavt_mr.h b/include/rdma/rdmavt_mr.h
new file mode 100644
index 000000000..c3367e983
--- /dev/null
+++ b/include/rdma/rdmavt_mr.h
@@ -0,0 +1,155 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ */
+
+#ifndef DEF_RDMAVT_INCMR_H
+#define DEF_RDMAVT_INCMR_H
+
+/*
+ * For Memory Regions. This stuff should probably be moved into rdmavt/mr.h once
+ * drivers no longer need access to the MR directly.
+ */
+#include <linux/percpu-refcount.h>
+
+/*
+ * A segment is a linear region of low physical memory.
+ * Used by the verbs layer.
+ */
+struct rvt_seg {
+	void *vaddr;
+	size_t length;
+};
+
+/* The number of rvt_segs that fit in a page. */
+#define RVT_SEGSZ     (PAGE_SIZE / sizeof(struct rvt_seg))
+
+struct rvt_segarray {
+	struct rvt_seg segs[RVT_SEGSZ];
+};
+
+struct rvt_mregion {
+	struct ib_pd *pd;       /* shares refcnt of ibmr.pd */
+	u64 user_base;          /* User's address for this region */
+	u64 iova;               /* IB start address of this region */
+	size_t length;
+	u32 lkey;
+	u32 offset;             /* offset (bytes) to start of region */
+	int access_flags;
+	u32 max_segs;           /* number of rvt_segs in all the arrays */
+	u32 mapsz;              /* size of the map array */
+	atomic_t lkey_invalid;	/* true if current lkey is invalid */
+	u8  page_shift;         /* 0 - non unform/non powerof2 sizes */
+	u8  lkey_published;     /* in global table */
+	struct percpu_ref refcount;
+	struct completion comp; /* complete when refcount goes to zero */
+	struct rvt_segarray *map[];    /* the segments */
+};
+
+#define RVT_MAX_LKEY_TABLE_BITS 23
+
+struct rvt_lkey_table {
+	/* read mostly fields */
+	u32 max;                /* size of the table */
+	u32 shift;              /* lkey/rkey shift */
+	struct rvt_mregion __rcu **table;
+	/* writeable fields */
+	/* protect changes in this struct */
+	spinlock_t lock ____cacheline_aligned_in_smp;
+	u32 next;               /* next unused index (speeds search) */
+	u32 gen;                /* generation count */
+};
+
+/*
+ * These keep track of the copy progress within a memory region.
+ * Used by the verbs layer.
+ */
+struct rvt_sge {
+	struct rvt_mregion *mr;
+	void *vaddr;            /* kernel virtual address of segment */
+	u32 sge_length;         /* length of the SGE */
+	u32 length;             /* remaining length of the segment */
+	u16 m;                  /* current index: mr->map[m] */
+	u16 n;                  /* current index: mr->map[m]->segs[n] */
+};
+
+struct rvt_sge_state {
+	struct rvt_sge *sg_list;      /* next SGE to be used if any */
+	struct rvt_sge sge;   /* progress state for the current SGE */
+	u32 total_len;
+	u8 num_sge;
+};
+
+static inline void rvt_put_mr(struct rvt_mregion *mr)
+{
+	percpu_ref_put(&mr->refcount);
+}
+
+static inline void rvt_get_mr(struct rvt_mregion *mr)
+{
+	percpu_ref_get(&mr->refcount);
+}
+
+static inline void rvt_put_ss(struct rvt_sge_state *ss)
+{
+	while (ss->num_sge) {
+		rvt_put_mr(ss->sge.mr);
+		if (--ss->num_sge)
+			ss->sge = *ss->sg_list++;
+	}
+}
+
+static inline u32 rvt_get_sge_length(struct rvt_sge *sge, u32 length)
+{
+	u32 len = sge->length;
+
+	if (len > length)
+		len = length;
+	if (len > sge->sge_length)
+		len = sge->sge_length;
+
+	return len;
+}
+
+static inline void rvt_update_sge(struct rvt_sge_state *ss, u32 length,
+				  bool release)
+{
+	struct rvt_sge *sge = &ss->sge;
+
+	sge->vaddr += length;
+	sge->length -= length;
+	sge->sge_length -= length;
+	if (sge->sge_length == 0) {
+		if (release)
+			rvt_put_mr(sge->mr);
+		if (--ss->num_sge)
+			*sge = *ss->sg_list++;
+	} else if (sge->length == 0 && sge->mr->lkey) {
+		if (++sge->n >= RVT_SEGSZ) {
+			if (++sge->m >= sge->mr->mapsz)
+				return;
+			sge->n = 0;
+		}
+		sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr;
+		sge->length = sge->mr->map[sge->m]->segs[sge->n].length;
+	}
+}
+
+static inline void rvt_skip_sge(struct rvt_sge_state *ss, u32 length,
+				bool release)
+{
+	struct rvt_sge *sge = &ss->sge;
+
+	while (length) {
+		u32 len = rvt_get_sge_length(sge, length);
+
+		WARN_ON_ONCE(len == 0);
+		rvt_update_sge(ss, len, release);
+		length -= len;
+	}
+}
+
+bool rvt_ss_has_lkey(struct rvt_sge_state *ss, u32 lkey);
+bool rvt_mr_has_lkey(struct rvt_mregion *mr, u32 lkey);
+
+#endif          /* DEF_RDMAVT_INCMRH */
diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h
new file mode 100644
index 000000000..8275954f5
--- /dev/null
+++ b/include/rdma/rdmavt_qp.h
@@ -0,0 +1,1003 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+/*
+ * Copyright(c) 2016 - 2020 Intel Corporation.
+ */
+
+#ifndef DEF_RDMAVT_INCQP_H
+#define DEF_RDMAVT_INCQP_H
+
+#include <rdma/rdma_vt.h>
+#include <rdma/ib_pack.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/rdmavt_cq.h>
+#include <rdma/rvt-abi.h>
+/*
+ * Atomic bit definitions for r_aflags.
+ */
+#define RVT_R_WRID_VALID        0
+#define RVT_R_REWIND_SGE        1
+
+/*
+ * Bit definitions for r_flags.
+ */
+#define RVT_R_REUSE_SGE 0x01
+#define RVT_R_RDMAR_SEQ 0x02
+#define RVT_R_RSP_NAK   0x04
+#define RVT_R_RSP_SEND  0x08
+#define RVT_R_COMM_EST  0x10
+
+/*
+ * If a packet's QP[23:16] bits match this value, then it is
+ * a PSM packet and the hardware will expect a KDETH header
+ * following the BTH.
+ */
+#define RVT_KDETH_QP_PREFIX       0x80
+#define RVT_KDETH_QP_SUFFIX       0xffff
+#define RVT_KDETH_QP_PREFIX_MASK  0x00ff0000
+#define RVT_KDETH_QP_PREFIX_SHIFT 16
+#define RVT_KDETH_QP_BASE         (u32)(RVT_KDETH_QP_PREFIX << \
+					RVT_KDETH_QP_PREFIX_SHIFT)
+#define RVT_KDETH_QP_MAX          (u32)(RVT_KDETH_QP_BASE + RVT_KDETH_QP_SUFFIX)
+
+/*
+ * If a packet's LNH == BTH and DEST QPN[23:16] in the BTH match this
+ * prefix value, then it is an AIP packet with a DETH containing the entropy
+ * value in byte 4 following the BTH.
+ */
+#define RVT_AIP_QP_PREFIX       0x81
+#define RVT_AIP_QP_SUFFIX       0xffff
+#define RVT_AIP_QP_PREFIX_MASK  0x00ff0000
+#define RVT_AIP_QP_PREFIX_SHIFT 16
+#define RVT_AIP_QP_BASE         (u32)(RVT_AIP_QP_PREFIX << \
+				      RVT_AIP_QP_PREFIX_SHIFT)
+#define RVT_AIP_QPN_MAX         BIT(RVT_AIP_QP_PREFIX_SHIFT)
+#define RVT_AIP_QP_MAX          (u32)(RVT_AIP_QP_BASE + RVT_AIP_QPN_MAX - 1)
+
+/*
+ * Bit definitions for s_flags.
+ *
+ * RVT_S_SIGNAL_REQ_WR - set if QP send WRs contain completion signaled
+ * RVT_S_BUSY - send tasklet is processing the QP
+ * RVT_S_TIMER - the RC retry timer is active
+ * RVT_S_ACK_PENDING - an ACK is waiting to be sent after RDMA read/atomics
+ * RVT_S_WAIT_FENCE - waiting for all prior RDMA read or atomic SWQEs
+ *                         before processing the next SWQE
+ * RVT_S_WAIT_RDMAR - waiting for a RDMA read or atomic SWQE to complete
+ *                         before processing the next SWQE
+ * RVT_S_WAIT_RNR - waiting for RNR timeout
+ * RVT_S_WAIT_SSN_CREDIT - waiting for RC credits to process next SWQE
+ * RVT_S_WAIT_DMA - waiting for send DMA queue to drain before generating
+ *                  next send completion entry not via send DMA
+ * RVT_S_WAIT_PIO - waiting for a send buffer to be available
+ * RVT_S_WAIT_TX - waiting for a struct verbs_txreq to be available
+ * RVT_S_WAIT_DMA_DESC - waiting for DMA descriptors to be available
+ * RVT_S_WAIT_KMEM - waiting for kernel memory to be available
+ * RVT_S_WAIT_PSN - waiting for a packet to exit the send DMA queue
+ * RVT_S_WAIT_ACK - waiting for an ACK packet before sending more requests
+ * RVT_S_SEND_ONE - send one packet, request ACK, then wait for ACK
+ * RVT_S_ECN - a BECN was queued to the send engine
+ * RVT_S_MAX_BIT_MASK - The max bit that can be used by rdmavt
+ */
+#define RVT_S_SIGNAL_REQ_WR	0x0001
+#define RVT_S_BUSY		0x0002
+#define RVT_S_TIMER		0x0004
+#define RVT_S_RESP_PENDING	0x0008
+#define RVT_S_ACK_PENDING	0x0010
+#define RVT_S_WAIT_FENCE	0x0020
+#define RVT_S_WAIT_RDMAR	0x0040
+#define RVT_S_WAIT_RNR		0x0080
+#define RVT_S_WAIT_SSN_CREDIT	0x0100
+#define RVT_S_WAIT_DMA		0x0200
+#define RVT_S_WAIT_PIO		0x0400
+#define RVT_S_WAIT_TX		0x0800
+#define RVT_S_WAIT_DMA_DESC	0x1000
+#define RVT_S_WAIT_KMEM		0x2000
+#define RVT_S_WAIT_PSN		0x4000
+#define RVT_S_WAIT_ACK		0x8000
+#define RVT_S_SEND_ONE		0x10000
+#define RVT_S_UNLIMITED_CREDIT	0x20000
+#define RVT_S_ECN		0x40000
+#define RVT_S_MAX_BIT_MASK	0x800000
+
+/*
+ * Drivers should use s_flags starting with bit 31 down to the bit next to
+ * RVT_S_MAX_BIT_MASK
+ */
+
+/*
+ * Wait flags that would prevent any packet type from being sent.
+ */
+#define RVT_S_ANY_WAIT_IO \
+	(RVT_S_WAIT_PIO | RVT_S_WAIT_TX | \
+	 RVT_S_WAIT_DMA_DESC | RVT_S_WAIT_KMEM)
+
+/*
+ * Wait flags that would prevent send work requests from making progress.
+ */
+#define RVT_S_ANY_WAIT_SEND (RVT_S_WAIT_FENCE | RVT_S_WAIT_RDMAR | \
+	RVT_S_WAIT_RNR | RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_DMA | \
+	RVT_S_WAIT_PSN | RVT_S_WAIT_ACK)
+
+#define RVT_S_ANY_WAIT (RVT_S_ANY_WAIT_IO | RVT_S_ANY_WAIT_SEND)
+
+/* Number of bits to pay attention to in the opcode for checking qp type */
+#define RVT_OPCODE_QP_MASK 0xE0
+
+/* Flags for checking QP state (see ib_rvt_state_ops[]) */
+#define RVT_POST_SEND_OK                0x01
+#define RVT_POST_RECV_OK                0x02
+#define RVT_PROCESS_RECV_OK             0x04
+#define RVT_PROCESS_SEND_OK             0x08
+#define RVT_PROCESS_NEXT_SEND_OK        0x10
+#define RVT_FLUSH_SEND			0x20
+#define RVT_FLUSH_RECV			0x40
+#define RVT_PROCESS_OR_FLUSH_SEND \
+	(RVT_PROCESS_SEND_OK | RVT_FLUSH_SEND)
+#define RVT_SEND_OR_FLUSH_OR_RECV_OK \
+	(RVT_PROCESS_SEND_OK | RVT_FLUSH_SEND | RVT_PROCESS_RECV_OK)
+
+/*
+ * Internal send flags
+ */
+#define RVT_SEND_RESERVE_USED           IB_SEND_RESERVED_START
+#define RVT_SEND_COMPLETION_ONLY	(IB_SEND_RESERVED_START << 1)
+
+/**
+ * rvt_ud_wr - IB UD work plus AH cache
+ * @wr: valid IB work request
+ * @attr: pointer to an allocated AH attribute
+ *
+ * Special case the UD WR so we can keep track of the AH attributes.
+ *
+ * NOTE: This data structure is stricly ordered wr then attr. I.e the attr
+ * MUST come after wr.  The ib_ud_wr is sized and copied in rvt_post_one_wr.
+ * The copy assumes that wr is first.
+ */
+struct rvt_ud_wr {
+	struct ib_ud_wr wr;
+	struct rdma_ah_attr *attr;
+};
+
+/*
+ * Send work request queue entry.
+ * The size of the sg_list is determined when the QP is created and stored
+ * in qp->s_max_sge.
+ */
+struct rvt_swqe {
+	union {
+		struct ib_send_wr wr;   /* don't use wr.sg_list */
+		struct rvt_ud_wr ud_wr;
+		struct ib_reg_wr reg_wr;
+		struct ib_rdma_wr rdma_wr;
+		struct ib_atomic_wr atomic_wr;
+	};
+	u32 psn;                /* first packet sequence number */
+	u32 lpsn;               /* last packet sequence number */
+	u32 ssn;                /* send sequence number */
+	u32 length;             /* total length of data in sg_list */
+	void *priv;             /* driver dependent field */
+	struct rvt_sge sg_list[];
+};
+
+/**
+ * struct rvt_krwq - kernel struct receive work request
+ * @p_lock: lock to protect producer of the kernel buffer
+ * @head: index of next entry to fill
+ * @c_lock:lock to protect consumer of the kernel buffer
+ * @tail: index of next entry to pull
+ * @count: count is aproximate of total receive enteries posted
+ * @rvt_rwqe: struct of receive work request queue entry
+ *
+ * This structure is used to contain the head pointer,
+ * tail pointer and receive work queue entries for kernel
+ * mode user.
+ */
+struct rvt_krwq {
+	spinlock_t p_lock;	/* protect producer */
+	u32 head;               /* new work requests posted to the head */
+
+	/* protect consumer */
+	spinlock_t c_lock ____cacheline_aligned_in_smp;
+	u32 tail;               /* receives pull requests from here. */
+	u32 count;		/* approx count of receive entries posted */
+	struct rvt_rwqe *curr_wq;
+	struct rvt_rwqe wq[];
+};
+
+/*
+ * rvt_get_swqe_ah - Return the pointer to the struct rvt_ah
+ * @swqe: valid Send WQE
+ *
+ */
+static inline struct rvt_ah *rvt_get_swqe_ah(struct rvt_swqe *swqe)
+{
+	return ibah_to_rvtah(swqe->ud_wr.wr.ah);
+}
+
+/**
+ * rvt_get_swqe_ah_attr - Return the cached ah attribute information
+ * @swqe: valid Send WQE
+ *
+ */
+static inline struct rdma_ah_attr *rvt_get_swqe_ah_attr(struct rvt_swqe *swqe)
+{
+	return swqe->ud_wr.attr;
+}
+
+/**
+ * rvt_get_swqe_remote_qpn - Access the remote QPN value
+ * @swqe: valid Send WQE
+ *
+ */
+static inline u32 rvt_get_swqe_remote_qpn(struct rvt_swqe *swqe)
+{
+	return swqe->ud_wr.wr.remote_qpn;
+}
+
+/**
+ * rvt_get_swqe_remote_qkey - Acces the remote qkey value
+ * @swqe: valid Send WQE
+ *
+ */
+static inline u32 rvt_get_swqe_remote_qkey(struct rvt_swqe *swqe)
+{
+	return swqe->ud_wr.wr.remote_qkey;
+}
+
+/**
+ * rvt_get_swqe_pkey_index - Access the pkey index
+ * @swqe: valid Send WQE
+ *
+ */
+static inline u16 rvt_get_swqe_pkey_index(struct rvt_swqe *swqe)
+{
+	return swqe->ud_wr.wr.pkey_index;
+}
+
+struct rvt_rq {
+	struct rvt_rwq *wq;
+	struct rvt_krwq *kwq;
+	u32 size;               /* size of RWQE array */
+	u8 max_sge;
+	/* protect changes in this struct */
+	spinlock_t lock ____cacheline_aligned_in_smp;
+};
+
+/**
+ * rvt_get_rq_count - count numbers of request work queue entries
+ * in circular buffer
+ * @rq: data structure for request queue entry
+ * @head: head indices of the circular buffer
+ * @tail: tail indices of the circular buffer
+ *
+ * Return - total number of entries in the Receive Queue
+ */
+
+static inline u32 rvt_get_rq_count(struct rvt_rq *rq, u32 head, u32 tail)
+{
+	u32 count = head - tail;
+
+	if ((s32)count < 0)
+		count += rq->size;
+	return count;
+}
+
+/*
+ * This structure holds the information that the send tasklet needs
+ * to send a RDMA read response or atomic operation.
+ */
+struct rvt_ack_entry {
+	struct rvt_sge rdma_sge;
+	u64 atomic_data;
+	u32 psn;
+	u32 lpsn;
+	u8 opcode;
+	u8 sent;
+	void *priv;
+};
+
+#define	RC_QP_SCALING_INTERVAL	5
+
+#define RVT_OPERATION_PRIV        0x00000001
+#define RVT_OPERATION_ATOMIC      0x00000002
+#define RVT_OPERATION_ATOMIC_SGE  0x00000004
+#define RVT_OPERATION_LOCAL       0x00000008
+#define RVT_OPERATION_USE_RESERVE 0x00000010
+#define RVT_OPERATION_IGN_RNR_CNT 0x00000020
+
+#define RVT_OPERATION_MAX (IB_WR_RESERVED10 + 1)
+
+/**
+ * rvt_operation_params - op table entry
+ * @length - the length to copy into the swqe entry
+ * @qpt_support - a bit mask indicating QP type support
+ * @flags - RVT_OPERATION flags (see above)
+ *
+ * This supports table driven post send so that
+ * the driver can have differing an potentially
+ * different sets of operations.
+ *
+ **/
+
+struct rvt_operation_params {
+	size_t length;
+	u32 qpt_support;
+	u32 flags;
+};
+
+/*
+ * Common variables are protected by both r_rq.lock and s_lock in that order
+ * which only happens in modify_qp() or changing the QP 'state'.
+ */
+struct rvt_qp {
+	struct ib_qp ibqp;
+	void *priv; /* Driver private data */
+	/* read mostly fields above and below */
+	struct rdma_ah_attr remote_ah_attr;
+	struct rdma_ah_attr alt_ah_attr;
+	struct rvt_qp __rcu *next;           /* link list for QPN hash table */
+	struct rvt_swqe *s_wq;  /* send work queue */
+	struct rvt_mmap_info *ip;
+
+	unsigned long timeout_jiffies;  /* computed from timeout */
+
+	int srate_mbps;		/* s_srate (below) converted to Mbit/s */
+	pid_t pid;		/* pid for user mode QPs */
+	u32 remote_qpn;
+	u32 qkey;               /* QKEY for this QP (for UD or RD) */
+	u32 s_size;             /* send work queue size */
+
+	u16 pmtu;		/* decoded from path_mtu */
+	u8 log_pmtu;		/* shift for pmtu */
+	u8 state;               /* QP state */
+	u8 allowed_ops;		/* high order bits of allowed opcodes */
+	u8 qp_access_flags;
+	u8 alt_timeout;         /* Alternate path timeout for this QP */
+	u8 timeout;             /* Timeout for this QP */
+	u8 s_srate;
+	u8 s_mig_state;
+	u8 port_num;
+	u8 s_pkey_index;        /* PKEY index to use */
+	u8 s_alt_pkey_index;    /* Alternate path PKEY index to use */
+	u8 r_max_rd_atomic;     /* max number of RDMA read/atomic to receive */
+	u8 s_max_rd_atomic;     /* max number of RDMA read/atomic to send */
+	u8 s_retry_cnt;         /* number of times to retry */
+	u8 s_rnr_retry_cnt;
+	u8 r_min_rnr_timer;     /* retry timeout value for RNR NAKs */
+	u8 s_max_sge;           /* size of s_wq->sg_list */
+	u8 s_draining;
+
+	/* start of read/write fields */
+	atomic_t refcount ____cacheline_aligned_in_smp;
+	wait_queue_head_t wait;
+
+	struct rvt_ack_entry *s_ack_queue;
+	struct rvt_sge_state s_rdma_read_sge;
+
+	spinlock_t r_lock ____cacheline_aligned_in_smp;      /* used for APM */
+	u32 r_psn;              /* expected rcv packet sequence number */
+	unsigned long r_aflags;
+	u64 r_wr_id;            /* ID for current receive WQE */
+	u32 r_ack_psn;          /* PSN for next ACK or atomic ACK */
+	u32 r_len;              /* total length of r_sge */
+	u32 r_rcv_len;          /* receive data len processed */
+	u32 r_msn;              /* message sequence number */
+
+	u8 r_state;             /* opcode of last packet received */
+	u8 r_flags;
+	u8 r_head_ack_queue;    /* index into s_ack_queue[] */
+	u8 r_adefered;          /* defered ack count */
+
+	struct list_head rspwait;       /* link for waiting to respond */
+
+	struct rvt_sge_state r_sge;     /* current receive data */
+	struct rvt_rq r_rq;             /* receive work queue */
+
+	/* post send line */
+	spinlock_t s_hlock ____cacheline_aligned_in_smp;
+	u32 s_head;             /* new entries added here */
+	u32 s_next_psn;         /* PSN for next request */
+	u32 s_avail;            /* number of entries avail */
+	u32 s_ssn;              /* SSN of tail entry */
+	atomic_t s_reserved_used; /* reserved entries in use */
+
+	spinlock_t s_lock ____cacheline_aligned_in_smp;
+	u32 s_flags;
+	struct rvt_sge_state *s_cur_sge;
+	struct rvt_swqe *s_wqe;
+	struct rvt_sge_state s_sge;     /* current send request data */
+	struct rvt_mregion *s_rdma_mr;
+	u32 s_len;              /* total length of s_sge */
+	u32 s_rdma_read_len;    /* total length of s_rdma_read_sge */
+	u32 s_last_psn;         /* last response PSN processed */
+	u32 s_sending_psn;      /* lowest PSN that is being sent */
+	u32 s_sending_hpsn;     /* highest PSN that is being sent */
+	u32 s_psn;              /* current packet sequence number */
+	u32 s_ack_rdma_psn;     /* PSN for sending RDMA read responses */
+	u32 s_ack_psn;          /* PSN for acking sends and RDMA writes */
+	u32 s_tail;             /* next entry to process */
+	u32 s_cur;              /* current work queue entry */
+	u32 s_acked;            /* last un-ACK'ed entry */
+	u32 s_last;             /* last completed entry */
+	u32 s_lsn;              /* limit sequence number (credit) */
+	u32 s_ahgpsn;           /* set to the psn in the copy of the header */
+	u16 s_cur_size;         /* size of send packet in bytes */
+	u16 s_rdma_ack_cnt;
+	u8 s_hdrwords;         /* size of s_hdr in 32 bit words */
+	s8 s_ahgidx;
+	u8 s_state;             /* opcode of last packet sent */
+	u8 s_ack_state;         /* opcode of packet to ACK */
+	u8 s_nak_state;         /* non-zero if NAK is pending */
+	u8 r_nak_state;         /* non-zero if NAK is pending */
+	u8 s_retry;             /* requester retry counter */
+	u8 s_rnr_retry;         /* requester RNR retry counter */
+	u8 s_num_rd_atomic;     /* number of RDMA read/atomic pending */
+	u8 s_tail_ack_queue;    /* index into s_ack_queue[] */
+	u8 s_acked_ack_queue;   /* index into s_ack_queue[] */
+
+	struct rvt_sge_state s_ack_rdma_sge;
+	struct timer_list s_timer;
+	struct hrtimer s_rnr_timer;
+
+	atomic_t local_ops_pending; /* number of fast_reg/local_inv reqs */
+
+	/*
+	 * This sge list MUST be last. Do not add anything below here.
+	 */
+	struct rvt_sge r_sg_list[] /* verified SGEs */
+		____cacheline_aligned_in_smp;
+};
+
+struct rvt_srq {
+	struct ib_srq ibsrq;
+	struct rvt_rq rq;
+	struct rvt_mmap_info *ip;
+	/* send signal when number of RWQEs < limit */
+	u32 limit;
+};
+
+static inline struct rvt_srq *ibsrq_to_rvtsrq(struct ib_srq *ibsrq)
+{
+	return container_of(ibsrq, struct rvt_srq, ibsrq);
+}
+
+static inline struct rvt_qp *ibqp_to_rvtqp(struct ib_qp *ibqp)
+{
+	return container_of(ibqp, struct rvt_qp, ibqp);
+}
+
+#define RVT_QPN_MAX                 BIT(24)
+#define RVT_QPNMAP_ENTRIES          (RVT_QPN_MAX / PAGE_SIZE / BITS_PER_BYTE)
+#define RVT_BITS_PER_PAGE           (PAGE_SIZE * BITS_PER_BYTE)
+#define RVT_BITS_PER_PAGE_MASK      (RVT_BITS_PER_PAGE - 1)
+#define RVT_QPN_MASK		    IB_QPN_MASK
+
+/*
+ * QPN-map pages start out as NULL, they get allocated upon
+ * first use and are never deallocated. This way,
+ * large bitmaps are not allocated unless large numbers of QPs are used.
+ */
+struct rvt_qpn_map {
+	void *page;
+};
+
+struct rvt_qpn_table {
+	spinlock_t lock; /* protect changes to the qp table */
+	unsigned flags;         /* flags for QP0/1 allocated for each port */
+	u32 last;               /* last QP number allocated */
+	u32 nmaps;              /* size of the map table */
+	u16 limit;
+	u8  incr;
+	/* bit map of free QP numbers other than 0/1 */
+	struct rvt_qpn_map map[RVT_QPNMAP_ENTRIES];
+};
+
+struct rvt_qp_ibdev {
+	u32 qp_table_size;
+	u32 qp_table_bits;
+	struct rvt_qp __rcu **qp_table;
+	spinlock_t qpt_lock; /* qptable lock */
+	struct rvt_qpn_table qpn_table;
+};
+
+/*
+ * There is one struct rvt_mcast for each multicast GID.
+ * All attached QPs are then stored as a list of
+ * struct rvt_mcast_qp.
+ */
+struct rvt_mcast_qp {
+	struct list_head list;
+	struct rvt_qp *qp;
+};
+
+struct rvt_mcast_addr {
+	union ib_gid mgid;
+	u16 lid;
+};
+
+struct rvt_mcast {
+	struct rb_node rb_node;
+	struct rvt_mcast_addr mcast_addr;
+	struct list_head qp_list;
+	wait_queue_head_t wait;
+	atomic_t refcount;
+	int n_attached;
+};
+
+/*
+ * Since struct rvt_swqe is not a fixed size, we can't simply index into
+ * struct rvt_qp.s_wq.  This function does the array index computation.
+ */
+static inline struct rvt_swqe *rvt_get_swqe_ptr(struct rvt_qp *qp,
+						unsigned n)
+{
+	return (struct rvt_swqe *)((char *)qp->s_wq +
+				     (sizeof(struct rvt_swqe) +
+				      qp->s_max_sge *
+				      sizeof(struct rvt_sge)) * n);
+}
+
+/*
+ * Since struct rvt_rwqe is not a fixed size, we can't simply index into
+ * struct rvt_rwq.wq.  This function does the array index computation.
+ */
+static inline struct rvt_rwqe *rvt_get_rwqe_ptr(struct rvt_rq *rq, unsigned n)
+{
+	return (struct rvt_rwqe *)
+		((char *)rq->kwq->curr_wq +
+		 (sizeof(struct rvt_rwqe) +
+		  rq->max_sge * sizeof(struct ib_sge)) * n);
+}
+
+/**
+ * rvt_is_user_qp - return if this is user mode QP
+ * @qp - the target QP
+ */
+static inline bool rvt_is_user_qp(struct rvt_qp *qp)
+{
+	return !!qp->pid;
+}
+
+/**
+ * rvt_get_qp - get a QP reference
+ * @qp - the QP to hold
+ */
+static inline void rvt_get_qp(struct rvt_qp *qp)
+{
+	atomic_inc(&qp->refcount);
+}
+
+/**
+ * rvt_put_qp - release a QP reference
+ * @qp - the QP to release
+ */
+static inline void rvt_put_qp(struct rvt_qp *qp)
+{
+	if (qp && atomic_dec_and_test(&qp->refcount))
+		wake_up(&qp->wait);
+}
+
+/**
+ * rvt_put_swqe - drop mr refs held by swqe
+ * @wqe - the send wqe
+ *
+ * This drops any mr references held by the swqe
+ */
+static inline void rvt_put_swqe(struct rvt_swqe *wqe)
+{
+	int i;
+
+	for (i = 0; i < wqe->wr.num_sge; i++) {
+		struct rvt_sge *sge = &wqe->sg_list[i];
+
+		rvt_put_mr(sge->mr);
+	}
+}
+
+/**
+ * rvt_qp_wqe_reserve - reserve operation
+ * @qp - the rvt qp
+ * @wqe - the send wqe
+ *
+ * This routine used in post send to record
+ * a wqe relative reserved operation use.
+ */
+static inline void rvt_qp_wqe_reserve(
+	struct rvt_qp *qp,
+	struct rvt_swqe *wqe)
+{
+	atomic_inc(&qp->s_reserved_used);
+}
+
+/**
+ * rvt_qp_wqe_unreserve - clean reserved operation
+ * @qp - the rvt qp
+ * @flags - send wqe flags
+ *
+ * This decrements the reserve use count.
+ *
+ * This call MUST precede the change to
+ * s_last to insure that post send sees a stable
+ * s_avail.
+ *
+ * An smp_mp__after_atomic() is used to insure
+ * the compiler does not juggle the order of the s_last
+ * ring index and the decrementing of s_reserved_used.
+ */
+static inline void rvt_qp_wqe_unreserve(struct rvt_qp *qp, int flags)
+{
+	if (unlikely(flags & RVT_SEND_RESERVE_USED)) {
+		atomic_dec(&qp->s_reserved_used);
+		/* insure no compiler re-order up to s_last change */
+		smp_mb__after_atomic();
+	}
+}
+
+extern const enum ib_wc_opcode ib_rvt_wc_opcode[];
+
+/*
+ * Compare the lower 24 bits of the msn values.
+ * Returns an integer <, ==, or > than zero.
+ */
+static inline int rvt_cmp_msn(u32 a, u32 b)
+{
+	return (((int)a) - ((int)b)) << 8;
+}
+
+__be32 rvt_compute_aeth(struct rvt_qp *qp);
+
+void rvt_get_credit(struct rvt_qp *qp, u32 aeth);
+
+u32 rvt_restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe, u32 len);
+
+/**
+ * rvt_div_round_up_mtu - round up divide
+ * @qp - the qp pair
+ * @len - the length
+ *
+ * Perform a shift based mtu round up divide
+ */
+static inline u32 rvt_div_round_up_mtu(struct rvt_qp *qp, u32 len)
+{
+	return (len + qp->pmtu - 1) >> qp->log_pmtu;
+}
+
+/**
+ * @qp - the qp pair
+ * @len - the length
+ *
+ * Perform a shift based mtu divide
+ */
+static inline u32 rvt_div_mtu(struct rvt_qp *qp, u32 len)
+{
+	return len >> qp->log_pmtu;
+}
+
+/**
+ * rvt_timeout_to_jiffies - Convert a ULP timeout input into jiffies
+ * @timeout - timeout input(0 - 31).
+ *
+ * Return a timeout value in jiffies.
+ */
+static inline unsigned long rvt_timeout_to_jiffies(u8 timeout)
+{
+	if (timeout > 31)
+		timeout = 31;
+
+	return usecs_to_jiffies(1U << timeout) * 4096UL / 1000UL;
+}
+
+/**
+ * rvt_lookup_qpn - return the QP with the given QPN
+ * @ibp: the ibport
+ * @qpn: the QP number to look up
+ *
+ * The caller must hold the rcu_read_lock(), and keep the lock until
+ * the returned qp is no longer in use.
+ */
+static inline struct rvt_qp *rvt_lookup_qpn(struct rvt_dev_info *rdi,
+					    struct rvt_ibport *rvp,
+					    u32 qpn) __must_hold(RCU)
+{
+	struct rvt_qp *qp = NULL;
+
+	if (unlikely(qpn <= 1)) {
+		qp = rcu_dereference(rvp->qp[qpn]);
+	} else {
+		u32 n = hash_32(qpn, rdi->qp_dev->qp_table_bits);
+
+		for (qp = rcu_dereference(rdi->qp_dev->qp_table[n]); qp;
+			qp = rcu_dereference(qp->next))
+			if (qp->ibqp.qp_num == qpn)
+				break;
+	}
+	return qp;
+}
+
+/**
+ * rvt_mod_retry_timer - mod a retry timer
+ * @qp - the QP
+ * @shift - timeout shift to wait for multiple packets
+ * Modify a potentially already running retry timer
+ */
+static inline void rvt_mod_retry_timer_ext(struct rvt_qp *qp, u8 shift)
+{
+	struct ib_qp *ibqp = &qp->ibqp;
+	struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
+
+	lockdep_assert_held(&qp->s_lock);
+	qp->s_flags |= RVT_S_TIMER;
+	/* 4.096 usec. * (1 << qp->timeout) */
+	mod_timer(&qp->s_timer, jiffies + rdi->busy_jiffies +
+		  (qp->timeout_jiffies << shift));
+}
+
+static inline void rvt_mod_retry_timer(struct rvt_qp *qp)
+{
+	return rvt_mod_retry_timer_ext(qp, 0);
+}
+
+/**
+ * rvt_put_qp_swqe - drop refs held by swqe
+ * @qp: the send qp
+ * @wqe: the send wqe
+ *
+ * This drops any references held by the swqe
+ */
+static inline void rvt_put_qp_swqe(struct rvt_qp *qp, struct rvt_swqe *wqe)
+{
+	rvt_put_swqe(wqe);
+	if (qp->allowed_ops == IB_OPCODE_UD)
+		rdma_destroy_ah_attr(wqe->ud_wr.attr);
+}
+
+/**
+ * rvt_qp_sqwe_incr - increment ring index
+ * @qp: the qp
+ * @val: the starting value
+ *
+ * Return: the new value wrapping as appropriate
+ */
+static inline u32
+rvt_qp_swqe_incr(struct rvt_qp *qp, u32 val)
+{
+	if (++val >= qp->s_size)
+		val = 0;
+	return val;
+}
+
+int rvt_error_qp(struct rvt_qp *qp, enum ib_wc_status err);
+
+/**
+ * rvt_recv_cq - add a new entry to completion queue
+ *			by receive queue
+ * @qp: receive queue
+ * @wc: work completion entry to add
+ * @solicited: true if @entry is solicited
+ *
+ * This is wrapper function for rvt_enter_cq function call by
+ * receive queue. If rvt_cq_enter return false, it means cq is
+ * full and the qp is put into error state.
+ */
+static inline void rvt_recv_cq(struct rvt_qp *qp, struct ib_wc *wc,
+			       bool solicited)
+{
+	struct rvt_cq *cq = ibcq_to_rvtcq(qp->ibqp.recv_cq);
+
+	if (unlikely(!rvt_cq_enter(cq, wc, solicited)))
+		rvt_error_qp(qp, IB_WC_LOC_QP_OP_ERR);
+}
+
+/**
+ * rvt_send_cq - add a new entry to completion queue
+ *                        by send queue
+ * @qp: send queue
+ * @wc: work completion entry to add
+ * @solicited: true if @entry is solicited
+ *
+ * This is wrapper function for rvt_enter_cq function call by
+ * send queue. If rvt_cq_enter return false, it means cq is
+ * full and the qp is put into error state.
+ */
+static inline void rvt_send_cq(struct rvt_qp *qp, struct ib_wc *wc,
+			       bool solicited)
+{
+	struct rvt_cq *cq = ibcq_to_rvtcq(qp->ibqp.send_cq);
+
+	if (unlikely(!rvt_cq_enter(cq, wc, solicited)))
+		rvt_error_qp(qp, IB_WC_LOC_QP_OP_ERR);
+}
+
+/**
+ * rvt_qp_complete_swqe - insert send completion
+ * @qp - the qp
+ * @wqe - the send wqe
+ * @opcode - wc operation (driver dependent)
+ * @status - completion status
+ *
+ * Update the s_last information, and then insert a send
+ * completion into the completion
+ * queue if the qp indicates it should be done.
+ *
+ * See IBTA 10.7.3.1 for info on completion
+ * control.
+ *
+ * Return: new last
+ */
+static inline u32
+rvt_qp_complete_swqe(struct rvt_qp *qp,
+		     struct rvt_swqe *wqe,
+		     enum ib_wc_opcode opcode,
+		     enum ib_wc_status status)
+{
+	bool need_completion;
+	u64 wr_id;
+	u32 byte_len, last;
+	int flags = wqe->wr.send_flags;
+
+	rvt_qp_wqe_unreserve(qp, flags);
+	rvt_put_qp_swqe(qp, wqe);
+
+	need_completion =
+		!(flags & RVT_SEND_RESERVE_USED) &&
+		(!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
+		(flags & IB_SEND_SIGNALED) ||
+		status != IB_WC_SUCCESS);
+	if (need_completion) {
+		wr_id = wqe->wr.wr_id;
+		byte_len = wqe->length;
+		/* above fields required before writing s_last */
+	}
+	last = rvt_qp_swqe_incr(qp, qp->s_last);
+	/* see rvt_qp_is_avail() */
+	smp_store_release(&qp->s_last, last);
+	if (need_completion) {
+		struct ib_wc w = {
+			.wr_id = wr_id,
+			.status = status,
+			.opcode = opcode,
+			.qp = &qp->ibqp,
+			.byte_len = byte_len,
+		};
+		rvt_send_cq(qp, &w, status != IB_WC_SUCCESS);
+	}
+	return last;
+}
+
+extern const int  ib_rvt_state_ops[];
+
+struct rvt_dev_info;
+int rvt_get_rwqe(struct rvt_qp *qp, bool wr_id_only);
+void rvt_comm_est(struct rvt_qp *qp);
+void rvt_rc_error(struct rvt_qp *qp, enum ib_wc_status err);
+unsigned long rvt_rnr_tbl_to_usec(u32 index);
+enum hrtimer_restart rvt_rc_rnr_retry(struct hrtimer *t);
+void rvt_add_rnr_timer(struct rvt_qp *qp, u32 aeth);
+void rvt_del_timers_sync(struct rvt_qp *qp);
+void rvt_stop_rc_timers(struct rvt_qp *qp);
+void rvt_add_retry_timer_ext(struct rvt_qp *qp, u8 shift);
+static inline void rvt_add_retry_timer(struct rvt_qp *qp)
+{
+	rvt_add_retry_timer_ext(qp, 0);
+}
+
+void rvt_copy_sge(struct rvt_qp *qp, struct rvt_sge_state *ss,
+		  void *data, u32 length,
+		  bool release, bool copy_last);
+void rvt_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
+		       enum ib_wc_status status);
+void rvt_ruc_loopback(struct rvt_qp *qp);
+
+/**
+ * struct rvt_qp_iter - the iterator for QPs
+ * @qp - the current QP
+ *
+ * This structure defines the current iterator
+ * state for sequenced access to all QPs relative
+ * to an rvt_dev_info.
+ */
+struct rvt_qp_iter {
+	struct rvt_qp *qp;
+	/* private: backpointer */
+	struct rvt_dev_info *rdi;
+	/* private: callback routine */
+	void (*cb)(struct rvt_qp *qp, u64 v);
+	/* private: for arg to callback routine */
+	u64 v;
+	/* private: number of SMI,GSI QPs for device */
+	int specials;
+	/* private: current iterator index */
+	int n;
+};
+
+/**
+ * ib_cq_tail - Return tail index of cq buffer
+ * @send_cq - The cq for send
+ *
+ * This is called in qp_iter_print to get tail
+ * of cq buffer.
+ */
+static inline u32 ib_cq_tail(struct ib_cq *send_cq)
+{
+	struct rvt_cq *cq = ibcq_to_rvtcq(send_cq);
+
+	return ibcq_to_rvtcq(send_cq)->ip ?
+	       RDMA_READ_UAPI_ATOMIC(cq->queue->tail) :
+	       ibcq_to_rvtcq(send_cq)->kqueue->tail;
+}
+
+/**
+ * ib_cq_head - Return head index of cq buffer
+ * @send_cq - The cq for send
+ *
+ * This is called in qp_iter_print to get head
+ * of cq buffer.
+ */
+static inline u32 ib_cq_head(struct ib_cq *send_cq)
+{
+	struct rvt_cq *cq = ibcq_to_rvtcq(send_cq);
+
+	return ibcq_to_rvtcq(send_cq)->ip ?
+	       RDMA_READ_UAPI_ATOMIC(cq->queue->head) :
+	       ibcq_to_rvtcq(send_cq)->kqueue->head;
+}
+
+/**
+ * rvt_free_rq - free memory allocated for rvt_rq struct
+ * @rvt_rq: request queue data structure
+ *
+ * This function should only be called if the rvt_mmap_info()
+ * has not succeeded.
+ */
+static inline void rvt_free_rq(struct rvt_rq *rq)
+{
+	kvfree(rq->kwq);
+	rq->kwq = NULL;
+	vfree(rq->wq);
+	rq->wq = NULL;
+}
+
+/**
+ * rvt_to_iport - Get the ibport pointer
+ * @qp: the qp pointer
+ *
+ * This function returns the ibport pointer from the qp pointer.
+ */
+static inline struct rvt_ibport *rvt_to_iport(struct rvt_qp *qp)
+{
+	struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
+
+	return rdi->ports[qp->port_num - 1];
+}
+
+/**
+ * rvt_rc_credit_avail - Check if there are enough RC credits for the request
+ * @qp: the qp
+ * @wqe: the request
+ *
+ * This function returns false when there are not enough credits for the given
+ * request and true otherwise.
+ */
+static inline bool rvt_rc_credit_avail(struct rvt_qp *qp, struct rvt_swqe *wqe)
+{
+	lockdep_assert_held(&qp->s_lock);
+	if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) &&
+	    rvt_cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) {
+		struct rvt_ibport *rvp = rvt_to_iport(qp);
+
+		qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
+		rvp->n_rc_crwaits++;
+		return false;
+	}
+	return true;
+}
+
+struct rvt_qp_iter *rvt_qp_iter_init(struct rvt_dev_info *rdi,
+				     u64 v,
+				     void (*cb)(struct rvt_qp *qp, u64 v));
+int rvt_qp_iter_next(struct rvt_qp_iter *iter);
+void rvt_qp_iter(struct rvt_dev_info *rdi,
+		 u64 v,
+		 void (*cb)(struct rvt_qp *qp, u64 v));
+void rvt_qp_mr_clean(struct rvt_qp *qp, u32 lkey);
+#endif          /* DEF_RDMAVT_INCQP_H */
diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h
new file mode 100644
index 000000000..d3a1cc5be
--- /dev/null
+++ b/include/rdma/restrack.h
@@ -0,0 +1,148 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
+ */
+
+#ifndef _RDMA_RESTRACK_H_
+#define _RDMA_RESTRACK_H_
+
+#include <linux/typecheck.h>
+#include <linux/sched.h>
+#include <linux/kref.h>
+#include <linux/completion.h>
+#include <linux/sched/task.h>
+#include <uapi/rdma/rdma_netlink.h>
+#include <linux/xarray.h>
+
+struct ib_device;
+struct sk_buff;
+
+/**
+ * enum rdma_restrack_type - HW objects to track
+ */
+enum rdma_restrack_type {
+	/**
+	 * @RDMA_RESTRACK_PD: Protection domain (PD)
+	 */
+	RDMA_RESTRACK_PD,
+	/**
+	 * @RDMA_RESTRACK_CQ: Completion queue (CQ)
+	 */
+	RDMA_RESTRACK_CQ,
+	/**
+	 * @RDMA_RESTRACK_QP: Queue pair (QP)
+	 */
+	RDMA_RESTRACK_QP,
+	/**
+	 * @RDMA_RESTRACK_CM_ID: Connection Manager ID (CM_ID)
+	 */
+	RDMA_RESTRACK_CM_ID,
+	/**
+	 * @RDMA_RESTRACK_MR: Memory Region (MR)
+	 */
+	RDMA_RESTRACK_MR,
+	/**
+	 * @RDMA_RESTRACK_CTX: Verbs contexts (CTX)
+	 */
+	RDMA_RESTRACK_CTX,
+	/**
+	 * @RDMA_RESTRACK_COUNTER: Statistic Counter
+	 */
+	RDMA_RESTRACK_COUNTER,
+	/**
+	 * @RDMA_RESTRACK_MAX: Last entry, used for array dclarations
+	 */
+	RDMA_RESTRACK_MAX
+};
+
+/**
+ * struct rdma_restrack_entry - metadata per-entry
+ */
+struct rdma_restrack_entry {
+	/**
+	 * @valid: validity indicator
+	 *
+	 * The entries are filled during rdma_restrack_add,
+	 * can be attempted to be free during rdma_restrack_del.
+	 *
+	 * As an example for that, see mlx5 QPs with type MLX5_IB_QPT_HW_GSI
+	 */
+	bool			valid;
+	/*
+	 * @kref: Protect destroy of the resource
+	 */
+	struct kref		kref;
+	/*
+	 * @comp: Signal that all consumers of resource are completed their work
+	 */
+	struct completion	comp;
+	/**
+	 * @task: owner of resource tracking entity
+	 *
+	 * There are two types of entities: created by user and created
+	 * by kernel.
+	 *
+	 * This is relevant for the entities created by users.
+	 * For the entities created by kernel, this pointer will be NULL.
+	 */
+	struct task_struct	*task;
+	/**
+	 * @kern_name: name of owner for the kernel created entities.
+	 */
+	const char		*kern_name;
+	/**
+	 * @type: various objects in restrack database
+	 */
+	enum rdma_restrack_type	type;
+	/**
+	 * @user: user resource
+	 */
+	bool			user;
+	/**
+	 * @id: ID to expose to users
+	 */
+	u32 id;
+};
+
+int rdma_restrack_count(struct ib_device *dev,
+			enum rdma_restrack_type type);
+/**
+ * rdma_is_kernel_res() - check the owner of resource
+ * @res:  resource entry
+ */
+static inline bool rdma_is_kernel_res(const struct rdma_restrack_entry *res)
+{
+	return !res->user;
+}
+
+/**
+ * rdma_restrack_get() - grab to protect resource from release
+ * @res:  resource entry
+ */
+int __must_check rdma_restrack_get(struct rdma_restrack_entry *res);
+
+/**
+ * rdma_restrack_put() - release resource
+ * @res:  resource entry
+ */
+int rdma_restrack_put(struct rdma_restrack_entry *res);
+
+/*
+ * Helper functions for rdma drivers when filling out
+ * nldev driver attributes.
+ */
+int rdma_nl_put_driver_u32(struct sk_buff *msg, const char *name, u32 value);
+int rdma_nl_put_driver_u32_hex(struct sk_buff *msg, const char *name,
+			       u32 value);
+int rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name, u64 value);
+int rdma_nl_put_driver_u64_hex(struct sk_buff *msg, const char *name,
+			       u64 value);
+int rdma_nl_put_driver_string(struct sk_buff *msg, const char *name,
+			      const char *str);
+int rdma_nl_stat_hwcounter_entry(struct sk_buff *msg, const char *name,
+				 u64 value);
+
+struct rdma_restrack_entry *rdma_restrack_get_byid(struct ib_device *dev,
+						   enum rdma_restrack_type type,
+						   u32 id);
+#endif /* _RDMA_RESTRACK_H_ */
diff --git a/include/rdma/rw.h b/include/rdma/rw.h
new file mode 100644
index 000000000..6ad9dc836
--- /dev/null
+++ b/include/rdma/rw.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2016 HGST, a Western Digital Company.
+ */
+#ifndef _RDMA_RW_H
+#define _RDMA_RW_H
+
+#include <linux/dma-mapping.h>
+#include <linux/scatterlist.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/rdma_cm.h>
+#include <rdma/mr_pool.h>
+
+struct rdma_rw_ctx {
+	/* number of RDMA READ/WRITE WRs (not counting MR WRs) */
+	u32			nr_ops;
+
+	/* tag for the union below: */
+	u8			type;
+
+	union {
+		/* for mapping a single SGE: */
+		struct {
+			struct ib_sge		sge;
+			struct ib_rdma_wr	wr;
+		} single;
+
+		/* for mapping of multiple SGEs: */
+		struct {
+			struct ib_sge		*sges;
+			struct ib_rdma_wr	*wrs;
+		} map;
+
+		/* for registering multiple WRs: */
+		struct rdma_rw_reg_ctx {
+			struct ib_sge		sge;
+			struct ib_rdma_wr	wr;
+			struct ib_reg_wr	reg_wr;
+			struct ib_send_wr	inv_wr;
+			struct ib_mr		*mr;
+		} *reg;
+	};
+};
+
+int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
+		struct scatterlist *sg, u32 sg_cnt, u32 sg_offset,
+		u64 remote_addr, u32 rkey, enum dma_data_direction dir);
+void rdma_rw_ctx_destroy(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
+		struct scatterlist *sg, u32 sg_cnt,
+		enum dma_data_direction dir);
+
+int rdma_rw_ctx_signature_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
+		u8 port_num, struct scatterlist *sg, u32 sg_cnt,
+		struct scatterlist *prot_sg, u32 prot_sg_cnt,
+		struct ib_sig_attrs *sig_attrs, u64 remote_addr, u32 rkey,
+		enum dma_data_direction dir);
+void rdma_rw_ctx_destroy_signature(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
+		u8 port_num, struct scatterlist *sg, u32 sg_cnt,
+		struct scatterlist *prot_sg, u32 prot_sg_cnt,
+		enum dma_data_direction dir);
+
+struct ib_send_wr *rdma_rw_ctx_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
+		u8 port_num, struct ib_cqe *cqe, struct ib_send_wr *chain_wr);
+int rdma_rw_ctx_post(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
+		struct ib_cqe *cqe, struct ib_send_wr *chain_wr);
+
+unsigned int rdma_rw_mr_factor(struct ib_device *device, u8 port_num,
+		unsigned int maxpages);
+void rdma_rw_init_qp(struct ib_device *dev, struct ib_qp_init_attr *attr);
+int rdma_rw_init_mrs(struct ib_qp *qp, struct ib_qp_init_attr *attr);
+void rdma_rw_cleanup_mrs(struct ib_qp *qp);
+
+#endif /* _RDMA_RW_H */
diff --git a/include/rdma/signature.h b/include/rdma/signature.h
new file mode 100644
index 000000000..d16b0fcc8
--- /dev/null
+++ b/include/rdma/signature.h
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR Linux-OpenIB) */
+/*
+ * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
+ */
+
+#ifndef _RDMA_SIGNATURE_H_
+#define _RDMA_SIGNATURE_H_
+
+#include <linux/types.h>
+
+enum ib_signature_prot_cap {
+	IB_PROT_T10DIF_TYPE_1 = 1,
+	IB_PROT_T10DIF_TYPE_2 = 1 << 1,
+	IB_PROT_T10DIF_TYPE_3 = 1 << 2,
+};
+
+enum ib_signature_guard_cap {
+	IB_GUARD_T10DIF_CRC	= 1,
+	IB_GUARD_T10DIF_CSUM	= 1 << 1,
+};
+
+/**
+ * enum ib_signature_type - Signature types
+ * @IB_SIG_TYPE_NONE: Unprotected.
+ * @IB_SIG_TYPE_T10_DIF: Type T10-DIF
+ */
+enum ib_signature_type {
+	IB_SIG_TYPE_NONE,
+	IB_SIG_TYPE_T10_DIF,
+};
+
+/**
+ * enum ib_t10_dif_bg_type - Signature T10-DIF block-guard types
+ * @IB_T10DIF_CRC: Corresponds to T10-PI mandated CRC checksum rules.
+ * @IB_T10DIF_CSUM: Corresponds to IP checksum rules.
+ */
+enum ib_t10_dif_bg_type {
+	IB_T10DIF_CRC,
+	IB_T10DIF_CSUM,
+};
+
+/**
+ * struct ib_t10_dif_domain - Parameters specific for T10-DIF
+ *     domain.
+ * @bg_type: T10-DIF block guard type (CRC|CSUM)
+ * @pi_interval: protection information interval.
+ * @bg: seed of guard computation.
+ * @app_tag: application tag of guard block
+ * @ref_tag: initial guard block reference tag.
+ * @ref_remap: Indicate wethear the reftag increments each block
+ * @app_escape: Indicate to skip block check if apptag=0xffff
+ * @ref_escape: Indicate to skip block check if reftag=0xffffffff
+ * @apptag_check_mask: check bitmask of application tag.
+ */
+struct ib_t10_dif_domain {
+	enum ib_t10_dif_bg_type bg_type;
+	u16			pi_interval;
+	u16			bg;
+	u16			app_tag;
+	u32			ref_tag;
+	bool			ref_remap;
+	bool			app_escape;
+	bool			ref_escape;
+	u16			apptag_check_mask;
+};
+
+/**
+ * struct ib_sig_domain - Parameters for signature domain
+ * @sig_type: specific signauture type
+ * @sig: union of all signature domain attributes that may
+ *     be used to set domain layout.
+ */
+struct ib_sig_domain {
+	enum ib_signature_type sig_type;
+	union {
+		struct ib_t10_dif_domain dif;
+	} sig;
+};
+
+/**
+ * struct ib_sig_attrs - Parameters for signature handover operation
+ * @check_mask: bitmask for signature byte check (8 bytes)
+ * @mem: memory domain layout descriptor.
+ * @wire: wire domain layout descriptor.
+ * @meta_length: metadata length
+ */
+struct ib_sig_attrs {
+	u8			check_mask;
+	struct ib_sig_domain	mem;
+	struct ib_sig_domain	wire;
+	int			meta_length;
+};
+
+enum ib_sig_err_type {
+	IB_SIG_BAD_GUARD,
+	IB_SIG_BAD_REFTAG,
+	IB_SIG_BAD_APPTAG,
+};
+
+/*
+ * Signature check masks (8 bytes in total) according to the T10-PI standard:
+ *  -------- -------- ------------
+ * | GUARD  | APPTAG |   REFTAG   |
+ * |  2B    |  2B    |    4B      |
+ *  -------- -------- ------------
+ */
+enum {
+	IB_SIG_CHECK_GUARD = 0xc0,
+	IB_SIG_CHECK_APPTAG = 0x30,
+	IB_SIG_CHECK_REFTAG = 0x0f,
+};
+
+/*
+ * struct ib_sig_err - signature error descriptor
+ */
+struct ib_sig_err {
+	enum ib_sig_err_type	err_type;
+	u32			expected;
+	u32			actual;
+	u64			sig_err_offset;
+	u32			key;
+};
+
+#endif /* _RDMA_SIGNATURE_H_ */
diff --git a/include/rdma/tid_rdma_defs.h b/include/rdma/tid_rdma_defs.h
new file mode 100644
index 000000000..08fe47c7a
--- /dev/null
+++ b/include/rdma/tid_rdma_defs.h
@@ -0,0 +1,108 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
+/*
+ * Copyright(c) 2018 Intel Corporation.
+ *
+ */
+
+#ifndef TID_RDMA_DEFS_H
+#define TID_RDMA_DEFS_H
+
+#include <rdma/ib_pack.h>
+
+struct tid_rdma_read_req {
+	__le32 kdeth0;
+	__le32 kdeth1;
+	struct ib_reth reth;
+	__be32 tid_flow_psn;
+	__be32 tid_flow_qp;
+	__be32 verbs_qp;
+};
+
+struct tid_rdma_read_resp {
+	__le32 kdeth0;
+	__le32 kdeth1;
+	__be32 aeth;
+	__be32 reserved[4];
+	__be32 verbs_psn;
+	__be32 verbs_qp;
+};
+
+struct tid_rdma_write_req {
+	__le32 kdeth0;
+	__le32 kdeth1;
+	struct ib_reth reth;
+	__be32 reserved[2];
+	__be32 verbs_qp;
+};
+
+struct tid_rdma_write_resp {
+	__le32 kdeth0;
+	__le32 kdeth1;
+	__be32 aeth;
+	__be32 reserved[3];
+	__be32 tid_flow_psn;
+	__be32 tid_flow_qp;
+	__be32 verbs_qp;
+};
+
+struct tid_rdma_write_data {
+	__le32 kdeth0;
+	__le32 kdeth1;
+	__be32 reserved[6];
+	__be32 verbs_qp;
+};
+
+struct tid_rdma_resync {
+	__le32 kdeth0;
+	__le32 kdeth1;
+	__be32 reserved[6];
+	__be32 verbs_qp;
+};
+
+struct tid_rdma_ack {
+	__le32 kdeth0;
+	__le32 kdeth1;
+	__be32 aeth;
+	__be32 reserved[2];
+	__be32 tid_flow_psn;
+	__be32 verbs_psn;
+	__be32 tid_flow_qp;
+	__be32 verbs_qp;
+};
+
+/*
+ * TID RDMA Opcodes
+ */
+#define IB_OPCODE_TID_RDMA 0xe0
+enum {
+	IB_OPCODE_WRITE_REQ       = 0x0,
+	IB_OPCODE_WRITE_RESP      = 0x1,
+	IB_OPCODE_WRITE_DATA      = 0x2,
+	IB_OPCODE_WRITE_DATA_LAST = 0x3,
+	IB_OPCODE_READ_REQ        = 0x4,
+	IB_OPCODE_READ_RESP       = 0x5,
+	IB_OPCODE_RESYNC          = 0x6,
+	IB_OPCODE_ACK             = 0x7,
+
+	IB_OPCODE(TID_RDMA, WRITE_REQ),
+	IB_OPCODE(TID_RDMA, WRITE_RESP),
+	IB_OPCODE(TID_RDMA, WRITE_DATA),
+	IB_OPCODE(TID_RDMA, WRITE_DATA_LAST),
+	IB_OPCODE(TID_RDMA, READ_REQ),
+	IB_OPCODE(TID_RDMA, READ_RESP),
+	IB_OPCODE(TID_RDMA, RESYNC),
+	IB_OPCODE(TID_RDMA, ACK),
+};
+
+#define TID_OP(x) IB_OPCODE_TID_RDMA_##x
+
+/*
+ * Define TID RDMA specific WR opcodes. The ib_wr_opcode
+ * enum already provides some reserved values for use by
+ * low level drivers. Two of those are used but renamed
+ * to be more descriptive.
+ */
+#define IB_WR_TID_RDMA_WRITE IB_WR_RESERVED1
+#define IB_WR_TID_RDMA_READ  IB_WR_RESERVED2
+
+#endif /* TID_RDMA_DEFS_H */
diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h
new file mode 100644
index 000000000..94fac5577
--- /dev/null
+++ b/include/rdma/uverbs_ioctl.h
@@ -0,0 +1,944 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2017, Mellanox Technologies inc.  All rights reserved.
+ */
+
+#ifndef _UVERBS_IOCTL_
+#define _UVERBS_IOCTL_
+
+#include <rdma/uverbs_types.h>
+#include <linux/uaccess.h>
+#include <rdma/rdma_user_ioctl.h>
+#include <rdma/ib_user_ioctl_verbs.h>
+#include <rdma/ib_user_ioctl_cmds.h>
+
+/*
+ * =======================================
+ *	Verbs action specifications
+ * =======================================
+ */
+
+enum uverbs_attr_type {
+	UVERBS_ATTR_TYPE_NA,
+	UVERBS_ATTR_TYPE_PTR_IN,
+	UVERBS_ATTR_TYPE_PTR_OUT,
+	UVERBS_ATTR_TYPE_IDR,
+	UVERBS_ATTR_TYPE_FD,
+	UVERBS_ATTR_TYPE_ENUM_IN,
+	UVERBS_ATTR_TYPE_IDRS_ARRAY,
+};
+
+enum uverbs_obj_access {
+	UVERBS_ACCESS_READ,
+	UVERBS_ACCESS_WRITE,
+	UVERBS_ACCESS_NEW,
+	UVERBS_ACCESS_DESTROY
+};
+
+/* Specification of a single attribute inside the ioctl message */
+/* good size 16 */
+struct uverbs_attr_spec {
+	u8 type;
+
+	/*
+	 * Support extending attributes by length. Allow the user to provide
+	 * more bytes than ptr.len, but check that everything after is zero'd
+	 * by the user.
+	 */
+	u8 zero_trailing:1;
+	/*
+	 * Valid only for PTR_IN. Allocate and copy the data inside
+	 * the parser
+	 */
+	u8 alloc_and_copy:1;
+	u8 mandatory:1;
+	/* True if this is from UVERBS_ATTR_UHW */
+	u8 is_udata:1;
+
+	union {
+		struct {
+			/* Current known size to kernel */
+			u16 len;
+			/* User isn't allowed to provide something < min_len */
+			u16 min_len;
+		} ptr;
+
+		struct {
+			/*
+			 * higher bits mean the namespace and lower bits mean
+			 * the type id within the namespace.
+			 */
+			u16 obj_type;
+			u8 access;
+		} obj;
+
+		struct {
+			u8 num_elems;
+		} enum_def;
+	} u;
+
+	/* This weird split lets us remove some padding */
+	union {
+		struct {
+			/*
+			 * The enum attribute can select one of the attributes
+			 * contained in the ids array. Currently only PTR_IN
+			 * attributes are supported in the ids array.
+			 */
+			const struct uverbs_attr_spec *ids;
+		} enum_def;
+
+		struct {
+			/*
+			 * higher bits mean the namespace and lower bits mean
+			 * the type id within the namespace.
+			 */
+			u16				obj_type;
+			u16				min_len;
+			u16				max_len;
+			u8				access;
+		} objs_arr;
+	} u2;
+};
+
+/*
+ * Information about the API is loaded into a radix tree. For IOCTL we start
+ * with a tuple of:
+ *  object_id, attr_id, method_id
+ *
+ * Which is a 48 bit value, with most of the bits guaranteed to be zero. Based
+ * on the current kernel support this is compressed into 16 bit key for the
+ * radix tree. Since this compression is entirely internal to the kernel the
+ * below limits can be revised if the kernel gains additional data.
+ *
+ * With 64 leafs per node this is a 3 level radix tree.
+ *
+ * The tree encodes multiple types, and uses a scheme where OBJ_ID,0,0 returns
+ * the object slot, and OBJ_ID,METH_ID,0 and returns the method slot.
+ *
+ * This also encodes the tables for the write() and write() extended commands
+ * using the coding
+ *   OBJ_ID,UVERBS_API_METHOD_IS_WRITE,command #
+ *   OBJ_ID,UVERBS_API_METHOD_IS_WRITE_EX,command_ex #
+ * ie the WRITE path is treated as a special method type in the ioctl
+ * framework.
+ */
+enum uapi_radix_data {
+	UVERBS_API_NS_FLAG = 1U << UVERBS_ID_NS_SHIFT,
+
+	UVERBS_API_ATTR_KEY_BITS = 6,
+	UVERBS_API_ATTR_KEY_MASK = GENMASK(UVERBS_API_ATTR_KEY_BITS - 1, 0),
+	UVERBS_API_ATTR_BKEY_LEN = (1 << UVERBS_API_ATTR_KEY_BITS) - 1,
+	UVERBS_API_WRITE_KEY_NUM = 1 << UVERBS_API_ATTR_KEY_BITS,
+
+	UVERBS_API_METHOD_KEY_BITS = 5,
+	UVERBS_API_METHOD_KEY_SHIFT = UVERBS_API_ATTR_KEY_BITS,
+	UVERBS_API_METHOD_KEY_NUM_CORE = 22,
+	UVERBS_API_METHOD_IS_WRITE = 30 << UVERBS_API_METHOD_KEY_SHIFT,
+	UVERBS_API_METHOD_IS_WRITE_EX = 31 << UVERBS_API_METHOD_KEY_SHIFT,
+	UVERBS_API_METHOD_KEY_NUM_DRIVER =
+		(UVERBS_API_METHOD_IS_WRITE >> UVERBS_API_METHOD_KEY_SHIFT) -
+		UVERBS_API_METHOD_KEY_NUM_CORE,
+	UVERBS_API_METHOD_KEY_MASK = GENMASK(
+		UVERBS_API_METHOD_KEY_BITS + UVERBS_API_METHOD_KEY_SHIFT - 1,
+		UVERBS_API_METHOD_KEY_SHIFT),
+
+	UVERBS_API_OBJ_KEY_BITS = 5,
+	UVERBS_API_OBJ_KEY_SHIFT =
+		UVERBS_API_METHOD_KEY_BITS + UVERBS_API_METHOD_KEY_SHIFT,
+	UVERBS_API_OBJ_KEY_NUM_CORE = 20,
+	UVERBS_API_OBJ_KEY_NUM_DRIVER =
+		(1 << UVERBS_API_OBJ_KEY_BITS) - UVERBS_API_OBJ_KEY_NUM_CORE,
+	UVERBS_API_OBJ_KEY_MASK = GENMASK(31, UVERBS_API_OBJ_KEY_SHIFT),
+
+	/* This id guaranteed to not exist in the radix tree */
+	UVERBS_API_KEY_ERR = 0xFFFFFFFF,
+};
+
+static inline __attribute_const__ u32 uapi_key_obj(u32 id)
+{
+	if (id & UVERBS_API_NS_FLAG) {
+		id &= ~UVERBS_API_NS_FLAG;
+		if (id >= UVERBS_API_OBJ_KEY_NUM_DRIVER)
+			return UVERBS_API_KEY_ERR;
+		id = id + UVERBS_API_OBJ_KEY_NUM_CORE;
+	} else {
+		if (id >= UVERBS_API_OBJ_KEY_NUM_CORE)
+			return UVERBS_API_KEY_ERR;
+	}
+
+	return id << UVERBS_API_OBJ_KEY_SHIFT;
+}
+
+static inline __attribute_const__ bool uapi_key_is_object(u32 key)
+{
+	return (key & ~UVERBS_API_OBJ_KEY_MASK) == 0;
+}
+
+static inline __attribute_const__ u32 uapi_key_ioctl_method(u32 id)
+{
+	if (id & UVERBS_API_NS_FLAG) {
+		id &= ~UVERBS_API_NS_FLAG;
+		if (id >= UVERBS_API_METHOD_KEY_NUM_DRIVER)
+			return UVERBS_API_KEY_ERR;
+		id = id + UVERBS_API_METHOD_KEY_NUM_CORE;
+	} else {
+		id++;
+		if (id >= UVERBS_API_METHOD_KEY_NUM_CORE)
+			return UVERBS_API_KEY_ERR;
+	}
+
+	return id << UVERBS_API_METHOD_KEY_SHIFT;
+}
+
+static inline __attribute_const__ u32 uapi_key_write_method(u32 id)
+{
+	if (id >= UVERBS_API_WRITE_KEY_NUM)
+		return UVERBS_API_KEY_ERR;
+	return UVERBS_API_METHOD_IS_WRITE | id;
+}
+
+static inline __attribute_const__ u32 uapi_key_write_ex_method(u32 id)
+{
+	if (id >= UVERBS_API_WRITE_KEY_NUM)
+		return UVERBS_API_KEY_ERR;
+	return UVERBS_API_METHOD_IS_WRITE_EX | id;
+}
+
+static inline __attribute_const__ u32
+uapi_key_attr_to_ioctl_method(u32 attr_key)
+{
+	return attr_key &
+	       (UVERBS_API_OBJ_KEY_MASK | UVERBS_API_METHOD_KEY_MASK);
+}
+
+static inline __attribute_const__ bool uapi_key_is_ioctl_method(u32 key)
+{
+	unsigned int method = key & UVERBS_API_METHOD_KEY_MASK;
+
+	return method != 0 && method < UVERBS_API_METHOD_IS_WRITE &&
+	       (key & UVERBS_API_ATTR_KEY_MASK) == 0;
+}
+
+static inline __attribute_const__ bool uapi_key_is_write_method(u32 key)
+{
+	return (key & UVERBS_API_METHOD_KEY_MASK) == UVERBS_API_METHOD_IS_WRITE;
+}
+
+static inline __attribute_const__ bool uapi_key_is_write_ex_method(u32 key)
+{
+	return (key & UVERBS_API_METHOD_KEY_MASK) ==
+	       UVERBS_API_METHOD_IS_WRITE_EX;
+}
+
+static inline __attribute_const__ u32 uapi_key_attrs_start(u32 ioctl_method_key)
+{
+	/* 0 is the method slot itself */
+	return ioctl_method_key + 1;
+}
+
+static inline __attribute_const__ u32 uapi_key_attr(u32 id)
+{
+	/*
+	 * The attr is designed to fit in the typical single radix tree node
+	 * of 64 entries. Since allmost all methods have driver attributes we
+	 * organize things so that the driver and core attributes interleave to
+	 * reduce the length of the attributes array in typical cases.
+	 */
+	if (id & UVERBS_API_NS_FLAG) {
+		id &= ~UVERBS_API_NS_FLAG;
+		id++;
+		if (id >= 1 << (UVERBS_API_ATTR_KEY_BITS - 1))
+			return UVERBS_API_KEY_ERR;
+		id = (id << 1) | 0;
+	} else {
+		if (id >= 1 << (UVERBS_API_ATTR_KEY_BITS - 1))
+			return UVERBS_API_KEY_ERR;
+		id = (id << 1) | 1;
+	}
+
+	return id;
+}
+
+/* Only true for ioctl methods */
+static inline __attribute_const__ bool uapi_key_is_attr(u32 key)
+{
+	unsigned int method = key & UVERBS_API_METHOD_KEY_MASK;
+
+	return method != 0 && method < UVERBS_API_METHOD_IS_WRITE &&
+	       (key & UVERBS_API_ATTR_KEY_MASK) != 0;
+}
+
+/*
+ * This returns a value in the range [0 to UVERBS_API_ATTR_BKEY_LEN),
+ * basically it undoes the reservation of 0 in the ID numbering. attr_key
+ * must already be masked with UVERBS_API_ATTR_KEY_MASK, or be the output of
+ * uapi_key_attr().
+ */
+static inline __attribute_const__ u32 uapi_bkey_attr(u32 attr_key)
+{
+	return attr_key - 1;
+}
+
+static inline __attribute_const__ u32 uapi_bkey_to_key_attr(u32 attr_bkey)
+{
+	return attr_bkey + 1;
+}
+
+/*
+ * =======================================
+ *	Verbs definitions
+ * =======================================
+ */
+
+struct uverbs_attr_def {
+	u16                           id;
+	struct uverbs_attr_spec       attr;
+};
+
+struct uverbs_method_def {
+	u16                                  id;
+	/* Combination of bits from enum UVERBS_ACTION_FLAG_XXXX */
+	u32				     flags;
+	size_t				     num_attrs;
+	const struct uverbs_attr_def * const (*attrs)[];
+	int (*handler)(struct uverbs_attr_bundle *attrs);
+};
+
+struct uverbs_object_def {
+	u16					 id;
+	const struct uverbs_obj_type	        *type_attrs;
+	size_t				         num_methods;
+	const struct uverbs_method_def * const (*methods)[];
+};
+
+enum uapi_definition_kind {
+	UAPI_DEF_END = 0,
+	UAPI_DEF_OBJECT_START,
+	UAPI_DEF_WRITE,
+	UAPI_DEF_CHAIN_OBJ_TREE,
+	UAPI_DEF_CHAIN,
+	UAPI_DEF_IS_SUPPORTED_FUNC,
+	UAPI_DEF_IS_SUPPORTED_DEV_FN,
+};
+
+enum uapi_definition_scope {
+	UAPI_SCOPE_OBJECT = 1,
+	UAPI_SCOPE_METHOD = 2,
+};
+
+struct uapi_definition {
+	u8 kind;
+	u8 scope;
+	union {
+		struct {
+			u16 object_id;
+		} object_start;
+		struct {
+			u16 command_num;
+			u8 is_ex:1;
+			u8 has_udata:1;
+			u8 has_resp:1;
+			u8 req_size;
+			u8 resp_size;
+		} write;
+	};
+
+	union {
+		bool (*func_is_supported)(struct ib_device *device);
+		int (*func_write)(struct uverbs_attr_bundle *attrs);
+		const struct uapi_definition *chain;
+		const struct uverbs_object_def *chain_obj_tree;
+		size_t needs_fn_offset;
+	};
+};
+
+/* Define things connected to object_id */
+#define DECLARE_UVERBS_OBJECT(_object_id, ...)                                 \
+	{                                                                      \
+		.kind = UAPI_DEF_OBJECT_START,                                 \
+		.object_start = { .object_id = _object_id },                   \
+	},                                                                     \
+		##__VA_ARGS__
+
+/* Use in a var_args of DECLARE_UVERBS_OBJECT */
+#define DECLARE_UVERBS_WRITE(_command_num, _func, _cmd_desc, ...)              \
+	{                                                                      \
+		.kind = UAPI_DEF_WRITE,                                        \
+		.scope = UAPI_SCOPE_OBJECT,                                    \
+		.write = { .is_ex = 0, .command_num = _command_num },          \
+		.func_write = _func,                                           \
+		_cmd_desc,                                                     \
+	},                                                                     \
+		##__VA_ARGS__
+
+/* Use in a var_args of DECLARE_UVERBS_OBJECT */
+#define DECLARE_UVERBS_WRITE_EX(_command_num, _func, _cmd_desc, ...)           \
+	{                                                                      \
+		.kind = UAPI_DEF_WRITE,                                        \
+		.scope = UAPI_SCOPE_OBJECT,                                    \
+		.write = { .is_ex = 1, .command_num = _command_num },          \
+		.func_write = _func,                                           \
+		_cmd_desc,                                                     \
+	},                                                                     \
+		##__VA_ARGS__
+
+/*
+ * Object is only supported if the function pointer named ibdev_fn in struct
+ * ib_device is not NULL.
+ */
+#define UAPI_DEF_OBJ_NEEDS_FN(ibdev_fn)                                        \
+	{                                                                      \
+		.kind = UAPI_DEF_IS_SUPPORTED_DEV_FN,                          \
+		.scope = UAPI_SCOPE_OBJECT,                                    \
+		.needs_fn_offset =                                             \
+			offsetof(struct ib_device_ops, ibdev_fn) +             \
+			BUILD_BUG_ON_ZERO(sizeof_field(struct ib_device_ops,   \
+						       ibdev_fn) !=            \
+					  sizeof(void *)),                     \
+	}
+
+/*
+ * Method is only supported if the function pointer named ibdev_fn in struct
+ * ib_device is not NULL.
+ */
+#define UAPI_DEF_METHOD_NEEDS_FN(ibdev_fn)                                     \
+	{                                                                      \
+		.kind = UAPI_DEF_IS_SUPPORTED_DEV_FN,                          \
+		.scope = UAPI_SCOPE_METHOD,                                    \
+		.needs_fn_offset =                                             \
+			offsetof(struct ib_device_ops, ibdev_fn) +             \
+			BUILD_BUG_ON_ZERO(sizeof_field(struct ib_device_ops,   \
+						       ibdev_fn) !=            \
+					  sizeof(void *)),                     \
+	}
+
+/* Call a function to determine if the entire object is supported or not */
+#define UAPI_DEF_IS_OBJ_SUPPORTED(_func)                                       \
+	{                                                                      \
+		.kind = UAPI_DEF_IS_SUPPORTED_FUNC,                            \
+		.scope = UAPI_SCOPE_OBJECT, .func_is_supported = _func,        \
+	}
+
+/* Include another struct uapi_definition in this one */
+#define UAPI_DEF_CHAIN(_def_var)                                               \
+	{                                                                      \
+		.kind = UAPI_DEF_CHAIN, .chain = _def_var,                     \
+	}
+
+/* Temporary until the tree base description is replaced */
+#define UAPI_DEF_CHAIN_OBJ_TREE(_object_enum, _object_ptr, ...)                \
+	{                                                                      \
+		.kind = UAPI_DEF_CHAIN_OBJ_TREE,                               \
+		.object_start = { .object_id = _object_enum },                 \
+		.chain_obj_tree = _object_ptr,                                 \
+	},								       \
+		##__VA_ARGS__
+#define UAPI_DEF_CHAIN_OBJ_TREE_NAMED(_object_enum, ...)                       \
+	UAPI_DEF_CHAIN_OBJ_TREE(_object_enum, &UVERBS_OBJECT(_object_enum),    \
+				##__VA_ARGS__)
+
+/*
+ * =======================================
+ *	Attribute Specifications
+ * =======================================
+ */
+
+#define UVERBS_ATTR_SIZE(_min_len, _len)			\
+	.u.ptr.min_len = _min_len, .u.ptr.len = _len
+
+#define UVERBS_ATTR_NO_DATA() UVERBS_ATTR_SIZE(0, 0)
+
+/*
+ * Specifies a uapi structure that cannot be extended. The user must always
+ * supply the whole structure and nothing more. The structure must be declared
+ * in a header under include/uapi/rdma.
+ */
+#define UVERBS_ATTR_TYPE(_type)					\
+	.u.ptr.min_len = sizeof(_type), .u.ptr.len = sizeof(_type)
+/*
+ * Specifies a uapi structure where the user must provide at least up to
+ * member 'last'.  Anything after last and up until the end of the structure
+ * can be non-zero, anything longer than the end of the structure must be
+ * zero. The structure must be declared in a header under include/uapi/rdma.
+ */
+#define UVERBS_ATTR_STRUCT(_type, _last)                                       \
+	.zero_trailing = 1,                                                    \
+	UVERBS_ATTR_SIZE(offsetofend(_type, _last), sizeof(_type))
+/*
+ * Specifies at least min_len bytes must be passed in, but the amount can be
+ * larger, up to the protocol maximum size. No check for zeroing is done.
+ */
+#define UVERBS_ATTR_MIN_SIZE(_min_len) UVERBS_ATTR_SIZE(_min_len, USHRT_MAX)
+
+/* Must be used in the '...' of any UVERBS_ATTR */
+#define UA_ALLOC_AND_COPY .alloc_and_copy = 1
+#define UA_MANDATORY .mandatory = 1
+#define UA_OPTIONAL .mandatory = 0
+
+/*
+ * min_len must be bigger than 0 and _max_len must be smaller than 4095.  Only
+ * READ\WRITE accesses are supported.
+ */
+#define UVERBS_ATTR_IDRS_ARR(_attr_id, _idr_type, _access, _min_len, _max_len, \
+			     ...)                                              \
+	(&(const struct uverbs_attr_def){                                      \
+		.id = (_attr_id) +                                             \
+		      BUILD_BUG_ON_ZERO((_min_len) == 0 ||                     \
+					(_max_len) >                           \
+						PAGE_SIZE / sizeof(void *) ||  \
+					(_min_len) > (_max_len) ||             \
+					(_access) == UVERBS_ACCESS_NEW ||      \
+					(_access) == UVERBS_ACCESS_DESTROY),   \
+		.attr = { .type = UVERBS_ATTR_TYPE_IDRS_ARRAY,                 \
+			  .u2.objs_arr.obj_type = _idr_type,                   \
+			  .u2.objs_arr.access = _access,                       \
+			  .u2.objs_arr.min_len = _min_len,                     \
+			  .u2.objs_arr.max_len = _max_len,                     \
+			  __VA_ARGS__ } })
+
+/*
+ * Only for use with UVERBS_ATTR_IDR, allows any uobject type to be accepted,
+ * the user must validate the type of the uobject instead.
+ */
+#define UVERBS_IDR_ANY_OBJECT 0xFFFF
+
+#define UVERBS_ATTR_IDR(_attr_id, _idr_type, _access, ...)                     \
+	(&(const struct uverbs_attr_def){                                      \
+		.id = _attr_id,                                                \
+		.attr = { .type = UVERBS_ATTR_TYPE_IDR,                        \
+			  .u.obj.obj_type = _idr_type,                         \
+			  .u.obj.access = _access,                             \
+			  __VA_ARGS__ } })
+
+#define UVERBS_ATTR_FD(_attr_id, _fd_type, _access, ...)                       \
+	(&(const struct uverbs_attr_def){                                      \
+		.id = (_attr_id) +                                             \
+		      BUILD_BUG_ON_ZERO((_access) != UVERBS_ACCESS_NEW &&      \
+					(_access) != UVERBS_ACCESS_READ),      \
+		.attr = { .type = UVERBS_ATTR_TYPE_FD,                         \
+			  .u.obj.obj_type = _fd_type,                          \
+			  .u.obj.access = _access,                             \
+			  __VA_ARGS__ } })
+
+#define UVERBS_ATTR_PTR_IN(_attr_id, _type, ...)                               \
+	(&(const struct uverbs_attr_def){                                      \
+		.id = _attr_id,                                                \
+		.attr = { .type = UVERBS_ATTR_TYPE_PTR_IN,                     \
+			  _type,                                               \
+			  __VA_ARGS__ } })
+
+#define UVERBS_ATTR_PTR_OUT(_attr_id, _type, ...)                              \
+	(&(const struct uverbs_attr_def){                                      \
+		.id = _attr_id,                                                \
+		.attr = { .type = UVERBS_ATTR_TYPE_PTR_OUT,                    \
+			  _type,                                               \
+			  __VA_ARGS__ } })
+
+/* _enum_arry should be a 'static const union uverbs_attr_spec[]' */
+#define UVERBS_ATTR_ENUM_IN(_attr_id, _enum_arr, ...)                          \
+	(&(const struct uverbs_attr_def){                                      \
+		.id = _attr_id,                                                \
+		.attr = { .type = UVERBS_ATTR_TYPE_ENUM_IN,                    \
+			  .u2.enum_def.ids = _enum_arr,                        \
+			  .u.enum_def.num_elems = ARRAY_SIZE(_enum_arr),       \
+			  __VA_ARGS__ },                                       \
+	})
+
+/* An input value that is a member in the enum _enum_type. */
+#define UVERBS_ATTR_CONST_IN(_attr_id, _enum_type, ...)                        \
+	UVERBS_ATTR_PTR_IN(                                                    \
+		_attr_id,                                                      \
+		UVERBS_ATTR_SIZE(                                              \
+			sizeof(u64) + BUILD_BUG_ON_ZERO(!sizeof(_enum_type)),  \
+			sizeof(u64)),                                          \
+		__VA_ARGS__)
+
+/*
+ * An input value that is a bitwise combination of values of _enum_type.
+ * This permits the flag value to be passed as either a u32 or u64, it must
+ * be retrieved via uverbs_get_flag().
+ */
+#define UVERBS_ATTR_FLAGS_IN(_attr_id, _enum_type, ...)                        \
+	UVERBS_ATTR_PTR_IN(                                                    \
+		_attr_id,                                                      \
+		UVERBS_ATTR_SIZE(sizeof(u32) + BUILD_BUG_ON_ZERO(              \
+						       !sizeof(_enum_type *)), \
+				 sizeof(u64)),                                 \
+		__VA_ARGS__)
+
+/*
+ * This spec is used in order to pass information to the hardware driver in a
+ * legacy way. Every verb that could get driver specific data should get this
+ * spec.
+ */
+#define UVERBS_ATTR_UHW()                                                      \
+	UVERBS_ATTR_PTR_IN(UVERBS_ATTR_UHW_IN,                                 \
+			   UVERBS_ATTR_MIN_SIZE(0),			       \
+			   UA_OPTIONAL,                                        \
+			   .is_udata = 1),				       \
+	UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_UHW_OUT,                               \
+			    UVERBS_ATTR_MIN_SIZE(0),			       \
+			    UA_OPTIONAL,                                       \
+			    .is_udata = 1)
+
+/* =================================================
+ *              Parsing infrastructure
+ * =================================================
+ */
+
+
+struct uverbs_ptr_attr {
+	/*
+	 * If UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY is set then the 'ptr' is
+	 * used.
+	 */
+	union {
+		void *ptr;
+		u64 data;
+	};
+	u16		len;
+	u16		uattr_idx;
+	u8		enum_id;
+};
+
+struct uverbs_obj_attr {
+	struct ib_uobject		*uobject;
+	const struct uverbs_api_attr	*attr_elm;
+};
+
+struct uverbs_objs_arr_attr {
+	struct ib_uobject **uobjects;
+	u16 len;
+};
+
+struct uverbs_attr {
+	union {
+		struct uverbs_ptr_attr	ptr_attr;
+		struct uverbs_obj_attr	obj_attr;
+		struct uverbs_objs_arr_attr objs_arr_attr;
+	};
+};
+
+struct uverbs_attr_bundle {
+	struct ib_udata driver_udata;
+	struct ib_udata ucore;
+	struct ib_uverbs_file *ufile;
+	struct ib_ucontext *context;
+	struct ib_uobject *uobject;
+	DECLARE_BITMAP(attr_present, UVERBS_API_ATTR_BKEY_LEN);
+	struct uverbs_attr attrs[];
+};
+
+static inline bool uverbs_attr_is_valid(const struct uverbs_attr_bundle *attrs_bundle,
+					unsigned int idx)
+{
+	return test_bit(uapi_bkey_attr(uapi_key_attr(idx)),
+			attrs_bundle->attr_present);
+}
+
+/**
+ * rdma_udata_to_drv_context - Helper macro to get the driver's context out of
+ *                             ib_udata which is embedded in uverbs_attr_bundle.
+ *
+ * If udata is not NULL this cannot fail. Otherwise a NULL udata will result
+ * in a NULL ucontext pointer, as a safety precaution. Callers should be using
+ * 'udata' to determine if the driver call is in user or kernel mode, not
+ * 'ucontext'.
+ *
+ */
+#define rdma_udata_to_drv_context(udata, drv_dev_struct, member)               \
+	(udata ? container_of(container_of(udata, struct uverbs_attr_bundle,   \
+					   driver_udata)                       \
+				      ->context,                               \
+			      drv_dev_struct, member) :                        \
+		 (drv_dev_struct *)NULL)
+
+#define IS_UVERBS_COPY_ERR(_ret)		((_ret) && (_ret) != -ENOENT)
+
+static inline const struct uverbs_attr *uverbs_attr_get(const struct uverbs_attr_bundle *attrs_bundle,
+							u16 idx)
+{
+	if (!uverbs_attr_is_valid(attrs_bundle, idx))
+		return ERR_PTR(-ENOENT);
+
+	return &attrs_bundle->attrs[uapi_bkey_attr(uapi_key_attr(idx))];
+}
+
+static inline int uverbs_attr_get_enum_id(const struct uverbs_attr_bundle *attrs_bundle,
+					  u16 idx)
+{
+	const struct uverbs_attr *attr = uverbs_attr_get(attrs_bundle, idx);
+
+	if (IS_ERR(attr))
+		return PTR_ERR(attr);
+
+	return attr->ptr_attr.enum_id;
+}
+
+static inline void *uverbs_attr_get_obj(const struct uverbs_attr_bundle *attrs_bundle,
+					u16 idx)
+{
+	const struct uverbs_attr *attr;
+
+	attr = uverbs_attr_get(attrs_bundle, idx);
+	if (IS_ERR(attr))
+		return ERR_CAST(attr);
+
+	return attr->obj_attr.uobject->object;
+}
+
+static inline struct ib_uobject *uverbs_attr_get_uobject(const struct uverbs_attr_bundle *attrs_bundle,
+							 u16 idx)
+{
+	const struct uverbs_attr *attr = uverbs_attr_get(attrs_bundle, idx);
+
+	if (IS_ERR(attr))
+		return ERR_CAST(attr);
+
+	return attr->obj_attr.uobject;
+}
+
+static inline int
+uverbs_attr_get_len(const struct uverbs_attr_bundle *attrs_bundle, u16 idx)
+{
+	const struct uverbs_attr *attr = uverbs_attr_get(attrs_bundle, idx);
+
+	if (IS_ERR(attr))
+		return PTR_ERR(attr);
+
+	return attr->ptr_attr.len;
+}
+
+void uverbs_finalize_uobj_create(const struct uverbs_attr_bundle *attrs_bundle,
+				 u16 idx);
+
+/*
+ * uverbs_attr_ptr_get_array_size() - Get array size pointer by a ptr
+ * attribute.
+ * @attrs: The attribute bundle
+ * @idx: The ID of the attribute
+ * @elem_size: The size of the element in the array
+ */
+static inline int
+uverbs_attr_ptr_get_array_size(struct uverbs_attr_bundle *attrs, u16 idx,
+			       size_t elem_size)
+{
+	int size = uverbs_attr_get_len(attrs, idx);
+
+	if (size < 0)
+		return size;
+
+	if (size % elem_size)
+		return -EINVAL;
+
+	return size / elem_size;
+}
+
+/**
+ * uverbs_attr_get_uobjs_arr() - Provides array's properties for attribute for
+ * UVERBS_ATTR_TYPE_IDRS_ARRAY.
+ * @arr: Returned pointer to array of pointers for uobjects or NULL if
+ *       the attribute isn't provided.
+ *
+ * Return: The array length or 0 if no attribute was provided.
+ */
+static inline int uverbs_attr_get_uobjs_arr(
+	const struct uverbs_attr_bundle *attrs_bundle, u16 attr_idx,
+	struct ib_uobject ***arr)
+{
+	const struct uverbs_attr *attr =
+			uverbs_attr_get(attrs_bundle, attr_idx);
+
+	if (IS_ERR(attr)) {
+		*arr = NULL;
+		return 0;
+	}
+
+	*arr = attr->objs_arr_attr.uobjects;
+
+	return attr->objs_arr_attr.len;
+}
+
+static inline bool uverbs_attr_ptr_is_inline(const struct uverbs_attr *attr)
+{
+	return attr->ptr_attr.len <= sizeof(attr->ptr_attr.data);
+}
+
+static inline void *uverbs_attr_get_alloced_ptr(
+	const struct uverbs_attr_bundle *attrs_bundle, u16 idx)
+{
+	const struct uverbs_attr *attr = uverbs_attr_get(attrs_bundle, idx);
+
+	if (IS_ERR(attr))
+		return (void *)attr;
+
+	return uverbs_attr_ptr_is_inline(attr) ? (void *)&attr->ptr_attr.data :
+						 attr->ptr_attr.ptr;
+}
+
+static inline int _uverbs_copy_from(void *to,
+				    const struct uverbs_attr_bundle *attrs_bundle,
+				    size_t idx,
+				    size_t size)
+{
+	const struct uverbs_attr *attr = uverbs_attr_get(attrs_bundle, idx);
+
+	if (IS_ERR(attr))
+		return PTR_ERR(attr);
+
+	/*
+	 * Validation ensures attr->ptr_attr.len >= size. If the caller is
+	 * using UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO then it must call
+	 * uverbs_copy_from_or_zero.
+	 */
+	if (unlikely(size < attr->ptr_attr.len))
+		return -EINVAL;
+
+	if (uverbs_attr_ptr_is_inline(attr))
+		memcpy(to, &attr->ptr_attr.data, attr->ptr_attr.len);
+	else if (copy_from_user(to, u64_to_user_ptr(attr->ptr_attr.data),
+				attr->ptr_attr.len))
+		return -EFAULT;
+
+	return 0;
+}
+
+static inline int _uverbs_copy_from_or_zero(void *to,
+					    const struct uverbs_attr_bundle *attrs_bundle,
+					    size_t idx,
+					    size_t size)
+{
+	const struct uverbs_attr *attr = uverbs_attr_get(attrs_bundle, idx);
+	size_t min_size;
+
+	if (IS_ERR(attr))
+		return PTR_ERR(attr);
+
+	min_size = min_t(size_t, size, attr->ptr_attr.len);
+
+	if (uverbs_attr_ptr_is_inline(attr))
+		memcpy(to, &attr->ptr_attr.data, min_size);
+	else if (copy_from_user(to, u64_to_user_ptr(attr->ptr_attr.data),
+				min_size))
+		return -EFAULT;
+
+	if (size > min_size)
+		memset(to + min_size, 0, size - min_size);
+
+	return 0;
+}
+
+#define uverbs_copy_from(to, attrs_bundle, idx)				      \
+	_uverbs_copy_from(to, attrs_bundle, idx, sizeof(*to))
+
+#define uverbs_copy_from_or_zero(to, attrs_bundle, idx)			      \
+	_uverbs_copy_from_or_zero(to, attrs_bundle, idx, sizeof(*to))
+
+static inline struct ib_ucontext *
+ib_uverbs_get_ucontext(const struct uverbs_attr_bundle *attrs)
+{
+	return ib_uverbs_get_ucontext_file(attrs->ufile);
+}
+
+#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
+int uverbs_get_flags64(u64 *to, const struct uverbs_attr_bundle *attrs_bundle,
+		       size_t idx, u64 allowed_bits);
+int uverbs_get_flags32(u32 *to, const struct uverbs_attr_bundle *attrs_bundle,
+		       size_t idx, u64 allowed_bits);
+int uverbs_copy_to(const struct uverbs_attr_bundle *attrs_bundle, size_t idx,
+		   const void *from, size_t size);
+__malloc void *_uverbs_alloc(struct uverbs_attr_bundle *bundle, size_t size,
+			     gfp_t flags);
+
+static inline __malloc void *uverbs_alloc(struct uverbs_attr_bundle *bundle,
+					  size_t size)
+{
+	return _uverbs_alloc(bundle, size, GFP_KERNEL);
+}
+
+static inline __malloc void *uverbs_zalloc(struct uverbs_attr_bundle *bundle,
+					   size_t size)
+{
+	return _uverbs_alloc(bundle, size, GFP_KERNEL | __GFP_ZERO);
+}
+
+static inline __malloc void *uverbs_kcalloc(struct uverbs_attr_bundle *bundle,
+					    size_t n, size_t size)
+{
+	size_t bytes;
+
+	if (unlikely(check_mul_overflow(n, size, &bytes)))
+		return ERR_PTR(-EOVERFLOW);
+	return uverbs_zalloc(bundle, bytes);
+}
+int _uverbs_get_const(s64 *to, const struct uverbs_attr_bundle *attrs_bundle,
+		      size_t idx, s64 lower_bound, u64 upper_bound,
+		      s64 *def_val);
+int uverbs_copy_to_struct_or_zero(const struct uverbs_attr_bundle *bundle,
+				  size_t idx, const void *from, size_t size);
+#else
+static inline int
+uverbs_get_flags64(u64 *to, const struct uverbs_attr_bundle *attrs_bundle,
+		   size_t idx, u64 allowed_bits)
+{
+	return -EINVAL;
+}
+static inline int
+uverbs_get_flags32(u32 *to, const struct uverbs_attr_bundle *attrs_bundle,
+		   size_t idx, u64 allowed_bits)
+{
+	return -EINVAL;
+}
+static inline int uverbs_copy_to(const struct uverbs_attr_bundle *attrs_bundle,
+				 size_t idx, const void *from, size_t size)
+{
+	return -EINVAL;
+}
+static inline __malloc void *uverbs_alloc(struct uverbs_attr_bundle *bundle,
+					  size_t size)
+{
+	return ERR_PTR(-EINVAL);
+}
+static inline __malloc void *uverbs_zalloc(struct uverbs_attr_bundle *bundle,
+					   size_t size)
+{
+	return ERR_PTR(-EINVAL);
+}
+static inline int
+_uverbs_get_const(s64 *to, const struct uverbs_attr_bundle *attrs_bundle,
+		  size_t idx, s64 lower_bound, u64 upper_bound,
+		  s64 *def_val)
+{
+	return -EINVAL;
+}
+static inline int
+uverbs_copy_to_struct_or_zero(const struct uverbs_attr_bundle *bundle,
+			      size_t idx, const void *from, size_t size)
+{
+	return -EINVAL;
+}
+#endif
+
+#define uverbs_get_const(_to, _attrs_bundle, _idx)                             \
+	({                                                                     \
+		s64 _val;                                                      \
+		int _ret = _uverbs_get_const(&_val, _attrs_bundle, _idx,       \
+					     type_min(typeof(*_to)),           \
+					     type_max(typeof(*_to)), NULL);    \
+		(*_to) = _val;                                                 \
+		_ret;                                                          \
+	})
+
+#define uverbs_get_const_default(_to, _attrs_bundle, _idx, _default)           \
+	({                                                                     \
+		s64 _val;                                                      \
+		s64 _def_val = _default;                                       \
+		int _ret =                                                     \
+			_uverbs_get_const(&_val, _attrs_bundle, _idx,          \
+					  type_min(typeof(*_to)),              \
+					  type_max(typeof(*_to)), &_def_val);  \
+		(*_to) = _val;                                                 \
+		_ret;                                                          \
+	})
+#endif
diff --git a/include/rdma/uverbs_named_ioctl.h b/include/rdma/uverbs_named_ioctl.h
new file mode 100644
index 000000000..f04f5126f
--- /dev/null
+++ b/include/rdma/uverbs_named_ioctl.h
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2018, Mellanox Technologies inc.  All rights reserved.
+ */
+
+#ifndef _UVERBS_NAMED_IOCTL_
+#define _UVERBS_NAMED_IOCTL_
+
+#include <rdma/uverbs_ioctl.h>
+
+#ifndef UVERBS_MODULE_NAME
+#error "Please #define UVERBS_MODULE_NAME before including rdma/uverbs_named_ioctl.h"
+#endif
+
+#define _UVERBS_PASTE(x, y)	x ## y
+#define _UVERBS_NAME(x, y)	_UVERBS_PASTE(x, y)
+#define UVERBS_METHOD(id)	_UVERBS_NAME(UVERBS_MODULE_NAME, _method_##id)
+#define UVERBS_HANDLER(id)	_UVERBS_NAME(UVERBS_MODULE_NAME, _handler_##id)
+#define UVERBS_OBJECT(id)	_UVERBS_NAME(UVERBS_MODULE_NAME, _object_##id)
+
+/* These are static so they do not need to be qualified */
+#define UVERBS_METHOD_ATTRS(method_id) _method_attrs_##method_id
+#define UVERBS_OBJECT_METHODS(object_id) _object_methods_##object_id
+
+#define DECLARE_UVERBS_NAMED_METHOD(_method_id, ...)                           \
+	static const struct uverbs_attr_def *const UVERBS_METHOD_ATTRS(        \
+		_method_id)[] = { __VA_ARGS__ };                               \
+	static const struct uverbs_method_def UVERBS_METHOD(_method_id) = {    \
+		.id = _method_id,                                              \
+		.handler = UVERBS_HANDLER(_method_id),                         \
+		.num_attrs = ARRAY_SIZE(UVERBS_METHOD_ATTRS(_method_id)),      \
+		.attrs = &UVERBS_METHOD_ATTRS(_method_id),                     \
+	}
+
+/* Create a standard destroy method using the default handler. The handle_attr
+ * argument must be the attribute specifying the handle to destroy, the
+ * default handler does not support any other attributes.
+ */
+#define DECLARE_UVERBS_NAMED_METHOD_DESTROY(_method_id, _handle_attr)          \
+	static const struct uverbs_attr_def *const UVERBS_METHOD_ATTRS(        \
+		_method_id)[] = { _handle_attr };                              \
+	static const struct uverbs_method_def UVERBS_METHOD(_method_id) = {    \
+		.id = _method_id,                                              \
+		.handler = uverbs_destroy_def_handler,                         \
+		.num_attrs = ARRAY_SIZE(UVERBS_METHOD_ATTRS(_method_id)),      \
+		.attrs = &UVERBS_METHOD_ATTRS(_method_id),                     \
+	}
+
+#define DECLARE_UVERBS_NAMED_OBJECT(_object_id, _type_attrs, ...)              \
+	static const struct uverbs_method_def *const UVERBS_OBJECT_METHODS(    \
+		_object_id)[] = { __VA_ARGS__ };                               \
+	static const struct uverbs_object_def UVERBS_OBJECT(_object_id) = {    \
+		.id = _object_id,                                              \
+		.type_attrs = &_type_attrs,                                    \
+		.num_methods = ARRAY_SIZE(UVERBS_OBJECT_METHODS(_object_id)),  \
+		.methods = &UVERBS_OBJECT_METHODS(_object_id)                  \
+	}
+
+/*
+ * Declare global methods. These still have a unique object_id because we
+ * identify all uapi methods with a (object,method) tuple. However, they have
+ * no type pointer.
+ */
+#define DECLARE_UVERBS_GLOBAL_METHODS(_object_id, ...)                         \
+	static const struct uverbs_method_def *const UVERBS_OBJECT_METHODS(    \
+		_object_id)[] = { __VA_ARGS__ };                               \
+	static const struct uverbs_object_def UVERBS_OBJECT(_object_id) = {    \
+		.id = _object_id,                                              \
+		.num_methods = ARRAY_SIZE(UVERBS_OBJECT_METHODS(_object_id)),  \
+		.methods = &UVERBS_OBJECT_METHODS(_object_id)                  \
+	}
+
+/* Used by drivers to declare a complete parsing tree for new methods
+ */
+#define ADD_UVERBS_METHODS(_name, _object_id, ...)                             \
+	static const struct uverbs_method_def *const UVERBS_OBJECT_METHODS(    \
+		_object_id)[] = { __VA_ARGS__ };                               \
+	static const struct uverbs_object_def _name = {                        \
+		.id = _object_id,                                              \
+		.num_methods = ARRAY_SIZE(UVERBS_OBJECT_METHODS(_object_id)),  \
+		.methods = &UVERBS_OBJECT_METHODS(_object_id)                  \
+	};
+
+/* Used by drivers to declare a complete parsing tree for a single method that
+ * differs only in having additional driver specific attributes.
+ */
+#define ADD_UVERBS_ATTRIBUTES_SIMPLE(_name, _object_id, _method_id, ...)       \
+	static const struct uverbs_attr_def *const UVERBS_METHOD_ATTRS(        \
+		_method_id)[] = { __VA_ARGS__ };                               \
+	static const struct uverbs_method_def UVERBS_METHOD(_method_id) = {    \
+		.id = _method_id,                                              \
+		.num_attrs = ARRAY_SIZE(UVERBS_METHOD_ATTRS(_method_id)),      \
+		.attrs = &UVERBS_METHOD_ATTRS(_method_id),                     \
+	};                                                                     \
+	ADD_UVERBS_METHODS(_name, _object_id, &UVERBS_METHOD(_method_id))
+
+#endif
diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h
new file mode 100644
index 000000000..fe0512116
--- /dev/null
+++ b/include/rdma/uverbs_std_types.h
@@ -0,0 +1,178 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2017, Mellanox Technologies inc.  All rights reserved.
+ */
+
+#ifndef _UVERBS_STD_TYPES__
+#define _UVERBS_STD_TYPES__
+
+#include <rdma/uverbs_types.h>
+#include <rdma/uverbs_ioctl.h>
+#include <rdma/ib_user_ioctl_verbs.h>
+
+/* Returns _id, or causes a compile error if _id is not a u32.
+ *
+ * The uobj APIs should only be used with the write based uAPI to access
+ * object IDs. The write API must use a u32 for the object handle, which is
+ * checked by this macro.
+ */
+#define _uobj_check_id(_id) ((_id) * typecheck(u32, _id))
+
+#define uobj_get_type(_attrs, _object)                                         \
+	uapi_get_object((_attrs)->ufile->device->uapi, _object)
+
+#define uobj_get_read(_type, _id, _attrs)                                      \
+	rdma_lookup_get_uobject(uobj_get_type(_attrs, _type), (_attrs)->ufile, \
+				_uobj_check_id(_id), UVERBS_LOOKUP_READ,       \
+				_attrs)
+
+#define ufd_get_read(_type, _fdnum, _attrs)                                    \
+	rdma_lookup_get_uobject(uobj_get_type(_attrs, _type), (_attrs)->ufile, \
+				(_fdnum)*typecheck(s32, _fdnum),               \
+				UVERBS_LOOKUP_READ, _attrs)
+
+static inline void *_uobj_get_obj_read(struct ib_uobject *uobj)
+{
+	if (IS_ERR(uobj))
+		return NULL;
+	return uobj->object;
+}
+#define uobj_get_obj_read(_object, _type, _id, _attrs)                         \
+	((struct ib_##_object *)_uobj_get_obj_read(                            \
+		uobj_get_read(_type, _id, _attrs)))
+
+#define uobj_get_write(_type, _id, _attrs)                                     \
+	rdma_lookup_get_uobject(uobj_get_type(_attrs, _type), (_attrs)->ufile, \
+				_uobj_check_id(_id), UVERBS_LOOKUP_WRITE,      \
+				_attrs)
+
+int __uobj_perform_destroy(const struct uverbs_api_object *obj, u32 id,
+			   struct uverbs_attr_bundle *attrs);
+#define uobj_perform_destroy(_type, _id, _attrs)                               \
+	__uobj_perform_destroy(uobj_get_type(_attrs, _type),                   \
+			       _uobj_check_id(_id), _attrs)
+
+struct ib_uobject *__uobj_get_destroy(const struct uverbs_api_object *obj,
+				      u32 id, struct uverbs_attr_bundle *attrs);
+
+#define uobj_get_destroy(_type, _id, _attrs)                                   \
+	__uobj_get_destroy(uobj_get_type(_attrs, _type), _uobj_check_id(_id),  \
+			   _attrs)
+
+static inline void uobj_put_destroy(struct ib_uobject *uobj)
+{
+	rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_DESTROY);
+}
+
+static inline void uobj_put_read(struct ib_uobject *uobj)
+{
+	rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_READ);
+}
+
+#define uobj_put_obj_read(_obj)					\
+	uobj_put_read((_obj)->uobject)
+
+static inline void uobj_put_write(struct ib_uobject *uobj)
+{
+	rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE);
+}
+
+static inline void uobj_alloc_abort(struct ib_uobject *uobj,
+				    struct uverbs_attr_bundle *attrs)
+{
+	rdma_alloc_abort_uobject(uobj, attrs, false);
+}
+
+static inline void uobj_finalize_uobj_create(struct ib_uobject *uobj,
+					     struct uverbs_attr_bundle *attrs)
+{
+	/*
+	 * Tell the core code that the write() handler has completed
+	 * initializing the object and that the core should commit or
+	 * abort this object based upon the return code from the write()
+	 * method. Similar to what uverbs_finalize_uobj_create() does for
+	 * ioctl()
+	 */
+	WARN_ON(attrs->uobject);
+	attrs->uobject = uobj;
+}
+
+static inline struct ib_uobject *
+__uobj_alloc(const struct uverbs_api_object *obj,
+	     struct uverbs_attr_bundle *attrs, struct ib_device **ib_dev)
+{
+	struct ib_uobject *uobj = rdma_alloc_begin_uobject(obj, attrs);
+
+	if (!IS_ERR(uobj))
+		*ib_dev = attrs->context->device;
+	return uobj;
+}
+
+#define uobj_alloc(_type, _attrs, _ib_dev)                                     \
+	__uobj_alloc(uobj_get_type(_attrs, _type), _attrs, _ib_dev)
+
+static inline void uverbs_flow_action_fill_action(struct ib_flow_action *action,
+						  struct ib_uobject *uobj,
+						  struct ib_device *ib_dev,
+						  enum ib_flow_action_type type)
+{
+	atomic_set(&action->usecnt, 0);
+	action->device = ib_dev;
+	action->type = type;
+	action->uobject = uobj;
+	uobj->object = action;
+}
+
+struct ib_uflow_resources {
+	size_t			max;
+	size_t			num;
+	size_t			collection_num;
+	size_t			counters_num;
+	struct ib_counters	**counters;
+	struct ib_flow_action	**collection;
+};
+
+struct ib_uflow_object {
+	struct ib_uobject		uobject;
+	struct ib_uflow_resources	*resources;
+};
+
+struct ib_uflow_resources *flow_resources_alloc(size_t num_specs);
+void flow_resources_add(struct ib_uflow_resources *uflow_res,
+			enum ib_flow_spec_type type,
+			void *ibobj);
+void ib_uverbs_flow_resources_free(struct ib_uflow_resources *uflow_res);
+
+static inline void ib_set_flow(struct ib_uobject *uobj, struct ib_flow *ibflow,
+			       struct ib_qp *qp, struct ib_device *device,
+			       struct ib_uflow_resources *uflow_res)
+{
+	struct ib_uflow_object *uflow;
+
+	uobj->object = ibflow;
+	ibflow->uobject = uobj;
+
+	if (qp) {
+		atomic_inc(&qp->usecnt);
+		ibflow->qp = qp;
+	}
+
+	ibflow->device = device;
+	uflow = container_of(uobj, typeof(*uflow), uobject);
+	uflow->resources = uflow_res;
+}
+
+struct uverbs_api_object {
+	const struct uverbs_obj_type *type_attrs;
+	const struct uverbs_obj_type_class *type_class;
+	u8 disabled:1;
+	u32 id;
+};
+
+static inline u32 uobj_get_object_id(struct ib_uobject *uobj)
+{
+	return uobj->uapi_object->id;
+}
+
+#endif
+
diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h
new file mode 100644
index 000000000..06db27e35
--- /dev/null
+++ b/include/rdma/uverbs_types.h
@@ -0,0 +1,179 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2017, Mellanox Technologies inc.  All rights reserved.
+ */
+
+#ifndef _UVERBS_TYPES_
+#define _UVERBS_TYPES_
+
+#include <linux/kernel.h>
+#include <rdma/ib_verbs.h>
+
+struct uverbs_obj_type;
+struct uverbs_api_object;
+
+enum rdma_lookup_mode {
+	UVERBS_LOOKUP_READ,
+	UVERBS_LOOKUP_WRITE,
+	/*
+	 * Destroy is like LOOKUP_WRITE, except that the uobject is not
+	 * locked.  uobj_destroy is used to convert a LOOKUP_DESTROY lock into
+	 * a LOOKUP_WRITE lock.
+	 */
+	UVERBS_LOOKUP_DESTROY,
+};
+
+/*
+ * The following sequences are valid:
+ * Success flow:
+ *   alloc_begin
+ *   alloc_commit
+ *    [..]
+ * Access flow:
+ *   lookup_get(exclusive=false) & uverbs_try_lock_object
+ *   lookup_put(exclusive=false) via rdma_lookup_put_uobject
+ * Destruction flow:
+ *   lookup_get(exclusive=true) & uverbs_try_lock_object
+ *   remove_commit
+ *   remove_handle (optional)
+ *   lookup_put(exclusive=true) via rdma_lookup_put_uobject
+ *
+ * Allocate Error flow #1
+ *   alloc_begin
+ *   alloc_abort
+ * Allocate Error flow #2
+ *   alloc_begin
+ *   remove_commit
+ *   alloc_abort
+ * Allocate Error flow #3
+ *   alloc_begin
+ *   alloc_commit (fails)
+ *   remove_commit
+ *   alloc_abort
+ *
+ * In all cases the caller must hold the ufile kref until alloc_commit or
+ * alloc_abort returns.
+ */
+struct uverbs_obj_type_class {
+	struct ib_uobject *(*alloc_begin)(const struct uverbs_api_object *obj,
+					  struct uverbs_attr_bundle *attrs);
+	/* This consumes the kref on uobj */
+	void (*alloc_commit)(struct ib_uobject *uobj);
+	/* This does not consume the kref on uobj */
+	void (*alloc_abort)(struct ib_uobject *uobj);
+
+	struct ib_uobject *(*lookup_get)(const struct uverbs_api_object *obj,
+					 struct ib_uverbs_file *ufile, s64 id,
+					 enum rdma_lookup_mode mode);
+	void (*lookup_put)(struct ib_uobject *uobj, enum rdma_lookup_mode mode);
+	/* This does not consume the kref on uobj */
+	int __must_check (*destroy_hw)(struct ib_uobject *uobj,
+				       enum rdma_remove_reason why,
+				       struct uverbs_attr_bundle *attrs);
+	void (*remove_handle)(struct ib_uobject *uobj);
+};
+
+struct uverbs_obj_type {
+	const struct uverbs_obj_type_class * const type_class;
+	size_t	     obj_size;
+};
+
+/*
+ * Objects type classes which support a detach state (object is still alive but
+ * it's not attached to any context need to make sure:
+ * (a) no call through to a driver after a detach is called
+ * (b) detach isn't called concurrently with context_cleanup
+ */
+
+struct uverbs_obj_idr_type {
+	/*
+	 * In idr based objects, uverbs_obj_type_class points to a generic
+	 * idr operations. In order to specialize the underlying types (e.g. CQ,
+	 * QPs, etc.), we add destroy_object specific callbacks.
+	 */
+	struct uverbs_obj_type  type;
+
+	/* Free driver resources from the uobject, make the driver uncallable,
+	 * and move the uobject to the detached state. If the object was
+	 * destroyed by the user's request, a failure should leave the uobject
+	 * completely unchanged.
+	 */
+	int __must_check (*destroy_object)(struct ib_uobject *uobj,
+					   enum rdma_remove_reason why,
+					   struct uverbs_attr_bundle *attrs);
+};
+
+struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_api_object *obj,
+					   struct ib_uverbs_file *ufile, s64 id,
+					   enum rdma_lookup_mode mode,
+					   struct uverbs_attr_bundle *attrs);
+void rdma_lookup_put_uobject(struct ib_uobject *uobj,
+			     enum rdma_lookup_mode mode);
+struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_api_object *obj,
+					    struct uverbs_attr_bundle *attrs);
+void rdma_alloc_abort_uobject(struct ib_uobject *uobj,
+			      struct uverbs_attr_bundle *attrs,
+			      bool hw_obj_valid);
+void rdma_alloc_commit_uobject(struct ib_uobject *uobj,
+			       struct uverbs_attr_bundle *attrs);
+
+/*
+ * uverbs_uobject_get is called in order to increase the reference count on
+ * an uobject. This is useful when a handler wants to keep the uobject's memory
+ * alive, regardless if this uobject is still alive in the context's objects
+ * repository. Objects are put via uverbs_uobject_put.
+ */
+static inline void uverbs_uobject_get(struct ib_uobject *uobject)
+{
+	kref_get(&uobject->ref);
+}
+void uverbs_uobject_put(struct ib_uobject *uobject);
+
+struct uverbs_obj_fd_type {
+	/*
+	 * In fd based objects, uverbs_obj_type_ops points to generic
+	 * fd operations. In order to specialize the underlying types (e.g.
+	 * completion_channel), we use fops, name and flags for fd creation.
+	 * destroy_object is called when the uobject is to be destroyed,
+	 * because the driver is removed or the FD is closed.
+	 */
+	struct uverbs_obj_type  type;
+	int (*destroy_object)(struct ib_uobject *uobj,
+			      enum rdma_remove_reason why);
+	const struct file_operations	*fops;
+	const char			*name;
+	int				flags;
+};
+
+extern const struct uverbs_obj_type_class uverbs_idr_class;
+extern const struct uverbs_obj_type_class uverbs_fd_class;
+int uverbs_uobject_fd_release(struct inode *inode, struct file *filp);
+
+#define UVERBS_BUILD_BUG_ON(cond) (sizeof(char[1 - 2 * !!(cond)]) -	\
+				   sizeof(char))
+#define UVERBS_TYPE_ALLOC_FD(_obj_size, _destroy_object, _fops, _name, _flags) \
+	((&((const struct uverbs_obj_fd_type)				\
+	 {.type = {							\
+		.type_class = &uverbs_fd_class,				\
+		.obj_size = (_obj_size) +				\
+			UVERBS_BUILD_BUG_ON((_obj_size) <               \
+					    sizeof(struct ib_uobject)), \
+	 },								\
+	 .destroy_object = _destroy_object,				\
+	 .fops = _fops,							\
+	 .name = _name,							\
+	 .flags = _flags}))->type)
+#define UVERBS_TYPE_ALLOC_IDR_SZ(_size, _destroy_object)	\
+	((&((const struct uverbs_obj_idr_type)				\
+	 {.type = {							\
+		.type_class = &uverbs_idr_class,			\
+		.obj_size = (_size) +					\
+			UVERBS_BUILD_BUG_ON((_size) <			\
+					    sizeof(struct ib_uobject))	\
+	 },								\
+	 .destroy_object = _destroy_object,}))->type)
+#define UVERBS_TYPE_ALLOC_IDR(_destroy_object)			\
+	 UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uobject),	\
+				  _destroy_object)
+
+#endif
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-27 10:05:51 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-27 10:05:51 +0000
commit	5d1646d90e1f2cceb9f0828f4b28318cd0ec7744 (patch)
tree	a94efe259b9009378be6d90eb30d2b019d95c194 /include/rdma
parent	Initial commit. (diff)
download	linux-430c2fc249ea5c0536abd21c23382884005c9093.tar.xz linux-430c2fc249ea5c0536abd21c23382884005c9093.zip