summaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/sw
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-18 18:50:03 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-18 18:50:03 +0000
commit01a69402cf9d38ff180345d55c2ee51c7e89fbc7 (patch)
treeb406c5242a088c4f59c6e4b719b783f43aca6ae9 /drivers/infiniband/sw
parentAdding upstream version 6.7.12. (diff)
downloadlinux-01a69402cf9d38ff180345d55c2ee51c7e89fbc7.tar.xz
linux-01a69402cf9d38ff180345d55c2ee51c7e89fbc7.zip
Adding upstream version 6.8.9.upstream/6.8.9
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'drivers/infiniband/sw')
-rw-r--r--drivers/infiniband/sw/rxe/rxe.c2
-rw-r--r--drivers/infiniband/sw/siw/siw.h14
-rw-r--r--drivers/infiniband/sw/siw/siw_cm.c145
-rw-r--r--drivers/infiniband/sw/siw/siw_main.c30
-rw-r--r--drivers/infiniband/sw/siw/siw_mem.c121
-rw-r--r--drivers/infiniband/sw/siw/siw_mem.h5
-rw-r--r--drivers/infiniband/sw/siw/siw_qp.c2
-rw-r--r--drivers/infiniband/sw/siw/siw_qp_rx.c84
-rw-r--r--drivers/infiniband/sw/siw/siw_qp_tx.c51
-rw-r--r--drivers/infiniband/sw/siw/siw_verbs.c52
10 files changed, 202 insertions, 304 deletions
diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c
index 54c723a6ed..6f9ec8db01 100644
--- a/drivers/infiniband/sw/rxe/rxe.c
+++ b/drivers/infiniband/sw/rxe/rxe.c
@@ -33,6 +33,8 @@ void rxe_dealloc(struct ib_device *ib_dev)
if (rxe->tfm)
crypto_free_shash(rxe->tfm);
+
+ mutex_destroy(&rxe->usdev_lock);
}
/* initialize rxe device parameters */
diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h
index cec5cccd2e..75253f2b3e 100644
--- a/drivers/infiniband/sw/siw/siw.h
+++ b/drivers/infiniband/sw/siw/siw.h
@@ -121,11 +121,10 @@ struct siw_page_chunk {
};
struct siw_umem {
+ struct ib_umem *base_mem;
struct siw_page_chunk *page_chunk;
int num_pages;
- bool writable;
u64 fp_addr; /* First page base address */
- struct mm_struct *owning_mm;
};
struct siw_pble {
@@ -289,10 +288,11 @@ struct siw_rx_stream {
int skb_offset; /* offset in skb */
int skb_copied; /* processed bytes in skb */
+ enum siw_rx_state state;
+
union iwarp_hdr hdr;
struct mpa_trailer trailer;
-
- enum siw_rx_state state;
+ struct shash_desc *mpa_crc_hd;
/*
* For each FPDU, main RX loop runs through 3 stages:
@@ -314,7 +314,6 @@ struct siw_rx_stream {
u64 ddp_to;
u32 inval_stag; /* Stag to be invalidated */
- struct shash_desc *mpa_crc_hd;
u8 rx_suspend : 1;
u8 pad : 2; /* # of pad bytes expected */
u8 rdmap_op : 4; /* opcode of current frame */
@@ -418,10 +417,10 @@ struct siw_iwarp_tx {
struct siw_qp {
struct ib_qp base_qp;
struct siw_device *sdev;
+ int tx_cpu;
struct kref ref;
struct completion qp_free;
struct list_head devq;
- int tx_cpu;
struct siw_qp_attrs attrs;
struct siw_cep *cep;
@@ -466,7 +465,6 @@ struct siw_qp {
} term_info;
struct rdma_user_mmap_entry *sq_entry; /* mmap info for SQE array */
struct rdma_user_mmap_entry *rq_entry; /* mmap info for RQE array */
- struct rcu_head rcu;
};
/* helper macros */
@@ -659,7 +657,7 @@ static inline struct siw_sqe *orq_get_free(struct siw_qp *qp)
static inline int siw_orq_empty(struct siw_qp *qp)
{
- return qp->orq[qp->orq_get % qp->attrs.orq_size].flags == 0 ? 1 : 0;
+ return orq_get_current(qp)->flags == 0 ? 1 : 0;
}
static inline struct siw_sqe *irq_alloc_free(struct siw_qp *qp)
diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c
index 7de651cb44..86323918a5 100644
--- a/drivers/infiniband/sw/siw/siw_cm.c
+++ b/drivers/infiniband/sw/siw/siw_cm.c
@@ -41,16 +41,6 @@ static int siw_cm_upcall(struct siw_cep *cep, enum iw_cm_event_type reason,
static void siw_sk_assign_cm_upcalls(struct sock *sk)
{
- write_lock_bh(&sk->sk_callback_lock);
- sk->sk_state_change = siw_cm_llp_state_change;
- sk->sk_data_ready = siw_cm_llp_data_ready;
- sk->sk_write_space = siw_cm_llp_write_space;
- sk->sk_error_report = siw_cm_llp_error_report;
- write_unlock_bh(&sk->sk_callback_lock);
-}
-
-static void siw_sk_save_upcalls(struct sock *sk)
-{
struct siw_cep *cep = sk_to_cep(sk);
write_lock_bh(&sk->sk_callback_lock);
@@ -58,6 +48,11 @@ static void siw_sk_save_upcalls(struct sock *sk)
cep->sk_data_ready = sk->sk_data_ready;
cep->sk_write_space = sk->sk_write_space;
cep->sk_error_report = sk->sk_error_report;
+
+ sk->sk_state_change = siw_cm_llp_state_change;
+ sk->sk_data_ready = siw_cm_llp_data_ready;
+ sk->sk_write_space = siw_cm_llp_write_space;
+ sk->sk_error_report = siw_cm_llp_error_report;
write_unlock_bh(&sk->sk_callback_lock);
}
@@ -156,7 +151,6 @@ static void siw_cep_socket_assoc(struct siw_cep *cep, struct socket *s)
siw_cep_get(cep);
s->sk->sk_user_data = cep;
- siw_sk_save_upcalls(s->sk);
siw_sk_assign_cm_upcalls(s->sk);
}
@@ -364,6 +358,24 @@ static int siw_cm_upcall(struct siw_cep *cep, enum iw_cm_event_type reason,
return id->event_handler(id, &event);
}
+static void siw_free_cm_id(struct siw_cep *cep)
+{
+ if (!cep->cm_id)
+ return;
+
+ cep->cm_id->rem_ref(cep->cm_id);
+ cep->cm_id = NULL;
+}
+
+static void siw_destroy_cep_sock(struct siw_cep *cep)
+{
+ if (cep->sock) {
+ siw_socket_disassoc(cep->sock);
+ sock_release(cep->sock);
+ cep->sock = NULL;
+ }
+}
+
/*
* siw_qp_cm_drop()
*
@@ -393,8 +405,7 @@ void siw_qp_cm_drop(struct siw_qp *qp, int schedule)
}
siw_dbg_cep(cep, "immediate close, state %d\n", cep->state);
- if (qp->term_info.valid)
- siw_send_terminate(qp);
+ siw_send_terminate(qp);
if (cep->cm_id) {
switch (cep->state) {
@@ -416,20 +427,12 @@ void siw_qp_cm_drop(struct siw_qp *qp, int schedule)
default:
break;
}
- cep->cm_id->rem_ref(cep->cm_id);
- cep->cm_id = NULL;
+ siw_free_cm_id(cep);
siw_cep_put(cep);
}
cep->state = SIW_EPSTATE_CLOSED;
- if (cep->sock) {
- siw_socket_disassoc(cep->sock);
- /*
- * Immediately close socket
- */
- sock_release(cep->sock);
- cep->sock = NULL;
- }
+ siw_destroy_cep_sock(cep);
if (cep->qp) {
cep->qp = NULL;
siw_qp_put(qp);
@@ -445,6 +448,12 @@ void siw_cep_put(struct siw_cep *cep)
kref_put(&cep->ref, __siw_cep_dealloc);
}
+static void siw_cep_set_free_and_put(struct siw_cep *cep)
+{
+ siw_cep_set_free(cep);
+ siw_cep_put(cep);
+}
+
void siw_cep_get(struct siw_cep *cep)
{
kref_get(&cep->ref);
@@ -1061,7 +1070,7 @@ static void siw_cm_work_handler(struct work_struct *w)
/*
* QP scheduled LLP close
*/
- if (cep->qp && cep->qp->term_info.valid)
+ if (cep->qp)
siw_send_terminate(cep->qp);
if (cep->cm_id)
@@ -1175,8 +1184,7 @@ static void siw_cm_work_handler(struct work_struct *w)
cep->sock = NULL;
}
if (cep->cm_id) {
- cep->cm_id->rem_ref(cep->cm_id);
- cep->cm_id = NULL;
+ siw_free_cm_id(cep);
siw_cep_put(cep);
}
}
@@ -1515,9 +1523,7 @@ error:
cep->state = SIW_EPSTATE_CLOSED;
- siw_cep_set_free(cep);
-
- siw_cep_put(cep);
+ siw_cep_set_free_and_put(cep);
} else if (s) {
sock_release(s);
@@ -1548,7 +1554,7 @@ int siw_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params)
struct siw_cep *cep = (struct siw_cep *)id->provider_data;
struct siw_qp *qp;
struct siw_qp_attrs qp_attrs;
- int rv, max_priv_data = MPA_MAX_PRIVDATA;
+ int rv = -EINVAL, max_priv_data = MPA_MAX_PRIVDATA;
bool wait_for_peer_rts = false;
siw_cep_set_inuse(cep);
@@ -1564,26 +1570,17 @@ int siw_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params)
if (cep->state != SIW_EPSTATE_RECVD_MPAREQ) {
siw_dbg_cep(cep, "out of state\n");
-
- siw_cep_set_free(cep);
- siw_cep_put(cep);
-
- return -ECONNRESET;
+ rv = -ECONNRESET;
+ goto free_cep;
}
qp = siw_qp_id2obj(sdev, params->qpn);
if (!qp) {
WARN(1, "[QP %d] does not exist\n", params->qpn);
- siw_cep_set_free(cep);
- siw_cep_put(cep);
-
- return -EINVAL;
+ goto free_cep;
}
down_write(&qp->state_lock);
- if (qp->attrs.state > SIW_QP_STATE_RTR) {
- rv = -EINVAL;
- up_write(&qp->state_lock);
- goto error;
- }
+ if (qp->attrs.state > SIW_QP_STATE_RTR)
+ goto error_unlock;
siw_dbg_cep(cep, "[QP %d]\n", params->qpn);
if (try_gso && cep->mpa.hdr.params.bits & MPA_RR_FLAG_GSO_EXP) {
@@ -1597,9 +1594,7 @@ int siw_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params)
"[QP %u]: ord %d (max %d), ird %d (max %d)\n",
qp_id(qp), params->ord, sdev->attrs.max_ord,
params->ird, sdev->attrs.max_ird);
- rv = -EINVAL;
- up_write(&qp->state_lock);
- goto error;
+ goto error_unlock;
}
if (cep->enhanced_rdma_conn_est)
max_priv_data -= sizeof(struct mpa_v2_data);
@@ -1609,9 +1604,7 @@ int siw_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params)
cep,
"[QP %u]: private data length: %d (max %d)\n",
qp_id(qp), params->private_data_len, max_priv_data);
- rv = -EINVAL;
- up_write(&qp->state_lock);
- goto error;
+ goto error_unlock;
}
if (cep->enhanced_rdma_conn_est) {
if (params->ord > cep->ord) {
@@ -1620,9 +1613,7 @@ int siw_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params)
} else {
cep->ird = params->ird;
cep->ord = params->ord;
- rv = -EINVAL;
- up_write(&qp->state_lock);
- goto error;
+ goto error_unlock;
}
}
if (params->ird < cep->ird) {
@@ -1631,8 +1622,7 @@ int siw_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params)
params->ird = cep->ird;
else {
rv = -ENOMEM;
- up_write(&qp->state_lock);
- goto error;
+ goto error_unlock;
}
}
if (cep->mpa.v2_ctrl.ord &
@@ -1679,7 +1669,6 @@ int siw_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params)
SIW_QP_ATTR_ORD | SIW_QP_ATTR_IRD |
SIW_QP_ATTR_MPA);
up_write(&qp->state_lock);
-
if (rv)
goto error;
@@ -1702,27 +1691,23 @@ int siw_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params)
siw_cep_set_free(cep);
return 0;
+
+error_unlock:
+ up_write(&qp->state_lock);
error:
- siw_socket_disassoc(cep->sock);
- sock_release(cep->sock);
- cep->sock = NULL;
+ siw_destroy_cep_sock(cep);
cep->state = SIW_EPSTATE_CLOSED;
- if (cep->cm_id) {
- cep->cm_id->rem_ref(id);
- cep->cm_id = NULL;
- }
+ siw_free_cm_id(cep);
if (qp->cep) {
siw_cep_put(cep);
qp->cep = NULL;
}
cep->qp = NULL;
siw_qp_put(qp);
-
- siw_cep_set_free(cep);
- siw_cep_put(cep);
-
+free_cep:
+ siw_cep_set_free_and_put(cep);
return rv;
}
@@ -1744,8 +1729,7 @@ int siw_reject(struct iw_cm_id *id, const void *pdata, u8 pd_len)
if (cep->state != SIW_EPSTATE_RECVD_MPAREQ) {
siw_dbg_cep(cep, "out of state\n");
- siw_cep_set_free(cep);
- siw_cep_put(cep); /* put last reference */
+ siw_cep_set_free_and_put(cep); /* put last reference */
return -ECONNRESET;
}
@@ -1756,14 +1740,11 @@ int siw_reject(struct iw_cm_id *id, const void *pdata, u8 pd_len)
cep->mpa.hdr.params.bits |= MPA_RR_FLAG_REJECT; /* reject */
siw_send_mpareqrep(cep, pdata, pd_len);
}
- siw_socket_disassoc(cep->sock);
- sock_release(cep->sock);
- cep->sock = NULL;
+ siw_destroy_cep_sock(cep);
cep->state = SIW_EPSTATE_CLOSED;
- siw_cep_set_free(cep);
- siw_cep_put(cep);
+ siw_cep_set_free_and_put(cep);
return 0;
}
@@ -1890,16 +1871,12 @@ error:
if (cep) {
siw_cep_set_inuse(cep);
- if (cep->cm_id) {
- cep->cm_id->rem_ref(cep->cm_id);
- cep->cm_id = NULL;
- }
+ siw_free_cm_id(cep);
cep->sock = NULL;
siw_socket_disassoc(s);
cep->state = SIW_EPSTATE_CLOSED;
- siw_cep_set_free(cep);
- siw_cep_put(cep);
+ siw_cep_set_free_and_put(cep);
}
sock_release(s);
@@ -1923,18 +1900,14 @@ static void siw_drop_listeners(struct iw_cm_id *id)
siw_cep_set_inuse(cep);
- if (cep->cm_id) {
- cep->cm_id->rem_ref(cep->cm_id);
- cep->cm_id = NULL;
- }
+ siw_free_cm_id(cep);
if (cep->sock) {
siw_socket_disassoc(cep->sock);
sock_release(cep->sock);
cep->sock = NULL;
}
cep->state = SIW_EPSTATE_CLOSED;
- siw_cep_set_free(cep);
- siw_cep_put(cep);
+ siw_cep_set_free_and_put(cep);
}
}
diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c
index 1ab62982df..61ad8ca3d1 100644
--- a/drivers/infiniband/sw/siw/siw_main.c
+++ b/drivers/infiniband/sw/siw/siw_main.c
@@ -109,6 +109,17 @@ static struct {
int num_nodes;
} siw_cpu_info;
+static void siw_destroy_cpulist(int number)
+{
+ int i = 0;
+
+ while (i < number)
+ kfree(siw_cpu_info.tx_valid_cpus[i++]);
+
+ kfree(siw_cpu_info.tx_valid_cpus);
+ siw_cpu_info.tx_valid_cpus = NULL;
+}
+
static int siw_init_cpulist(void)
{
int i, num_nodes = nr_node_ids;
@@ -138,24 +149,11 @@ static int siw_init_cpulist(void)
out_err:
siw_cpu_info.num_nodes = 0;
- while (--i >= 0)
- kfree(siw_cpu_info.tx_valid_cpus[i]);
- kfree(siw_cpu_info.tx_valid_cpus);
- siw_cpu_info.tx_valid_cpus = NULL;
+ siw_destroy_cpulist(i);
return -ENOMEM;
}
-static void siw_destroy_cpulist(void)
-{
- int i = 0;
-
- while (i < siw_cpu_info.num_nodes)
- kfree(siw_cpu_info.tx_valid_cpus[i++]);
-
- kfree(siw_cpu_info.tx_valid_cpus);
-}
-
/*
* Choose CPU with least number of active QP's from NUMA node of
* TX interface.
@@ -558,7 +556,7 @@ out_error:
pr_info("SoftIWARP attach failed. Error: %d\n", rv);
siw_cm_exit();
- siw_destroy_cpulist();
+ siw_destroy_cpulist(siw_cpu_info.num_nodes);
return rv;
}
@@ -573,7 +571,7 @@ static void __exit siw_exit_module(void)
siw_cm_exit();
- siw_destroy_cpulist();
+ siw_destroy_cpulist(siw_cpu_info.num_nodes);
if (siw_crypto_shash)
crypto_free_shash(siw_crypto_shash);
diff --git a/drivers/infiniband/sw/siw/siw_mem.c b/drivers/infiniband/sw/siw/siw_mem.c
index c5f7f1669d..dcb963607c 100644
--- a/drivers/infiniband/sw/siw/siw_mem.c
+++ b/drivers/infiniband/sw/siw/siw_mem.c
@@ -5,6 +5,7 @@
#include <linux/gfp.h>
#include <rdma/ib_verbs.h>
+#include <rdma/ib_umem.h>
#include <linux/dma-mapping.h>
#include <linux/slab.h>
#include <linux/sched/mm.h>
@@ -13,18 +14,20 @@
#include "siw.h"
#include "siw_mem.h"
+/* Stag lookup is based on its index part only (24 bits). */
+#define SIW_STAG_MAX_INDEX 0x00ffffff
+
/*
- * Stag lookup is based on its index part only (24 bits).
* The code avoids special Stag of zero and tries to randomize
* STag values between 1 and SIW_STAG_MAX_INDEX.
*/
int siw_mem_add(struct siw_device *sdev, struct siw_mem *m)
{
- struct xa_limit limit = XA_LIMIT(1, 0x00ffffff);
+ struct xa_limit limit = XA_LIMIT(1, SIW_STAG_MAX_INDEX);
u32 id, next;
get_random_bytes(&next, 4);
- next &= 0x00ffffff;
+ next &= SIW_STAG_MAX_INDEX;
if (xa_alloc_cyclic(&sdev->mem_xa, &id, m, limit, &next,
GFP_KERNEL) < 0)
@@ -60,28 +63,17 @@ struct siw_mem *siw_mem_id2obj(struct siw_device *sdev, int stag_index)
return NULL;
}
-static void siw_free_plist(struct siw_page_chunk *chunk, int num_pages,
- bool dirty)
-{
- unpin_user_pages_dirty_lock(chunk->plist, num_pages, dirty);
-}
-
-void siw_umem_release(struct siw_umem *umem, bool dirty)
+void siw_umem_release(struct siw_umem *umem)
{
- struct mm_struct *mm_s = umem->owning_mm;
int i, num_pages = umem->num_pages;
- for (i = 0; num_pages; i++) {
- int to_free = min_t(int, PAGES_PER_CHUNK, num_pages);
+ if (umem->base_mem)
+ ib_umem_release(umem->base_mem);
- siw_free_plist(&umem->page_chunk[i], to_free,
- umem->writable && dirty);
+ for (i = 0; num_pages > 0; i++) {
kfree(umem->page_chunk[i].plist);
- num_pages -= to_free;
+ num_pages -= PAGES_PER_CHUNK;
}
- atomic64_sub(umem->num_pages, &mm_s->pinned_vm);
-
- mmdrop(mm_s);
kfree(umem->page_chunk);
kfree(umem);
}
@@ -91,7 +83,7 @@ int siw_mr_add_mem(struct siw_mr *mr, struct ib_pd *pd, void *mem_obj,
{
struct siw_device *sdev = to_siw_dev(pd->device);
struct siw_mem *mem = kzalloc(sizeof(*mem), GFP_KERNEL);
- struct xa_limit limit = XA_LIMIT(1, 0x00ffffff);
+ struct xa_limit limit = XA_LIMIT(1, SIW_STAG_MAX_INDEX);
u32 id, next;
if (!mem)
@@ -107,7 +99,7 @@ int siw_mr_add_mem(struct siw_mr *mr, struct ib_pd *pd, void *mem_obj,
kref_init(&mem->ref);
get_random_bytes(&next, 4);
- next &= 0x00ffffff;
+ next &= SIW_STAG_MAX_INDEX;
if (xa_alloc_cyclic(&sdev->mem_xa, &id, mem, limit, &next,
GFP_KERNEL) < 0) {
@@ -145,7 +137,7 @@ void siw_free_mem(struct kref *ref)
if (!mem->is_mw && mem->mem_obj) {
if (mem->is_pbl == 0)
- siw_umem_release(mem->umem, true);
+ siw_umem_release(mem->umem);
else
kfree(mem->pbl);
}
@@ -362,18 +354,16 @@ struct siw_pbl *siw_pbl_alloc(u32 num_buf)
return pbl;
}
-struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable)
+struct siw_umem *siw_umem_get(struct ib_device *base_dev, u64 start,
+ u64 len, int rights)
{
struct siw_umem *umem;
- struct mm_struct *mm_s;
+ struct ib_umem *base_mem;
+ struct sg_page_iter sg_iter;
+ struct sg_table *sgt;
u64 first_page_va;
- unsigned long mlock_limit;
- unsigned int foll_flags = FOLL_LONGTERM;
int num_pages, num_chunks, i, rv = 0;
- if (!can_do_mlock())
- return ERR_PTR(-EPERM);
-
if (!len)
return ERR_PTR(-EINVAL);
@@ -385,65 +375,50 @@ struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable)
if (!umem)
return ERR_PTR(-ENOMEM);
- mm_s = current->mm;
- umem->owning_mm = mm_s;
- umem->writable = writable;
-
- mmgrab(mm_s);
-
- if (writable)
- foll_flags |= FOLL_WRITE;
-
- mmap_read_lock(mm_s);
-
- mlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
-
- if (atomic64_add_return(num_pages, &mm_s->pinned_vm) > mlock_limit) {
- rv = -ENOMEM;
- goto out_sem_up;
- }
- umem->fp_addr = first_page_va;
-
umem->page_chunk =
kcalloc(num_chunks, sizeof(struct siw_page_chunk), GFP_KERNEL);
if (!umem->page_chunk) {
rv = -ENOMEM;
- goto out_sem_up;
+ goto err_out;
}
- for (i = 0; num_pages; i++) {
+ base_mem = ib_umem_get(base_dev, start, len, rights);
+ if (IS_ERR(base_mem)) {
+ rv = PTR_ERR(base_mem);
+ siw_dbg(base_dev, "Cannot pin user memory: %d\n", rv);
+ goto err_out;
+ }
+ umem->fp_addr = first_page_va;
+ umem->base_mem = base_mem;
+
+ sgt = &base_mem->sgt_append.sgt;
+ __sg_page_iter_start(&sg_iter, sgt->sgl, sgt->orig_nents, 0);
+
+ if (!__sg_page_iter_next(&sg_iter)) {
+ rv = -EINVAL;
+ goto err_out;
+ }
+ for (i = 0; num_pages > 0; i++) {
int nents = min_t(int, num_pages, PAGES_PER_CHUNK);
struct page **plist =
kcalloc(nents, sizeof(struct page *), GFP_KERNEL);
if (!plist) {
rv = -ENOMEM;
- goto out_sem_up;
+ goto err_out;
}
umem->page_chunk[i].plist = plist;
- while (nents) {
- rv = pin_user_pages(first_page_va, nents, foll_flags,
- plist);
- if (rv < 0)
- goto out_sem_up;
-
- umem->num_pages += rv;
- first_page_va += rv * PAGE_SIZE;
- plist += rv;
- nents -= rv;
- num_pages -= rv;
+ while (nents--) {
+ *plist = sg_page_iter_page(&sg_iter);
+ umem->num_pages++;
+ num_pages--;
+ plist++;
+ if (!__sg_page_iter_next(&sg_iter))
+ break;
}
}
-out_sem_up:
- mmap_read_unlock(mm_s);
-
- if (rv > 0)
- return umem;
-
- /* Adjust accounting for pages not pinned */
- if (num_pages)
- atomic64_sub(num_pages, &mm_s->pinned_vm);
-
- siw_umem_release(umem, false);
+ return umem;
+err_out:
+ siw_umem_release(umem);
return ERR_PTR(rv);
}
diff --git a/drivers/infiniband/sw/siw/siw_mem.h b/drivers/infiniband/sw/siw/siw_mem.h
index a2835284fe..e74cfcd6db 100644
--- a/drivers/infiniband/sw/siw/siw_mem.h
+++ b/drivers/infiniband/sw/siw/siw_mem.h
@@ -6,8 +6,9 @@
#ifndef _SIW_MEM_H
#define _SIW_MEM_H
-struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable);
-void siw_umem_release(struct siw_umem *umem, bool dirty);
+struct siw_umem *siw_umem_get(struct ib_device *base_dave, u64 start,
+ u64 len, int rights);
+void siw_umem_release(struct siw_umem *umem);
struct siw_pbl *siw_pbl_alloc(u32 num_buf);
dma_addr_t siw_pbl_get_buffer(struct siw_pbl *pbl, u64 off, int *len, int *idx);
struct siw_mem *siw_mem_id2obj(struct siw_device *sdev, int stag_index);
diff --git a/drivers/infiniband/sw/siw/siw_qp.c b/drivers/infiniband/sw/siw/siw_qp.c
index 26e3904d2f..da92cfa207 100644
--- a/drivers/infiniband/sw/siw/siw_qp.c
+++ b/drivers/infiniband/sw/siw/siw_qp.c
@@ -1183,7 +1183,7 @@ int siw_rqe_complete(struct siw_qp *qp, struct siw_rqe *rqe, u32 bytes,
/*
* siw_sq_flush()
*
- * Flush SQ and ORRQ entries to CQ.
+ * Flush SQ and ORQ entries to CQ.
*
* Must be called with QP state write lock held.
* Therefore, SQ and ORQ lock must not be taken.
diff --git a/drivers/infiniband/sw/siw/siw_qp_rx.c b/drivers/infiniband/sw/siw/siw_qp_rx.c
index 33e0fdb362..ed4fc39718 100644
--- a/drivers/infiniband/sw/siw/siw_qp_rx.c
+++ b/drivers/infiniband/sw/siw/siw_qp_rx.c
@@ -405,6 +405,20 @@ out:
return wqe;
}
+static int siw_rx_data(struct siw_mem *mem_p, struct siw_rx_stream *srx,
+ unsigned int *pbl_idx, u64 addr, int bytes)
+{
+ int rv;
+
+ if (mem_p->mem_obj == NULL)
+ rv = siw_rx_kva(srx, ib_virt_dma_to_ptr(addr), bytes);
+ else if (!mem_p->is_pbl)
+ rv = siw_rx_umem(srx, mem_p->umem, addr, bytes);
+ else
+ rv = siw_rx_pbl(srx, pbl_idx, mem_p, addr, bytes);
+ return rv;
+}
+
/*
* siw_proc_send:
*
@@ -485,17 +499,8 @@ int siw_proc_send(struct siw_qp *qp)
break;
}
mem_p = *mem;
- if (mem_p->mem_obj == NULL)
- rv = siw_rx_kva(srx,
- ib_virt_dma_to_ptr(sge->laddr + frx->sge_off),
- sge_bytes);
- else if (!mem_p->is_pbl)
- rv = siw_rx_umem(srx, mem_p->umem,
- sge->laddr + frx->sge_off, sge_bytes);
- else
- rv = siw_rx_pbl(srx, &frx->pbl_idx, mem_p,
- sge->laddr + frx->sge_off, sge_bytes);
-
+ rv = siw_rx_data(mem_p, srx, &frx->pbl_idx,
+ sge->laddr + frx->sge_off, sge_bytes);
if (unlikely(rv != sge_bytes)) {
wqe->processed += rcvd_bytes;
@@ -598,17 +603,8 @@ int siw_proc_write(struct siw_qp *qp)
return -EINVAL;
}
- if (mem->mem_obj == NULL)
- rv = siw_rx_kva(srx,
- (void *)(uintptr_t)(srx->ddp_to + srx->fpdu_part_rcvd),
- bytes);
- else if (!mem->is_pbl)
- rv = siw_rx_umem(srx, mem->umem,
- srx->ddp_to + srx->fpdu_part_rcvd, bytes);
- else
- rv = siw_rx_pbl(srx, &frx->pbl_idx, mem,
- srx->ddp_to + srx->fpdu_part_rcvd, bytes);
-
+ rv = siw_rx_data(mem, srx, &frx->pbl_idx,
+ srx->ddp_to + srx->fpdu_part_rcvd, bytes);
if (unlikely(rv != bytes)) {
siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
DDP_ETYPE_CATASTROPHIC,
@@ -849,17 +845,8 @@ int siw_proc_rresp(struct siw_qp *qp)
mem_p = *mem;
bytes = min(srx->fpdu_part_rem, srx->skb_new);
-
- if (mem_p->mem_obj == NULL)
- rv = siw_rx_kva(srx,
- ib_virt_dma_to_ptr(sge->laddr + wqe->processed),
- bytes);
- else if (!mem_p->is_pbl)
- rv = siw_rx_umem(srx, mem_p->umem, sge->laddr + wqe->processed,
- bytes);
- else
- rv = siw_rx_pbl(srx, &frx->pbl_idx, mem_p,
- sge->laddr + wqe->processed, bytes);
+ rv = siw_rx_data(mem_p, srx, &frx->pbl_idx,
+ sge->laddr + wqe->processed, bytes);
if (rv != bytes) {
wqe->wc_status = SIW_WC_GENERAL_ERR;
rv = -EINVAL;
@@ -881,6 +868,13 @@ error_term:
return rv;
}
+static void siw_update_skb_rcvd(struct siw_rx_stream *srx, u16 length)
+{
+ srx->skb_offset += length;
+ srx->skb_new -= length;
+ srx->skb_copied += length;
+}
+
int siw_proc_terminate(struct siw_qp *qp)
{
struct siw_rx_stream *srx = &qp->rx_stream;
@@ -925,9 +919,7 @@ int siw_proc_terminate(struct siw_qp *qp)
goto out;
infop += to_copy;
- srx->skb_offset += to_copy;
- srx->skb_new -= to_copy;
- srx->skb_copied += to_copy;
+ siw_update_skb_rcvd(srx, to_copy);
srx->fpdu_part_rcvd += to_copy;
srx->fpdu_part_rem -= to_copy;
@@ -949,9 +941,7 @@ int siw_proc_terminate(struct siw_qp *qp)
term->flag_m ? "valid" : "invalid");
}
out:
- srx->skb_new -= to_copy;
- srx->skb_offset += to_copy;
- srx->skb_copied += to_copy;
+ siw_update_skb_rcvd(srx, to_copy);
srx->fpdu_part_rcvd += to_copy;
srx->fpdu_part_rem -= to_copy;
@@ -970,9 +960,7 @@ static int siw_get_trailer(struct siw_qp *qp, struct siw_rx_stream *srx)
skb_copy_bits(skb, srx->skb_offset, tbuf, avail);
- srx->skb_new -= avail;
- srx->skb_offset += avail;
- srx->skb_copied += avail;
+ siw_update_skb_rcvd(srx, avail);
srx->fpdu_part_rem -= avail;
if (srx->fpdu_part_rem)
@@ -1023,12 +1011,8 @@ static int siw_get_hdr(struct siw_rx_stream *srx)
skb_copy_bits(skb, srx->skb_offset,
(char *)c_hdr + srx->fpdu_part_rcvd, bytes);
+ siw_update_skb_rcvd(srx, bytes);
srx->fpdu_part_rcvd += bytes;
-
- srx->skb_new -= bytes;
- srx->skb_offset += bytes;
- srx->skb_copied += bytes;
-
if (srx->fpdu_part_rcvd < MIN_DDP_HDR)
return -EAGAIN;
@@ -1091,12 +1075,8 @@ static int siw_get_hdr(struct siw_rx_stream *srx)
skb_copy_bits(skb, srx->skb_offset,
(char *)c_hdr + srx->fpdu_part_rcvd, bytes);
+ siw_update_skb_rcvd(srx, bytes);
srx->fpdu_part_rcvd += bytes;
-
- srx->skb_new -= bytes;
- srx->skb_offset += bytes;
- srx->skb_copied += bytes;
-
if (srx->fpdu_part_rcvd < hdrlen)
return -EAGAIN;
}
diff --git a/drivers/infiniband/sw/siw/siw_qp_tx.c b/drivers/infiniband/sw/siw/siw_qp_tx.c
index b2c06100cf..64ad9e0895 100644
--- a/drivers/infiniband/sw/siw/siw_qp_tx.c
+++ b/drivers/infiniband/sw/siw/siw_qp_tx.c
@@ -34,6 +34,15 @@ static struct page *siw_get_pblpage(struct siw_mem *mem, u64 addr, int *idx)
return NULL;
}
+static struct page *siw_get_page(struct siw_mem *mem, struct siw_sge *sge,
+ unsigned long offset, int *pbl_idx)
+{
+ if (!mem->is_pbl)
+ return siw_get_upage(mem->umem, sge->laddr + offset);
+ else
+ return siw_get_pblpage(mem, sge->laddr + offset, pbl_idx);
+}
+
/*
* Copy short payload at provided destination payload address
*/
@@ -67,11 +76,7 @@ static int siw_try_1seg(struct siw_iwarp_tx *c_tx, void *paddr)
char *buffer;
int pbl_idx = 0;
- if (!mem->is_pbl)
- p = siw_get_upage(mem->umem, sge->laddr);
- else
- p = siw_get_pblpage(mem, sge->laddr, &pbl_idx);
-
+ p = siw_get_page(mem, sge, 0, &pbl_idx);
if (unlikely(!p))
return -EFAULT;
@@ -85,13 +90,7 @@ static int siw_try_1seg(struct siw_iwarp_tx *c_tx, void *paddr)
memcpy(paddr, buffer + off, part);
kunmap_local(buffer);
- if (!mem->is_pbl)
- p = siw_get_upage(mem->umem,
- sge->laddr + part);
- else
- p = siw_get_pblpage(mem,
- sge->laddr + part,
- &pbl_idx);
+ p = siw_get_page(mem, sge, part, &pbl_idx);
if (unlikely(!p))
return -EFAULT;
@@ -249,14 +248,10 @@ static int siw_qp_prepare_tx(struct siw_iwarp_tx *c_tx)
/*
* Do complete CRC if enabled and short packet
*/
- if (c_tx->mpa_crc_hd) {
- crypto_shash_init(c_tx->mpa_crc_hd);
- if (crypto_shash_update(c_tx->mpa_crc_hd,
- (u8 *)&c_tx->pkt,
- c_tx->ctrl_len))
- return -EINVAL;
- crypto_shash_final(c_tx->mpa_crc_hd, (u8 *)crc);
- }
+ if (c_tx->mpa_crc_hd &&
+ crypto_shash_digest(c_tx->mpa_crc_hd, (u8 *)&c_tx->pkt,
+ c_tx->ctrl_len, (u8 *)crc) != 0)
+ return -EINVAL;
c_tx->ctrl_len += MPA_CRC_SIZE;
return PKT_COMPLETE;
@@ -297,8 +292,7 @@ static int siw_tx_ctrl(struct siw_iwarp_tx *c_tx, struct socket *s,
(char *)&c_tx->pkt.ctrl + c_tx->ctrl_sent,
.iov_len = c_tx->ctrl_len - c_tx->ctrl_sent };
- int rv = kernel_sendmsg(s, &msg, &iov, 1,
- c_tx->ctrl_len - c_tx->ctrl_sent);
+ int rv = kernel_sendmsg(s, &msg, &iov, 1, iov.iov_len);
if (rv >= 0) {
c_tx->ctrl_sent += rv;
@@ -502,13 +496,7 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s)
if (!is_kva) {
struct page *p;
- if (mem->is_pbl)
- p = siw_get_pblpage(
- mem, sge->laddr + sge_off,
- &pbl_idx);
- else
- p = siw_get_upage(mem->umem,
- sge->laddr + sge_off);
+ p = siw_get_page(mem, sge, sge_off, &pbl_idx);
if (unlikely(!p)) {
siw_unmap_pages(iov, kmap_mask, seg);
wqe->processed -= c_tx->bytes_unsent;
@@ -1009,13 +997,12 @@ static int siw_qp_sq_proc_local(struct siw_qp *qp, struct siw_wqe *wqe)
* MPA FPDUs, each containing a DDP segment.
*
* SQ processing may occur in user context as a result of posting
- * new WQE's or from siw_sq_work_handler() context. Processing in
+ * new WQE's or from siw_tx_thread context. Processing in
* user context is limited to non-kernel verbs users.
*
* SQ processing may get paused anytime, possibly in the middle of a WR
* or FPDU, if insufficient send space is available. SQ processing
- * gets resumed from siw_sq_work_handler(), if send space becomes
- * available again.
+ * gets resumed from siw_tx_thread, if send space becomes available again.
*
* Must be called with the QP state read-locked.
*
diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c
index c5c27db9c2..ecf0444666 100644
--- a/drivers/infiniband/sw/siw/siw_verbs.c
+++ b/drivers/infiniband/sw/siw/siw_verbs.c
@@ -19,6 +19,15 @@
#include "siw_verbs.h"
#include "siw_mem.h"
+static int siw_qp_state_to_ib_qp_state[SIW_QP_STATE_COUNT] = {
+ [SIW_QP_STATE_IDLE] = IB_QPS_INIT,
+ [SIW_QP_STATE_RTR] = IB_QPS_RTR,
+ [SIW_QP_STATE_RTS] = IB_QPS_RTS,
+ [SIW_QP_STATE_CLOSING] = IB_QPS_SQD,
+ [SIW_QP_STATE_TERMINATE] = IB_QPS_SQE,
+ [SIW_QP_STATE_ERROR] = IB_QPS_ERR
+};
+
static int ib_qp_state_to_siw_qp_state[IB_QPS_ERR + 1] = {
[IB_QPS_RESET] = SIW_QP_STATE_IDLE,
[IB_QPS_INIT] = SIW_QP_STATE_IDLE,
@@ -66,12 +75,9 @@ int siw_mmap(struct ib_ucontext *ctx, struct vm_area_struct *vma)
entry = to_siw_mmap_entry(rdma_entry);
rv = remap_vmalloc_range(vma, entry->address, 0);
- if (rv) {
+ if (rv)
pr_warn("remap_vmalloc_range failed: %lu, %zu\n", vma->vm_pgoff,
size);
- goto out;
- }
-out:
rdma_user_mmap_entry_put(rdma_entry);
return rv;
@@ -336,11 +342,10 @@ int siw_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs,
goto err_atomic;
}
/*
- * NOTE: we allow for zero element SQ and RQ WQE's SGL's
- * but not for a QP unable to hold any WQE (SQ + RQ)
+ * NOTE: we don't allow for a QP unable to hold any SQ WQE
*/
- if (attrs->cap.max_send_wr + attrs->cap.max_recv_wr == 0) {
- siw_dbg(base_dev, "QP must have send or receive queue\n");
+ if (attrs->cap.max_send_wr == 0) {
+ siw_dbg(base_dev, "QP must have send queue\n");
rv = -EINVAL;
goto err_atomic;
}
@@ -360,21 +365,14 @@ int siw_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs,
if (rv)
goto err_atomic;
- num_sqe = attrs->cap.max_send_wr;
- num_rqe = attrs->cap.max_recv_wr;
/* All queue indices are derived from modulo operations
* on a free running 'get' (consumer) and 'put' (producer)
* unsigned counter. Having queue sizes at power of two
* avoids handling counter wrap around.
*/
- if (num_sqe)
- num_sqe = roundup_pow_of_two(num_sqe);
- else {
- /* Zero sized SQ is not supported */
- rv = -EINVAL;
- goto err_out_xa;
- }
+ num_sqe = roundup_pow_of_two(attrs->cap.max_send_wr);
+ num_rqe = attrs->cap.max_recv_wr;
if (num_rqe)
num_rqe = roundup_pow_of_two(num_rqe);
@@ -515,6 +513,7 @@ int siw_query_qp(struct ib_qp *base_qp, struct ib_qp_attr *qp_attr,
} else {
return -EINVAL;
}
+ qp_attr->qp_state = siw_qp_state_to_ib_qp_state[qp->attrs.state];
qp_attr->cap.max_inline_data = SIW_MAX_INLINE;
qp_attr->cap.max_send_wr = qp->attrs.sq_size;
qp_attr->cap.max_send_sge = qp->attrs.sq_max_sges;
@@ -1321,8 +1320,6 @@ struct ib_mr *siw_reg_user_mr(struct ib_pd *pd, u64 start, u64 len,
struct siw_umem *umem = NULL;
struct siw_ureq_reg_mr ureq;
struct siw_device *sdev = to_siw_dev(pd->device);
-
- unsigned long mem_limit = rlimit(RLIMIT_MEMLOCK);
int rv;
siw_dbg_pd(pd, "start: 0x%pK, va: 0x%pK, len: %llu\n",
@@ -1338,20 +1335,7 @@ struct ib_mr *siw_reg_user_mr(struct ib_pd *pd, u64 start, u64 len,
rv = -EINVAL;
goto err_out;
}
- if (mem_limit != RLIM_INFINITY) {
- unsigned long num_pages =
- (PAGE_ALIGN(len + (start & ~PAGE_MASK))) >> PAGE_SHIFT;
- mem_limit >>= PAGE_SHIFT;
-
- if (num_pages > mem_limit - current->mm->locked_vm) {
- siw_dbg_pd(pd, "pages req %lu, max %lu, lock %lu\n",
- num_pages, mem_limit,
- current->mm->locked_vm);
- rv = -ENOMEM;
- goto err_out;
- }
- }
- umem = siw_umem_get(start, len, ib_access_writable(rights));
+ umem = siw_umem_get(pd->device, start, len, rights);
if (IS_ERR(umem)) {
rv = PTR_ERR(umem);
siw_dbg_pd(pd, "getting user memory failed: %d\n", rv);
@@ -1404,7 +1388,7 @@ err_out:
kfree_rcu(mr, rcu);
} else {
if (umem)
- siw_umem_release(umem, false);
+ siw_umem_release(umem);
}
return ERR_PTR(rv);
}