diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-18 18:50:12 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-18 18:50:12 +0000 |
commit | 8665bd53f2f2e27e5511d90428cb3f60e6d0ce15 (patch) | |
tree | 8d58900dc0ebd4a3011f92c128d2fe45bc7c4bf2 /drivers/infiniband/sw/siw | |
parent | Adding debian version 6.7.12-1. (diff) | |
download | linux-8665bd53f2f2e27e5511d90428cb3f60e6d0ce15.tar.xz linux-8665bd53f2f2e27e5511d90428cb3f60e6d0ce15.zip |
Merging upstream version 6.8.9.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'drivers/infiniband/sw/siw')
-rw-r--r-- | drivers/infiniband/sw/siw/siw.h | 14 |
-rw-r--r-- | drivers/infiniband/sw/siw/siw_cm.c | 145 |
-rw-r--r-- | drivers/infiniband/sw/siw/siw_main.c | 30 |
-rw-r--r-- | drivers/infiniband/sw/siw/siw_mem.c | 121 |
-rw-r--r-- | drivers/infiniband/sw/siw/siw_mem.h | 5 |
-rw-r--r-- | drivers/infiniband/sw/siw/siw_qp.c | 2 |
-rw-r--r-- | drivers/infiniband/sw/siw/siw_qp_rx.c | 84 |
-rw-r--r-- | drivers/infiniband/sw/siw/siw_qp_tx.c | 51 |
-rw-r--r-- | drivers/infiniband/sw/siw/siw_verbs.c | 52 |
9 files changed, 200 insertions, 304 deletions
diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h index cec5cccd2e..75253f2b3e 100644 --- a/drivers/infiniband/sw/siw/siw.h +++ b/drivers/infiniband/sw/siw/siw.h @@ -121,11 +121,10 @@ struct siw_page_chunk { }; struct siw_umem { + struct ib_umem *base_mem; struct siw_page_chunk *page_chunk; int num_pages; - bool writable; u64 fp_addr; /* First page base address */ - struct mm_struct *owning_mm; }; struct siw_pble { @@ -289,10 +288,11 @@ struct siw_rx_stream { int skb_offset; /* offset in skb */ int skb_copied; /* processed bytes in skb */ + enum siw_rx_state state; + union iwarp_hdr hdr; struct mpa_trailer trailer; - - enum siw_rx_state state; + struct shash_desc *mpa_crc_hd; /* * For each FPDU, main RX loop runs through 3 stages: @@ -314,7 +314,6 @@ struct siw_rx_stream { u64 ddp_to; u32 inval_stag; /* Stag to be invalidated */ - struct shash_desc *mpa_crc_hd; u8 rx_suspend : 1; u8 pad : 2; /* # of pad bytes expected */ u8 rdmap_op : 4; /* opcode of current frame */ @@ -418,10 +417,10 @@ struct siw_iwarp_tx { struct siw_qp { struct ib_qp base_qp; struct siw_device *sdev; + int tx_cpu; struct kref ref; struct completion qp_free; struct list_head devq; - int tx_cpu; struct siw_qp_attrs attrs; struct siw_cep *cep; @@ -466,7 +465,6 @@ struct siw_qp { } term_info; struct rdma_user_mmap_entry *sq_entry; /* mmap info for SQE array */ struct rdma_user_mmap_entry *rq_entry; /* mmap info for RQE array */ - struct rcu_head rcu; }; /* helper macros */ @@ -659,7 +657,7 @@ static inline struct siw_sqe *orq_get_free(struct siw_qp *qp) static inline int siw_orq_empty(struct siw_qp *qp) { - return qp->orq[qp->orq_get % qp->attrs.orq_size].flags == 0 ? 1 : 0; + return orq_get_current(qp)->flags == 0 ? 
1 : 0; } static inline struct siw_sqe *irq_alloc_free(struct siw_qp *qp) diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c index 7de651cb44..86323918a5 100644 --- a/drivers/infiniband/sw/siw/siw_cm.c +++ b/drivers/infiniband/sw/siw/siw_cm.c @@ -41,16 +41,6 @@ static int siw_cm_upcall(struct siw_cep *cep, enum iw_cm_event_type reason, static void siw_sk_assign_cm_upcalls(struct sock *sk) { - write_lock_bh(&sk->sk_callback_lock); - sk->sk_state_change = siw_cm_llp_state_change; - sk->sk_data_ready = siw_cm_llp_data_ready; - sk->sk_write_space = siw_cm_llp_write_space; - sk->sk_error_report = siw_cm_llp_error_report; - write_unlock_bh(&sk->sk_callback_lock); -} - -static void siw_sk_save_upcalls(struct sock *sk) -{ struct siw_cep *cep = sk_to_cep(sk); write_lock_bh(&sk->sk_callback_lock); @@ -58,6 +48,11 @@ static void siw_sk_save_upcalls(struct sock *sk) cep->sk_data_ready = sk->sk_data_ready; cep->sk_write_space = sk->sk_write_space; cep->sk_error_report = sk->sk_error_report; + + sk->sk_state_change = siw_cm_llp_state_change; + sk->sk_data_ready = siw_cm_llp_data_ready; + sk->sk_write_space = siw_cm_llp_write_space; + sk->sk_error_report = siw_cm_llp_error_report; write_unlock_bh(&sk->sk_callback_lock); } @@ -156,7 +151,6 @@ static void siw_cep_socket_assoc(struct siw_cep *cep, struct socket *s) siw_cep_get(cep); s->sk->sk_user_data = cep; - siw_sk_save_upcalls(s->sk); siw_sk_assign_cm_upcalls(s->sk); } @@ -364,6 +358,24 @@ static int siw_cm_upcall(struct siw_cep *cep, enum iw_cm_event_type reason, return id->event_handler(id, &event); } +static void siw_free_cm_id(struct siw_cep *cep) +{ + if (!cep->cm_id) + return; + + cep->cm_id->rem_ref(cep->cm_id); + cep->cm_id = NULL; +} + +static void siw_destroy_cep_sock(struct siw_cep *cep) +{ + if (cep->sock) { + siw_socket_disassoc(cep->sock); + sock_release(cep->sock); + cep->sock = NULL; + } +} + /* * siw_qp_cm_drop() * @@ -393,8 +405,7 @@ void siw_qp_cm_drop(struct siw_qp *qp, int 
schedule) } siw_dbg_cep(cep, "immediate close, state %d\n", cep->state); - if (qp->term_info.valid) - siw_send_terminate(qp); + siw_send_terminate(qp); if (cep->cm_id) { switch (cep->state) { @@ -416,20 +427,12 @@ void siw_qp_cm_drop(struct siw_qp *qp, int schedule) default: break; } - cep->cm_id->rem_ref(cep->cm_id); - cep->cm_id = NULL; + siw_free_cm_id(cep); siw_cep_put(cep); } cep->state = SIW_EPSTATE_CLOSED; - if (cep->sock) { - siw_socket_disassoc(cep->sock); - /* - * Immediately close socket - */ - sock_release(cep->sock); - cep->sock = NULL; - } + siw_destroy_cep_sock(cep); if (cep->qp) { cep->qp = NULL; siw_qp_put(qp); @@ -445,6 +448,12 @@ void siw_cep_put(struct siw_cep *cep) kref_put(&cep->ref, __siw_cep_dealloc); } +static void siw_cep_set_free_and_put(struct siw_cep *cep) +{ + siw_cep_set_free(cep); + siw_cep_put(cep); +} + void siw_cep_get(struct siw_cep *cep) { kref_get(&cep->ref); @@ -1061,7 +1070,7 @@ static void siw_cm_work_handler(struct work_struct *w) /* * QP scheduled LLP close */ - if (cep->qp && cep->qp->term_info.valid) + if (cep->qp) siw_send_terminate(cep->qp); if (cep->cm_id) @@ -1175,8 +1184,7 @@ static void siw_cm_work_handler(struct work_struct *w) cep->sock = NULL; } if (cep->cm_id) { - cep->cm_id->rem_ref(cep->cm_id); - cep->cm_id = NULL; + siw_free_cm_id(cep); siw_cep_put(cep); } } @@ -1515,9 +1523,7 @@ error: cep->state = SIW_EPSTATE_CLOSED; - siw_cep_set_free(cep); - - siw_cep_put(cep); + siw_cep_set_free_and_put(cep); } else if (s) { sock_release(s); @@ -1548,7 +1554,7 @@ int siw_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params) struct siw_cep *cep = (struct siw_cep *)id->provider_data; struct siw_qp *qp; struct siw_qp_attrs qp_attrs; - int rv, max_priv_data = MPA_MAX_PRIVDATA; + int rv = -EINVAL, max_priv_data = MPA_MAX_PRIVDATA; bool wait_for_peer_rts = false; siw_cep_set_inuse(cep); @@ -1564,26 +1570,17 @@ int siw_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params) if (cep->state != 
SIW_EPSTATE_RECVD_MPAREQ) { siw_dbg_cep(cep, "out of state\n"); - - siw_cep_set_free(cep); - siw_cep_put(cep); - - return -ECONNRESET; + rv = -ECONNRESET; + goto free_cep; } qp = siw_qp_id2obj(sdev, params->qpn); if (!qp) { WARN(1, "[QP %d] does not exist\n", params->qpn); - siw_cep_set_free(cep); - siw_cep_put(cep); - - return -EINVAL; + goto free_cep; } down_write(&qp->state_lock); - if (qp->attrs.state > SIW_QP_STATE_RTR) { - rv = -EINVAL; - up_write(&qp->state_lock); - goto error; - } + if (qp->attrs.state > SIW_QP_STATE_RTR) + goto error_unlock; siw_dbg_cep(cep, "[QP %d]\n", params->qpn); if (try_gso && cep->mpa.hdr.params.bits & MPA_RR_FLAG_GSO_EXP) { @@ -1597,9 +1594,7 @@ int siw_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params) "[QP %u]: ord %d (max %d), ird %d (max %d)\n", qp_id(qp), params->ord, sdev->attrs.max_ord, params->ird, sdev->attrs.max_ird); - rv = -EINVAL; - up_write(&qp->state_lock); - goto error; + goto error_unlock; } if (cep->enhanced_rdma_conn_est) max_priv_data -= sizeof(struct mpa_v2_data); @@ -1609,9 +1604,7 @@ int siw_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params) cep, "[QP %u]: private data length: %d (max %d)\n", qp_id(qp), params->private_data_len, max_priv_data); - rv = -EINVAL; - up_write(&qp->state_lock); - goto error; + goto error_unlock; } if (cep->enhanced_rdma_conn_est) { if (params->ord > cep->ord) { @@ -1620,9 +1613,7 @@ int siw_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params) } else { cep->ird = params->ird; cep->ord = params->ord; - rv = -EINVAL; - up_write(&qp->state_lock); - goto error; + goto error_unlock; } } if (params->ird < cep->ird) { @@ -1631,8 +1622,7 @@ int siw_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params) params->ird = cep->ird; else { rv = -ENOMEM; - up_write(&qp->state_lock); - goto error; + goto error_unlock; } } if (cep->mpa.v2_ctrl.ord & @@ -1679,7 +1669,6 @@ int siw_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params) SIW_QP_ATTR_ORD | 
SIW_QP_ATTR_IRD | SIW_QP_ATTR_MPA); up_write(&qp->state_lock); - if (rv) goto error; @@ -1702,27 +1691,23 @@ int siw_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params) siw_cep_set_free(cep); return 0; + +error_unlock: + up_write(&qp->state_lock); error: - siw_socket_disassoc(cep->sock); - sock_release(cep->sock); - cep->sock = NULL; + siw_destroy_cep_sock(cep); cep->state = SIW_EPSTATE_CLOSED; - if (cep->cm_id) { - cep->cm_id->rem_ref(id); - cep->cm_id = NULL; - } + siw_free_cm_id(cep); if (qp->cep) { siw_cep_put(cep); qp->cep = NULL; } cep->qp = NULL; siw_qp_put(qp); - - siw_cep_set_free(cep); - siw_cep_put(cep); - +free_cep: + siw_cep_set_free_and_put(cep); return rv; } @@ -1744,8 +1729,7 @@ int siw_reject(struct iw_cm_id *id, const void *pdata, u8 pd_len) if (cep->state != SIW_EPSTATE_RECVD_MPAREQ) { siw_dbg_cep(cep, "out of state\n"); - siw_cep_set_free(cep); - siw_cep_put(cep); /* put last reference */ + siw_cep_set_free_and_put(cep); /* put last reference */ return -ECONNRESET; } @@ -1756,14 +1740,11 @@ int siw_reject(struct iw_cm_id *id, const void *pdata, u8 pd_len) cep->mpa.hdr.params.bits |= MPA_RR_FLAG_REJECT; /* reject */ siw_send_mpareqrep(cep, pdata, pd_len); } - siw_socket_disassoc(cep->sock); - sock_release(cep->sock); - cep->sock = NULL; + siw_destroy_cep_sock(cep); cep->state = SIW_EPSTATE_CLOSED; - siw_cep_set_free(cep); - siw_cep_put(cep); + siw_cep_set_free_and_put(cep); return 0; } @@ -1890,16 +1871,12 @@ error: if (cep) { siw_cep_set_inuse(cep); - if (cep->cm_id) { - cep->cm_id->rem_ref(cep->cm_id); - cep->cm_id = NULL; - } + siw_free_cm_id(cep); cep->sock = NULL; siw_socket_disassoc(s); cep->state = SIW_EPSTATE_CLOSED; - siw_cep_set_free(cep); - siw_cep_put(cep); + siw_cep_set_free_and_put(cep); } sock_release(s); @@ -1923,18 +1900,14 @@ static void siw_drop_listeners(struct iw_cm_id *id) siw_cep_set_inuse(cep); - if (cep->cm_id) { - cep->cm_id->rem_ref(cep->cm_id); - cep->cm_id = NULL; - } + siw_free_cm_id(cep); if (cep->sock) { 
siw_socket_disassoc(cep->sock); sock_release(cep->sock); cep->sock = NULL; } cep->state = SIW_EPSTATE_CLOSED; - siw_cep_set_free(cep); - siw_cep_put(cep); + siw_cep_set_free_and_put(cep); } } diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c index 1ab62982df..61ad8ca3d1 100644 --- a/drivers/infiniband/sw/siw/siw_main.c +++ b/drivers/infiniband/sw/siw/siw_main.c @@ -109,6 +109,17 @@ static struct { int num_nodes; } siw_cpu_info; +static void siw_destroy_cpulist(int number) +{ + int i = 0; + + while (i < number) + kfree(siw_cpu_info.tx_valid_cpus[i++]); + + kfree(siw_cpu_info.tx_valid_cpus); + siw_cpu_info.tx_valid_cpus = NULL; +} + static int siw_init_cpulist(void) { int i, num_nodes = nr_node_ids; @@ -138,24 +149,11 @@ static int siw_init_cpulist(void) out_err: siw_cpu_info.num_nodes = 0; - while (--i >= 0) - kfree(siw_cpu_info.tx_valid_cpus[i]); - kfree(siw_cpu_info.tx_valid_cpus); - siw_cpu_info.tx_valid_cpus = NULL; + siw_destroy_cpulist(i); return -ENOMEM; } -static void siw_destroy_cpulist(void) -{ - int i = 0; - - while (i < siw_cpu_info.num_nodes) - kfree(siw_cpu_info.tx_valid_cpus[i++]); - - kfree(siw_cpu_info.tx_valid_cpus); -} - /* * Choose CPU with least number of active QP's from NUMA node of * TX interface. @@ -558,7 +556,7 @@ out_error: pr_info("SoftIWARP attach failed. 
Error: %d\n", rv); siw_cm_exit(); - siw_destroy_cpulist(); + siw_destroy_cpulist(siw_cpu_info.num_nodes); return rv; } @@ -573,7 +571,7 @@ static void __exit siw_exit_module(void) siw_cm_exit(); - siw_destroy_cpulist(); + siw_destroy_cpulist(siw_cpu_info.num_nodes); if (siw_crypto_shash) crypto_free_shash(siw_crypto_shash); diff --git a/drivers/infiniband/sw/siw/siw_mem.c b/drivers/infiniband/sw/siw/siw_mem.c index c5f7f1669d..dcb963607c 100644 --- a/drivers/infiniband/sw/siw/siw_mem.c +++ b/drivers/infiniband/sw/siw/siw_mem.c @@ -5,6 +5,7 @@ #include <linux/gfp.h> #include <rdma/ib_verbs.h> +#include <rdma/ib_umem.h> #include <linux/dma-mapping.h> #include <linux/slab.h> #include <linux/sched/mm.h> @@ -13,18 +14,20 @@ #include "siw.h" #include "siw_mem.h" +/* Stag lookup is based on its index part only (24 bits). */ +#define SIW_STAG_MAX_INDEX 0x00ffffff + /* - * Stag lookup is based on its index part only (24 bits). * The code avoids special Stag of zero and tries to randomize * STag values between 1 and SIW_STAG_MAX_INDEX. 
*/ int siw_mem_add(struct siw_device *sdev, struct siw_mem *m) { - struct xa_limit limit = XA_LIMIT(1, 0x00ffffff); + struct xa_limit limit = XA_LIMIT(1, SIW_STAG_MAX_INDEX); u32 id, next; get_random_bytes(&next, 4); - next &= 0x00ffffff; + next &= SIW_STAG_MAX_INDEX; if (xa_alloc_cyclic(&sdev->mem_xa, &id, m, limit, &next, GFP_KERNEL) < 0) @@ -60,28 +63,17 @@ struct siw_mem *siw_mem_id2obj(struct siw_device *sdev, int stag_index) return NULL; } -static void siw_free_plist(struct siw_page_chunk *chunk, int num_pages, - bool dirty) -{ - unpin_user_pages_dirty_lock(chunk->plist, num_pages, dirty); -} - -void siw_umem_release(struct siw_umem *umem, bool dirty) +void siw_umem_release(struct siw_umem *umem) { - struct mm_struct *mm_s = umem->owning_mm; int i, num_pages = umem->num_pages; - for (i = 0; num_pages; i++) { - int to_free = min_t(int, PAGES_PER_CHUNK, num_pages); + if (umem->base_mem) + ib_umem_release(umem->base_mem); - siw_free_plist(&umem->page_chunk[i], to_free, - umem->writable && dirty); + for (i = 0; num_pages > 0; i++) { kfree(umem->page_chunk[i].plist); - num_pages -= to_free; + num_pages -= PAGES_PER_CHUNK; } - atomic64_sub(umem->num_pages, &mm_s->pinned_vm); - - mmdrop(mm_s); kfree(umem->page_chunk); kfree(umem); } @@ -91,7 +83,7 @@ int siw_mr_add_mem(struct siw_mr *mr, struct ib_pd *pd, void *mem_obj, { struct siw_device *sdev = to_siw_dev(pd->device); struct siw_mem *mem = kzalloc(sizeof(*mem), GFP_KERNEL); - struct xa_limit limit = XA_LIMIT(1, 0x00ffffff); + struct xa_limit limit = XA_LIMIT(1, SIW_STAG_MAX_INDEX); u32 id, next; if (!mem) @@ -107,7 +99,7 @@ int siw_mr_add_mem(struct siw_mr *mr, struct ib_pd *pd, void *mem_obj, kref_init(&mem->ref); get_random_bytes(&next, 4); - next &= 0x00ffffff; + next &= SIW_STAG_MAX_INDEX; if (xa_alloc_cyclic(&sdev->mem_xa, &id, mem, limit, &next, GFP_KERNEL) < 0) { @@ -145,7 +137,7 @@ void siw_free_mem(struct kref *ref) if (!mem->is_mw && mem->mem_obj) { if (mem->is_pbl == 0) - siw_umem_release(mem->umem, 
true); + siw_umem_release(mem->umem); else kfree(mem->pbl); } @@ -362,18 +354,16 @@ struct siw_pbl *siw_pbl_alloc(u32 num_buf) return pbl; } -struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable) +struct siw_umem *siw_umem_get(struct ib_device *base_dev, u64 start, + u64 len, int rights) { struct siw_umem *umem; - struct mm_struct *mm_s; + struct ib_umem *base_mem; + struct sg_page_iter sg_iter; + struct sg_table *sgt; u64 first_page_va; - unsigned long mlock_limit; - unsigned int foll_flags = FOLL_LONGTERM; int num_pages, num_chunks, i, rv = 0; - if (!can_do_mlock()) - return ERR_PTR(-EPERM); - if (!len) return ERR_PTR(-EINVAL); @@ -385,65 +375,50 @@ struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable) if (!umem) return ERR_PTR(-ENOMEM); - mm_s = current->mm; - umem->owning_mm = mm_s; - umem->writable = writable; - - mmgrab(mm_s); - - if (writable) - foll_flags |= FOLL_WRITE; - - mmap_read_lock(mm_s); - - mlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; - - if (atomic64_add_return(num_pages, &mm_s->pinned_vm) > mlock_limit) { - rv = -ENOMEM; - goto out_sem_up; - } - umem->fp_addr = first_page_va; - umem->page_chunk = kcalloc(num_chunks, sizeof(struct siw_page_chunk), GFP_KERNEL); if (!umem->page_chunk) { rv = -ENOMEM; - goto out_sem_up; + goto err_out; } - for (i = 0; num_pages; i++) { + base_mem = ib_umem_get(base_dev, start, len, rights); + if (IS_ERR(base_mem)) { + rv = PTR_ERR(base_mem); + siw_dbg(base_dev, "Cannot pin user memory: %d\n", rv); + goto err_out; + } + umem->fp_addr = first_page_va; + umem->base_mem = base_mem; + + sgt = &base_mem->sgt_append.sgt; + __sg_page_iter_start(&sg_iter, sgt->sgl, sgt->orig_nents, 0); + + if (!__sg_page_iter_next(&sg_iter)) { + rv = -EINVAL; + goto err_out; + } + for (i = 0; num_pages > 0; i++) { int nents = min_t(int, num_pages, PAGES_PER_CHUNK); struct page **plist = kcalloc(nents, sizeof(struct page *), GFP_KERNEL); if (!plist) { rv = -ENOMEM; - goto out_sem_up; + goto err_out; } 
umem->page_chunk[i].plist = plist; - while (nents) { - rv = pin_user_pages(first_page_va, nents, foll_flags, - plist); - if (rv < 0) - goto out_sem_up; - - umem->num_pages += rv; - first_page_va += rv * PAGE_SIZE; - plist += rv; - nents -= rv; - num_pages -= rv; + while (nents--) { + *plist = sg_page_iter_page(&sg_iter); + umem->num_pages++; + num_pages--; + plist++; + if (!__sg_page_iter_next(&sg_iter)) + break; } } -out_sem_up: - mmap_read_unlock(mm_s); - - if (rv > 0) - return umem; - - /* Adjust accounting for pages not pinned */ - if (num_pages) - atomic64_sub(num_pages, &mm_s->pinned_vm); - - siw_umem_release(umem, false); + return umem; +err_out: + siw_umem_release(umem); return ERR_PTR(rv); } diff --git a/drivers/infiniband/sw/siw/siw_mem.h b/drivers/infiniband/sw/siw/siw_mem.h index a2835284fe..e74cfcd6db 100644 --- a/drivers/infiniband/sw/siw/siw_mem.h +++ b/drivers/infiniband/sw/siw/siw_mem.h @@ -6,8 +6,9 @@ #ifndef _SIW_MEM_H #define _SIW_MEM_H -struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable); -void siw_umem_release(struct siw_umem *umem, bool dirty); +struct siw_umem *siw_umem_get(struct ib_device *base_dave, u64 start, + u64 len, int rights); +void siw_umem_release(struct siw_umem *umem); struct siw_pbl *siw_pbl_alloc(u32 num_buf); dma_addr_t siw_pbl_get_buffer(struct siw_pbl *pbl, u64 off, int *len, int *idx); struct siw_mem *siw_mem_id2obj(struct siw_device *sdev, int stag_index); diff --git a/drivers/infiniband/sw/siw/siw_qp.c b/drivers/infiniband/sw/siw/siw_qp.c index 26e3904d2f..da92cfa207 100644 --- a/drivers/infiniband/sw/siw/siw_qp.c +++ b/drivers/infiniband/sw/siw/siw_qp.c @@ -1183,7 +1183,7 @@ int siw_rqe_complete(struct siw_qp *qp, struct siw_rqe *rqe, u32 bytes, /* * siw_sq_flush() * - * Flush SQ and ORRQ entries to CQ. + * Flush SQ and ORQ entries to CQ. * * Must be called with QP state write lock held. * Therefore, SQ and ORQ lock must not be taken. 
diff --git a/drivers/infiniband/sw/siw/siw_qp_rx.c b/drivers/infiniband/sw/siw/siw_qp_rx.c index 33e0fdb362..ed4fc39718 100644 --- a/drivers/infiniband/sw/siw/siw_qp_rx.c +++ b/drivers/infiniband/sw/siw/siw_qp_rx.c @@ -405,6 +405,20 @@ out: return wqe; } +static int siw_rx_data(struct siw_mem *mem_p, struct siw_rx_stream *srx, + unsigned int *pbl_idx, u64 addr, int bytes) +{ + int rv; + + if (mem_p->mem_obj == NULL) + rv = siw_rx_kva(srx, ib_virt_dma_to_ptr(addr), bytes); + else if (!mem_p->is_pbl) + rv = siw_rx_umem(srx, mem_p->umem, addr, bytes); + else + rv = siw_rx_pbl(srx, pbl_idx, mem_p, addr, bytes); + return rv; +} + /* * siw_proc_send: * @@ -485,17 +499,8 @@ int siw_proc_send(struct siw_qp *qp) break; } mem_p = *mem; - if (mem_p->mem_obj == NULL) - rv = siw_rx_kva(srx, - ib_virt_dma_to_ptr(sge->laddr + frx->sge_off), - sge_bytes); - else if (!mem_p->is_pbl) - rv = siw_rx_umem(srx, mem_p->umem, - sge->laddr + frx->sge_off, sge_bytes); - else - rv = siw_rx_pbl(srx, &frx->pbl_idx, mem_p, - sge->laddr + frx->sge_off, sge_bytes); - + rv = siw_rx_data(mem_p, srx, &frx->pbl_idx, + sge->laddr + frx->sge_off, sge_bytes); if (unlikely(rv != sge_bytes)) { wqe->processed += rcvd_bytes; @@ -598,17 +603,8 @@ int siw_proc_write(struct siw_qp *qp) return -EINVAL; } - if (mem->mem_obj == NULL) - rv = siw_rx_kva(srx, - (void *)(uintptr_t)(srx->ddp_to + srx->fpdu_part_rcvd), - bytes); - else if (!mem->is_pbl) - rv = siw_rx_umem(srx, mem->umem, - srx->ddp_to + srx->fpdu_part_rcvd, bytes); - else - rv = siw_rx_pbl(srx, &frx->pbl_idx, mem, - srx->ddp_to + srx->fpdu_part_rcvd, bytes); - + rv = siw_rx_data(mem, srx, &frx->pbl_idx, + srx->ddp_to + srx->fpdu_part_rcvd, bytes); if (unlikely(rv != bytes)) { siw_init_terminate(qp, TERM_ERROR_LAYER_DDP, DDP_ETYPE_CATASTROPHIC, @@ -849,17 +845,8 @@ int siw_proc_rresp(struct siw_qp *qp) mem_p = *mem; bytes = min(srx->fpdu_part_rem, srx->skb_new); - - if (mem_p->mem_obj == NULL) - rv = siw_rx_kva(srx, - ib_virt_dma_to_ptr(sge->laddr + 
wqe->processed), - bytes); - else if (!mem_p->is_pbl) - rv = siw_rx_umem(srx, mem_p->umem, sge->laddr + wqe->processed, - bytes); - else - rv = siw_rx_pbl(srx, &frx->pbl_idx, mem_p, - sge->laddr + wqe->processed, bytes); + rv = siw_rx_data(mem_p, srx, &frx->pbl_idx, + sge->laddr + wqe->processed, bytes); if (rv != bytes) { wqe->wc_status = SIW_WC_GENERAL_ERR; rv = -EINVAL; @@ -881,6 +868,13 @@ error_term: return rv; } +static void siw_update_skb_rcvd(struct siw_rx_stream *srx, u16 length) +{ + srx->skb_offset += length; + srx->skb_new -= length; + srx->skb_copied += length; +} + int siw_proc_terminate(struct siw_qp *qp) { struct siw_rx_stream *srx = &qp->rx_stream; @@ -925,9 +919,7 @@ int siw_proc_terminate(struct siw_qp *qp) goto out; infop += to_copy; - srx->skb_offset += to_copy; - srx->skb_new -= to_copy; - srx->skb_copied += to_copy; + siw_update_skb_rcvd(srx, to_copy); srx->fpdu_part_rcvd += to_copy; srx->fpdu_part_rem -= to_copy; @@ -949,9 +941,7 @@ int siw_proc_terminate(struct siw_qp *qp) term->flag_m ? 
"valid" : "invalid"); } out: - srx->skb_new -= to_copy; - srx->skb_offset += to_copy; - srx->skb_copied += to_copy; + siw_update_skb_rcvd(srx, to_copy); srx->fpdu_part_rcvd += to_copy; srx->fpdu_part_rem -= to_copy; @@ -970,9 +960,7 @@ static int siw_get_trailer(struct siw_qp *qp, struct siw_rx_stream *srx) skb_copy_bits(skb, srx->skb_offset, tbuf, avail); - srx->skb_new -= avail; - srx->skb_offset += avail; - srx->skb_copied += avail; + siw_update_skb_rcvd(srx, avail); srx->fpdu_part_rem -= avail; if (srx->fpdu_part_rem) @@ -1023,12 +1011,8 @@ static int siw_get_hdr(struct siw_rx_stream *srx) skb_copy_bits(skb, srx->skb_offset, (char *)c_hdr + srx->fpdu_part_rcvd, bytes); + siw_update_skb_rcvd(srx, bytes); srx->fpdu_part_rcvd += bytes; - - srx->skb_new -= bytes; - srx->skb_offset += bytes; - srx->skb_copied += bytes; - if (srx->fpdu_part_rcvd < MIN_DDP_HDR) return -EAGAIN; @@ -1091,12 +1075,8 @@ static int siw_get_hdr(struct siw_rx_stream *srx) skb_copy_bits(skb, srx->skb_offset, (char *)c_hdr + srx->fpdu_part_rcvd, bytes); + siw_update_skb_rcvd(srx, bytes); srx->fpdu_part_rcvd += bytes; - - srx->skb_new -= bytes; - srx->skb_offset += bytes; - srx->skb_copied += bytes; - if (srx->fpdu_part_rcvd < hdrlen) return -EAGAIN; } diff --git a/drivers/infiniband/sw/siw/siw_qp_tx.c b/drivers/infiniband/sw/siw/siw_qp_tx.c index b2c06100cf..64ad9e0895 100644 --- a/drivers/infiniband/sw/siw/siw_qp_tx.c +++ b/drivers/infiniband/sw/siw/siw_qp_tx.c @@ -34,6 +34,15 @@ static struct page *siw_get_pblpage(struct siw_mem *mem, u64 addr, int *idx) return NULL; } +static struct page *siw_get_page(struct siw_mem *mem, struct siw_sge *sge, + unsigned long offset, int *pbl_idx) +{ + if (!mem->is_pbl) + return siw_get_upage(mem->umem, sge->laddr + offset); + else + return siw_get_pblpage(mem, sge->laddr + offset, pbl_idx); +} + /* * Copy short payload at provided destination payload address */ @@ -67,11 +76,7 @@ static int siw_try_1seg(struct siw_iwarp_tx *c_tx, void *paddr) char *buffer; 
int pbl_idx = 0; - if (!mem->is_pbl) - p = siw_get_upage(mem->umem, sge->laddr); - else - p = siw_get_pblpage(mem, sge->laddr, &pbl_idx); - + p = siw_get_page(mem, sge, 0, &pbl_idx); if (unlikely(!p)) return -EFAULT; @@ -85,13 +90,7 @@ static int siw_try_1seg(struct siw_iwarp_tx *c_tx, void *paddr) memcpy(paddr, buffer + off, part); kunmap_local(buffer); - if (!mem->is_pbl) - p = siw_get_upage(mem->umem, - sge->laddr + part); - else - p = siw_get_pblpage(mem, - sge->laddr + part, - &pbl_idx); + p = siw_get_page(mem, sge, part, &pbl_idx); if (unlikely(!p)) return -EFAULT; @@ -249,14 +248,10 @@ static int siw_qp_prepare_tx(struct siw_iwarp_tx *c_tx) /* * Do complete CRC if enabled and short packet */ - if (c_tx->mpa_crc_hd) { - crypto_shash_init(c_tx->mpa_crc_hd); - if (crypto_shash_update(c_tx->mpa_crc_hd, - (u8 *)&c_tx->pkt, - c_tx->ctrl_len)) - return -EINVAL; - crypto_shash_final(c_tx->mpa_crc_hd, (u8 *)crc); - } + if (c_tx->mpa_crc_hd && + crypto_shash_digest(c_tx->mpa_crc_hd, (u8 *)&c_tx->pkt, + c_tx->ctrl_len, (u8 *)crc) != 0) + return -EINVAL; c_tx->ctrl_len += MPA_CRC_SIZE; return PKT_COMPLETE; @@ -297,8 +292,7 @@ static int siw_tx_ctrl(struct siw_iwarp_tx *c_tx, struct socket *s, (char *)&c_tx->pkt.ctrl + c_tx->ctrl_sent, .iov_len = c_tx->ctrl_len - c_tx->ctrl_sent }; - int rv = kernel_sendmsg(s, &msg, &iov, 1, - c_tx->ctrl_len - c_tx->ctrl_sent); + int rv = kernel_sendmsg(s, &msg, &iov, 1, iov.iov_len); if (rv >= 0) { c_tx->ctrl_sent += rv; @@ -502,13 +496,7 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s) if (!is_kva) { struct page *p; - if (mem->is_pbl) - p = siw_get_pblpage( - mem, sge->laddr + sge_off, - &pbl_idx); - else - p = siw_get_upage(mem->umem, - sge->laddr + sge_off); + p = siw_get_page(mem, sge, sge_off, &pbl_idx); if (unlikely(!p)) { siw_unmap_pages(iov, kmap_mask, seg); wqe->processed -= c_tx->bytes_unsent; @@ -1009,13 +997,12 @@ static int siw_qp_sq_proc_local(struct siw_qp *qp, struct siw_wqe *wqe) * MPA FPDUs, each 
containing a DDP segment. * * SQ processing may occur in user context as a result of posting - * new WQE's or from siw_sq_work_handler() context. Processing in + * new WQE's or from siw_tx_thread context. Processing in * user context is limited to non-kernel verbs users. * * SQ processing may get paused anytime, possibly in the middle of a WR * or FPDU, if insufficient send space is available. SQ processing - * gets resumed from siw_sq_work_handler(), if send space becomes - * available again. + * gets resumed from siw_tx_thread, if send space becomes available again. * * Must be called with the QP state read-locked. * diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c index c5c27db9c2..ecf0444666 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.c +++ b/drivers/infiniband/sw/siw/siw_verbs.c @@ -19,6 +19,15 @@ #include "siw_verbs.h" #include "siw_mem.h" +static int siw_qp_state_to_ib_qp_state[SIW_QP_STATE_COUNT] = { + [SIW_QP_STATE_IDLE] = IB_QPS_INIT, + [SIW_QP_STATE_RTR] = IB_QPS_RTR, + [SIW_QP_STATE_RTS] = IB_QPS_RTS, + [SIW_QP_STATE_CLOSING] = IB_QPS_SQD, + [SIW_QP_STATE_TERMINATE] = IB_QPS_SQE, + [SIW_QP_STATE_ERROR] = IB_QPS_ERR +}; + static int ib_qp_state_to_siw_qp_state[IB_QPS_ERR + 1] = { [IB_QPS_RESET] = SIW_QP_STATE_IDLE, [IB_QPS_INIT] = SIW_QP_STATE_IDLE, @@ -66,12 +75,9 @@ int siw_mmap(struct ib_ucontext *ctx, struct vm_area_struct *vma) entry = to_siw_mmap_entry(rdma_entry); rv = remap_vmalloc_range(vma, entry->address, 0); - if (rv) { + if (rv) pr_warn("remap_vmalloc_range failed: %lu, %zu\n", vma->vm_pgoff, size); - goto out; - } -out: rdma_user_mmap_entry_put(rdma_entry); return rv; @@ -336,11 +342,10 @@ int siw_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs, goto err_atomic; } /* - * NOTE: we allow for zero element SQ and RQ WQE's SGL's - * but not for a QP unable to hold any WQE (SQ + RQ) + * NOTE: we don't allow for a QP unable to hold any SQ WQE */ - if (attrs->cap.max_send_wr + 
attrs->cap.max_recv_wr == 0) { - siw_dbg(base_dev, "QP must have send or receive queue\n"); + if (attrs->cap.max_send_wr == 0) { + siw_dbg(base_dev, "QP must have send queue\n"); rv = -EINVAL; goto err_atomic; } @@ -360,21 +365,14 @@ int siw_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs, if (rv) goto err_atomic; - num_sqe = attrs->cap.max_send_wr; - num_rqe = attrs->cap.max_recv_wr; /* All queue indices are derived from modulo operations * on a free running 'get' (consumer) and 'put' (producer) * unsigned counter. Having queue sizes at power of two * avoids handling counter wrap around. */ - if (num_sqe) - num_sqe = roundup_pow_of_two(num_sqe); - else { - /* Zero sized SQ is not supported */ - rv = -EINVAL; - goto err_out_xa; - } + num_sqe = roundup_pow_of_two(attrs->cap.max_send_wr); + num_rqe = attrs->cap.max_recv_wr; if (num_rqe) num_rqe = roundup_pow_of_two(num_rqe); @@ -515,6 +513,7 @@ int siw_query_qp(struct ib_qp *base_qp, struct ib_qp_attr *qp_attr, } else { return -EINVAL; } + qp_attr->qp_state = siw_qp_state_to_ib_qp_state[qp->attrs.state]; qp_attr->cap.max_inline_data = SIW_MAX_INLINE; qp_attr->cap.max_send_wr = qp->attrs.sq_size; qp_attr->cap.max_send_sge = qp->attrs.sq_max_sges; @@ -1321,8 +1320,6 @@ struct ib_mr *siw_reg_user_mr(struct ib_pd *pd, u64 start, u64 len, struct siw_umem *umem = NULL; struct siw_ureq_reg_mr ureq; struct siw_device *sdev = to_siw_dev(pd->device); - - unsigned long mem_limit = rlimit(RLIMIT_MEMLOCK); int rv; siw_dbg_pd(pd, "start: 0x%pK, va: 0x%pK, len: %llu\n", @@ -1338,20 +1335,7 @@ struct ib_mr *siw_reg_user_mr(struct ib_pd *pd, u64 start, u64 len, rv = -EINVAL; goto err_out; } - if (mem_limit != RLIM_INFINITY) { - unsigned long num_pages = - (PAGE_ALIGN(len + (start & ~PAGE_MASK))) >> PAGE_SHIFT; - mem_limit >>= PAGE_SHIFT; - - if (num_pages > mem_limit - current->mm->locked_vm) { - siw_dbg_pd(pd, "pages req %lu, max %lu, lock %lu\n", - num_pages, mem_limit, - current->mm->locked_vm); - rv = -ENOMEM; - goto 
err_out; - } - } - umem = siw_umem_get(start, len, ib_access_writable(rights)); + umem = siw_umem_get(pd->device, start, len, rights); if (IS_ERR(umem)) { rv = PTR_ERR(umem); siw_dbg_pd(pd, "getting user memory failed: %d\n", rv); @@ -1404,7 +1388,7 @@ err_out: kfree_rcu(mr, rcu); } else { if (umem) - siw_umem_release(umem, false); + siw_umem_release(umem); } return ERR_PTR(rv); } |