diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
commit | e6918187568dbd01842d8d1d2c808ce16a894239 (patch) | |
tree | 64f88b554b444a49f656b6c656111a145cbbaa28 /src/spdk/lib/rdma | |
parent | Initial commit. (diff) | |
download | ceph-e6918187568dbd01842d8d1d2c808ce16a894239.tar.xz ceph-e6918187568dbd01842d8d1d2c808ce16a894239.zip |
Adding upstream version 18.2.2. [upstream/18.2.2]
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/spdk/lib/rdma')
-rw-r--r-- | src/spdk/lib/rdma/Makefile | 70 | ||||
-rw-r--r-- | src/spdk/lib/rdma/rdma_mlx5_dv.c | 316 | ||||
-rw-r--r-- | src/spdk/lib/rdma/rdma_verbs.c | 167 | ||||
-rw-r--r-- | src/spdk/lib/rdma/spdk_rdma.map | 14 |
4 files changed, 567 insertions, 0 deletions
diff --git a/src/spdk/lib/rdma/Makefile b/src/spdk/lib/rdma/Makefile new file mode 100644 index 000000000..e6374557d --- /dev/null +++ b/src/spdk/lib/rdma/Makefile @@ -0,0 +1,70 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. All rights reserved. +# Copyright (c) Mellanox Technologies LTD. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..) 
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +SO_VER := 1 +SO_MINOR := 0 + +SPDK_MAP_FILE = $(abspath $(CURDIR)/spdk_rdma.map) + +LIBNAME = rdma + +ifeq ($(CONFIG_RDMA_PROV),verbs) +C_SRCS = rdma_verbs.c +else ifeq ($(CONFIG_RDMA_PROV),mlx5_dv) +C_SRCS = rdma_mlx5_dv.c +LOCAL_SYS_LIBS += -lmlx5 +else +$(error Wrong RDMA provider specified: $(CONFIG_RDMA_PROV)) +endif + +LOCAL_SYS_LIBS += -libverbs -lrdmacm +#Attach only if FreeBSD and RDMA is specified with configure +ifeq ($(OS),FreeBSD) +# Mellanox - MLX4 HBA Userspace Library +ifneq ("$(wildcard /usr/lib/libmlx4.*)","") +LOCAL_SYS_LIBS += -lmlx4 +endif +# Mellanox - MLX5 HBA Userspace Library +ifneq ("$(wildcard /usr/lib/libmlx5.*)","") +LOCAL_SYS_LIBS += -lmlx5 +endif +# Chelsio HBA Userspace Library +ifneq ("$(wildcard /usr/lib/libcxgb4.*)","") +LOCAL_SYS_LIBS += -lcxgb4 +endif +endif + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/src/spdk/lib/rdma/rdma_mlx5_dv.c b/src/spdk/lib/rdma/rdma_mlx5_dv.c new file mode 100644 index 000000000..bae3afdda --- /dev/null +++ b/src/spdk/lib/rdma/rdma_mlx5_dv.c @@ -0,0 +1,316 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. All rights reserved. + * Copyright (c) 2020 Mellanox Technologies LTD. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <rdma/rdma_cma.h> +#include <infiniband/mlx5dv.h> + +#include "spdk/stdinc.h" +#include "spdk/string.h" +#include "spdk/likely.h" + +#include "spdk_internal/rdma.h" +#include "spdk_internal/log.h" + +struct spdk_rdma_mlx5_dv_qp { + struct spdk_rdma_qp common; + struct ibv_qp_ex *qpex; +}; + +static int +rdma_mlx5_dv_init_qpair(struct spdk_rdma_mlx5_dv_qp *mlx5_qp) +{ + struct ibv_qp_attr qp_attr; + int qp_attr_mask, rc; + + qp_attr.qp_state = IBV_QPS_INIT; + rc = rdma_init_qp_attr(mlx5_qp->common.cm_id, &qp_attr, &qp_attr_mask); + if (rc) { + SPDK_ERRLOG("Failed to init attr IBV_QPS_INIT, errno %s (%d)\n", spdk_strerror(errno), errno); + return rc; + } + + rc = ibv_modify_qp(mlx5_qp->common.qp, &qp_attr, qp_attr_mask); + if (rc) { + SPDK_ERRLOG("ibv_modify_qp(IBV_QPS_INIT) failed, rc %d\n", rc); + return rc; + } + + qp_attr.qp_state = IBV_QPS_RTR; + rc = rdma_init_qp_attr(mlx5_qp->common.cm_id, &qp_attr, &qp_attr_mask); + if (rc) { + SPDK_ERRLOG("Failed to init attr IBV_QPS_RTR, errno %s (%d)\n", spdk_strerror(errno), errno); + return rc; + } + + rc = ibv_modify_qp(mlx5_qp->common.qp, &qp_attr, qp_attr_mask); + if (rc) { + 
SPDK_ERRLOG("ibv_modify_qp(IBV_QPS_RTR) failed, rc %d\n", rc); + return rc; + } + + qp_attr.qp_state = IBV_QPS_RTS; + rc = rdma_init_qp_attr(mlx5_qp->common.cm_id, &qp_attr, &qp_attr_mask); + if (rc) { + SPDK_ERRLOG("Failed to init attr IBV_QPS_RTR, errno %s (%d)\n", spdk_strerror(errno), errno); + return rc; + } + + rc = ibv_modify_qp(mlx5_qp->common.qp, &qp_attr, qp_attr_mask); + if (rc) { + SPDK_ERRLOG("ibv_modify_qp(IBV_QPS_RTS) failed, rc %d\n", rc); + } + + return rc; +} + +struct spdk_rdma_qp * +spdk_rdma_qp_create(struct rdma_cm_id *cm_id, struct spdk_rdma_qp_init_attr *qp_attr) +{ + assert(cm_id); + assert(qp_attr); + + struct ibv_qp *qp; + struct spdk_rdma_mlx5_dv_qp *mlx5_qp; + struct ibv_qp_init_attr_ex dv_qp_attr = { + .qp_context = qp_attr->qp_context, + .send_cq = qp_attr->send_cq, + .recv_cq = qp_attr->recv_cq, + .srq = qp_attr->srq, + .cap = qp_attr->cap, + .qp_type = IBV_QPT_RC, + .comp_mask = IBV_QP_INIT_ATTR_PD | IBV_QP_INIT_ATTR_SEND_OPS_FLAGS, + .pd = qp_attr->pd ? 
qp_attr->pd : cm_id->pd + }; + + assert(dv_qp_attr.pd); + + mlx5_qp = calloc(1, sizeof(*mlx5_qp)); + if (!mlx5_qp) { + SPDK_ERRLOG("qp memory allocation failed\n"); + return NULL; + } + + qp = mlx5dv_create_qp(cm_id->verbs, &dv_qp_attr, NULL); + + if (!qp) { + SPDK_ERRLOG("Failed to create qpair, errno %s (%d)\n", spdk_strerror(errno), errno); + free(mlx5_qp); + return NULL; + } + + mlx5_qp->common.qp = qp; + mlx5_qp->common.cm_id = cm_id; + mlx5_qp->qpex = ibv_qp_to_qp_ex(qp); + + if (!mlx5_qp->qpex) { + spdk_rdma_qp_destroy(&mlx5_qp->common); + return NULL; + } + + qp_attr->cap = dv_qp_attr.cap; + + return &mlx5_qp->common; +} + +int +spdk_rdma_qp_accept(struct spdk_rdma_qp *spdk_rdma_qp, struct rdma_conn_param *conn_param) +{ + struct spdk_rdma_mlx5_dv_qp *mlx5_qp; + + assert(spdk_rdma_qp != NULL); + assert(spdk_rdma_qp->cm_id != NULL); + + mlx5_qp = SPDK_CONTAINEROF(spdk_rdma_qp, struct spdk_rdma_mlx5_dv_qp, common); + + /* NVMEoF target must move qpair to RTS state */ + if (rdma_mlx5_dv_init_qpair(mlx5_qp) != 0) { + SPDK_ERRLOG("Failed to initialize qpair\n"); + /* Set errno to be compliant with rdma_accept behaviour */ + errno = ECONNABORTED; + return -1; + } + + return rdma_accept(spdk_rdma_qp->cm_id, conn_param); +} + +int +spdk_rdma_qp_complete_connect(struct spdk_rdma_qp *spdk_rdma_qp) +{ + struct spdk_rdma_mlx5_dv_qp *mlx5_qp; + int rc; + + assert(spdk_rdma_qp); + + mlx5_qp = SPDK_CONTAINEROF(spdk_rdma_qp, struct spdk_rdma_mlx5_dv_qp, common); + + rc = rdma_mlx5_dv_init_qpair(mlx5_qp); + if (rc) { + SPDK_ERRLOG("Failed to initialize qpair\n"); + return rc; + } + + rc = rdma_establish(mlx5_qp->common.cm_id); + if (rc) { + SPDK_ERRLOG("rdma_establish failed, errno %s (%d)\n", spdk_strerror(errno), errno); + } + + return rc; +} + +void +spdk_rdma_qp_destroy(struct spdk_rdma_qp *spdk_rdma_qp) +{ + struct spdk_rdma_mlx5_dv_qp *mlx5_qp; + int rc; + + assert(spdk_rdma_qp != NULL); + + mlx5_qp = SPDK_CONTAINEROF(spdk_rdma_qp, struct spdk_rdma_mlx5_dv_qp, 
common); + + if (spdk_rdma_qp->send_wrs.first != NULL) { + SPDK_WARNLOG("Destroying qpair with queued Work Requests\n"); + } + + if (mlx5_qp->common.qp) { + rc = ibv_destroy_qp(mlx5_qp->common.qp); + if (rc) { + SPDK_ERRLOG("Failed to destroy ibv qp %p, rc %d\n", mlx5_qp->common.qp, rc); + } + } + + free(mlx5_qp); +} + +int +spdk_rdma_qp_disconnect(struct spdk_rdma_qp *spdk_rdma_qp) +{ + int rc = 0; + + assert(spdk_rdma_qp != NULL); + + if (spdk_rdma_qp->qp) { + struct ibv_qp_attr qp_attr = {.qp_state = IBV_QPS_ERR}; + + rc = ibv_modify_qp(spdk_rdma_qp->qp, &qp_attr, IBV_QP_STATE); + if (rc) { + SPDK_ERRLOG("Failed to modify ibv qp %p state to ERR, rc %d\n", spdk_rdma_qp->qp, rc); + return rc; + } + } + + if (spdk_rdma_qp->cm_id) { + rc = rdma_disconnect(spdk_rdma_qp->cm_id); + if (rc) { + SPDK_ERRLOG("rdma_disconnect failed, errno %s (%d)\n", spdk_strerror(errno), errno); + } + } + + return rc; +} + +bool +spdk_rdma_qp_queue_send_wrs(struct spdk_rdma_qp *spdk_rdma_qp, struct ibv_send_wr *first) +{ + struct ibv_send_wr *tmp; + struct spdk_rdma_mlx5_dv_qp *mlx5_qp; + bool is_first; + + assert(spdk_rdma_qp); + assert(first); + + is_first = spdk_rdma_qp->send_wrs.first == NULL; + mlx5_qp = SPDK_CONTAINEROF(spdk_rdma_qp, struct spdk_rdma_mlx5_dv_qp, common); + + if (is_first) { + ibv_wr_start(mlx5_qp->qpex); + spdk_rdma_qp->send_wrs.first = first; + } else { + spdk_rdma_qp->send_wrs.last->next = first; + } + + for (tmp = first; tmp != NULL; tmp = tmp->next) { + mlx5_qp->qpex->wr_id = tmp->wr_id; + mlx5_qp->qpex->wr_flags = tmp->send_flags; + + switch (tmp->opcode) { + case IBV_WR_SEND: + ibv_wr_send(mlx5_qp->qpex); + break; + case IBV_WR_SEND_WITH_INV: + ibv_wr_send_inv(mlx5_qp->qpex, tmp->invalidate_rkey); + break; + case IBV_WR_RDMA_READ: + ibv_wr_rdma_read(mlx5_qp->qpex, tmp->wr.rdma.rkey, tmp->wr.rdma.remote_addr); + break; + case IBV_WR_RDMA_WRITE: + ibv_wr_rdma_write(mlx5_qp->qpex, tmp->wr.rdma.rkey, tmp->wr.rdma.remote_addr); + break; + default: + 
SPDK_ERRLOG("Unexpected opcode %d\n", tmp->opcode); + assert(0); + } + + ibv_wr_set_sge_list(mlx5_qp->qpex, tmp->num_sge, tmp->sg_list); + + spdk_rdma_qp->send_wrs.last = tmp; + } + + return is_first; +} + +int +spdk_rdma_qp_flush_send_wrs(struct spdk_rdma_qp *spdk_rdma_qp, struct ibv_send_wr **bad_wr) +{ + struct spdk_rdma_mlx5_dv_qp *mlx5_qp; + int rc; + + assert(bad_wr); + assert(spdk_rdma_qp); + + mlx5_qp = SPDK_CONTAINEROF(spdk_rdma_qp, struct spdk_rdma_mlx5_dv_qp, common); + + if (spdk_unlikely(spdk_rdma_qp->send_wrs.first == NULL)) { + return 0; + } + + rc = ibv_wr_complete(mlx5_qp->qpex); + + if (spdk_unlikely(rc)) { + /* If ibv_wr_complete reports an error that means that no WRs are posted to NIC */ + *bad_wr = spdk_rdma_qp->send_wrs.first; + } + + spdk_rdma_qp->send_wrs.first = NULL; + + return rc; +} diff --git a/src/spdk/lib/rdma/rdma_verbs.c b/src/spdk/lib/rdma/rdma_verbs.c new file mode 100644 index 000000000..66be5bf60 --- /dev/null +++ b/src/spdk/lib/rdma/rdma_verbs.c @@ -0,0 +1,167 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. All rights reserved. + * Copyright (c) Mellanox Technologies LTD. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <rdma/rdma_cma.h> + +#include "spdk/stdinc.h" +#include "spdk/string.h" +#include "spdk/likely.h" + +#include "spdk_internal/rdma.h" +#include "spdk_internal/log.h" + +struct spdk_rdma_qp * +spdk_rdma_qp_create(struct rdma_cm_id *cm_id, struct spdk_rdma_qp_init_attr *qp_attr) +{ + struct spdk_rdma_qp *spdk_rdma_qp; + int rc; + struct ibv_qp_init_attr attr = { + .qp_context = qp_attr->qp_context, + .send_cq = qp_attr->send_cq, + .recv_cq = qp_attr->recv_cq, + .srq = qp_attr->srq, + .cap = qp_attr->cap, + .qp_type = IBV_QPT_RC + }; + + spdk_rdma_qp = calloc(1, sizeof(*spdk_rdma_qp)); + if (!spdk_rdma_qp) { + SPDK_ERRLOG("qp memory allocation failed\n"); + return NULL; + } + + rc = rdma_create_qp(cm_id, qp_attr->pd, &attr); + if (rc) { + SPDK_ERRLOG("Failed to create qp, errno %s (%d)\n", spdk_strerror(errno), errno); + free(spdk_rdma_qp); + return NULL; + } + + qp_attr->cap = attr.cap; + spdk_rdma_qp->qp = cm_id->qp; + spdk_rdma_qp->cm_id = cm_id; + + return spdk_rdma_qp; +} + +int +spdk_rdma_qp_accept(struct spdk_rdma_qp *spdk_rdma_qp, struct rdma_conn_param *conn_param) +{ + assert(spdk_rdma_qp != NULL); + assert(spdk_rdma_qp->cm_id != NULL); + + return 
rdma_accept(spdk_rdma_qp->cm_id, conn_param); +} + +int +spdk_rdma_qp_complete_connect(struct spdk_rdma_qp *spdk_rdma_qp) +{ + /* Nothing to be done for Verbs */ + return 0; +} + +void +spdk_rdma_qp_destroy(struct spdk_rdma_qp *spdk_rdma_qp) +{ + assert(spdk_rdma_qp != NULL); + + if (spdk_rdma_qp->send_wrs.first != NULL) { + SPDK_WARNLOG("Destroying qpair with queued Work Requests\n"); + } + + if (spdk_rdma_qp->qp) { + rdma_destroy_qp(spdk_rdma_qp->cm_id); + } + + free(spdk_rdma_qp); +} + +int +spdk_rdma_qp_disconnect(struct spdk_rdma_qp *spdk_rdma_qp) +{ + int rc = 0; + + assert(spdk_rdma_qp != NULL); + + if (spdk_rdma_qp->cm_id) { + rc = rdma_disconnect(spdk_rdma_qp->cm_id); + if (rc) { + SPDK_ERRLOG("rdma_disconnect failed, errno %s (%d)\n", spdk_strerror(errno), errno); + } + } + + return rc; +} + +bool +spdk_rdma_qp_queue_send_wrs(struct spdk_rdma_qp *spdk_rdma_qp, struct ibv_send_wr *first) +{ + struct ibv_send_wr *last; + + assert(spdk_rdma_qp); + assert(first); + + last = first; + while (last->next != NULL) { + last = last->next; + } + + if (spdk_rdma_qp->send_wrs.first == NULL) { + spdk_rdma_qp->send_wrs.first = first; + spdk_rdma_qp->send_wrs.last = last; + return true; + } else { + spdk_rdma_qp->send_wrs.last->next = first; + spdk_rdma_qp->send_wrs.last = last; + return false; + } +} + +int +spdk_rdma_qp_flush_send_wrs(struct spdk_rdma_qp *spdk_rdma_qp, struct ibv_send_wr **bad_wr) +{ + int rc; + + assert(spdk_rdma_qp); + assert(bad_wr); + + if (spdk_unlikely(!spdk_rdma_qp->send_wrs.first)) { + return 0; + } + + rc = ibv_post_send(spdk_rdma_qp->qp, spdk_rdma_qp->send_wrs.first, bad_wr); + + spdk_rdma_qp->send_wrs.first = NULL; + + return rc; +} diff --git a/src/spdk/lib/rdma/spdk_rdma.map b/src/spdk/lib/rdma/spdk_rdma.map new file mode 100644 index 000000000..9268a2191 --- /dev/null +++ b/src/spdk/lib/rdma/spdk_rdma.map @@ -0,0 +1,14 @@ +{ + global: + + # Public functions + spdk_rdma_qp_create; + spdk_rdma_qp_accept; + spdk_rdma_qp_complete_connect; + 
spdk_rdma_qp_destroy; + spdk_rdma_qp_disconnect; + spdk_rdma_qp_queue_send_wrs; + spdk_rdma_qp_flush_send_wrs; + + local: *; +}; |