summaryrefslogtreecommitdiffstats
path: root/src/spdk/module/bdev/rbd
diff options
context:
space:
mode:
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-21 11:54:28 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-21 11:54:28 +0000
commite6918187568dbd01842d8d1d2c808ce16a894239 (patch)
tree64f88b554b444a49f656b6c656111a145cbbaa28 /src/spdk/module/bdev/rbd
parentInitial commit. (diff)
downloadceph-e6918187568dbd01842d8d1d2c808ce16a894239.tar.xz
ceph-e6918187568dbd01842d8d1d2c808ce16a894239.zip
Adding upstream version 18.2.2. (tag: upstream/18.2.2)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/spdk/module/bdev/rbd')
-rw-r--r--src/spdk/module/bdev/rbd/Makefile45
-rw-r--r--src/spdk/module/bdev/rbd/bdev_rbd.c898
-rw-r--r--src/spdk/module/bdev/rbd/bdev_rbd.h68
-rw-r--r--src/spdk/module/bdev/rbd/bdev_rbd_rpc.c252
4 files changed, 1263 insertions, 0 deletions
diff --git a/src/spdk/module/bdev/rbd/Makefile b/src/spdk/module/bdev/rbd/Makefile
new file mode 100644
index 000000000..055e14dac
--- /dev/null
+++ b/src/spdk/module/bdev/rbd/Makefile
@@ -0,0 +1,45 @@
+#
+# BSD LICENSE
+#
+# Copyright (c) Intel Corporation.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+# Locate the SPDK tree root (three directories above this one) and pull in
+# the build settings shared by the tree.
+SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..)
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
+
+# Library version numbers (presumably the shared-object major/minor consumed
+# by mk/spdk.lib.mk -- confirm there).
+SO_VER := 3
+SO_MINOR := 0
+
+# Sources compiled into the bdev_rbd library.
+C_SRCS = bdev_rbd.c bdev_rbd_rpc.c
+LIBNAME = bdev_rbd
+
+# Symbol map handed to the library build; NOTE(review): the "blank" name
+# suggests no symbols are exported -- verify against mk/spdk_blank.map.
+SPDK_MAP_FILE = $(SPDK_ROOT_DIR)/mk/spdk_blank.map
+
+include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk
diff --git a/src/spdk/module/bdev/rbd/bdev_rbd.c b/src/spdk/module/bdev/rbd/bdev_rbd.c
new file mode 100644
index 000000000..f3b2547c4
--- /dev/null
+++ b/src/spdk/module/bdev/rbd/bdev_rbd.c
@@ -0,0 +1,898 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spdk/stdinc.h"
+
+#include "bdev_rbd.h"
+
+#include <rbd/librbd.h>
+#include <rados/librados.h>
+#include <sys/eventfd.h>
+
+#include "spdk/conf.h"
+#include "spdk/env.h"
+#include "spdk/bdev.h"
+#include "spdk/thread.h"
+#include "spdk/json.h"
+#include "spdk/string.h"
+#include "spdk/util.h"
+
+#include "spdk/bdev_module.h"
+#include "spdk_internal/log.h"
+
+/* Upper bound on completions drained from librbd in one bdev_rbd_io_poll() call. */
+#define SPDK_RBD_QUEUE_DEPTH 128
+
+/* Number of rbd bdevs created so far; used to build the default "Ceph<N>" bdev name. */
+static int bdev_rbd_count = 0;
+
+/* Period handed to SPDK_POLLER_REGISTER() for the per-channel completion poller
+ * (microseconds, going by the name). */
+#define BDEV_RBD_POLL_US 50
+
+/* Per-bdev state for one RBD image exposed as an SPDK block device. */
+struct bdev_rbd {
+ /*
+ * Embedded generic bdev. Keep this member first: bdev_rbd_resize() passes a
+ * struct spdk_bdev pointer to bdev_rbd_get_io_channel(), which treats its
+ * argument as a struct bdev_rbd pointer.
+ */
+ struct spdk_bdev disk;
+ /* Image name; heap copy made in bdev_rbd_create(). */
+ char *rbd_name;
+ /* Optional rados user id; NULL means the librados default is used. */
+ char *user_id;
+ /* Pool name; heap copy made in bdev_rbd_create(). */
+ char *pool_name;
+ /* Optional NULL-terminated array of alternating key/value strings passed to rados_conf_set(). */
+ char **config;
+ /* Image information filled in by rbd_stat() in bdev_rbd_init(). */
+ rbd_image_info_t info;
+ /* List linkage; not referenced by the code visible in this file. */
+ TAILQ_ENTRY(bdev_rbd) tailq;
+ /* Poller armed by bdev_rbd_reset() that completes the pending reset. */
+ struct spdk_poller *reset_timer;
+ /* Reset I/O waiting for reset_timer to fire; NULL when no reset is pending. */
+ struct spdk_bdev_io *reset_bdev_io;
+};
+
+/* Per-I/O-channel context: each channel owns its own cluster connection and open image. */
+struct bdev_rbd_io_channel {
+ /* Pool I/O context created by bdev_rados_context_init(). */
+ rados_ioctx_t io_ctx;
+ /* Cluster handle created by bdev_rados_context_init(). */
+ rados_t cluster;
+ /* eventfd registered with rbd_set_image_notification() and checked by bdev_rbd_io_poll(). */
+ struct pollfd pfd;
+ /* Image handle opened in bdev_rbd_handle(). */
+ rbd_image_t image;
+ /* Owning bdev (the io_device this channel was created for). */
+ struct bdev_rbd *disk;
+ /* Completion poller running bdev_rbd_io_poll(). */
+ struct spdk_poller *poller;
+};
+
+/* Per-I/O driver context stored in spdk_bdev_io::driver_ctx. */
+struct bdev_rbd_io {
+ /* Bytes submitted by bdev_rbd_rw() that read completions have not yet accounted for. */
+ uint64_t remaining_len;
+ /* Number of librbd aio operations still outstanding for this I/O. */
+ int num_segments;
+ /* Set once any segment fails; decides the final completion status. */
+ bool failed;
+};
+
+/*
+ * Release a bdev_rbd together with every string and the config array it owns.
+ * A NULL pointer is accepted and ignored.
+ */
+static void
+bdev_rbd_free(struct bdev_rbd *rbd)
+{
+	if (rbd != NULL) {
+		free(rbd->disk.name);
+		free(rbd->rbd_name);
+		free(rbd->user_id);
+		free(rbd->pool_name);
+		bdev_rbd_free_config(rbd->config);
+		free(rbd);
+	}
+}
+
+/*
+ * Free a NULL-terminated array of heap strings, then the array itself.
+ * A NULL array is accepted and ignored.
+ */
+void
+bdev_rbd_free_config(char **config)
+{
+	size_t idx;
+
+	if (config == NULL) {
+		return;
+	}
+
+	for (idx = 0; config[idx] != NULL; idx++) {
+		free(config[idx]);
+	}
+	free(config);
+}
+
+/*
+ * Deep-copy a NULL-terminated array of strings.
+ *
+ * Returns a newly allocated, NULL-terminated array that the caller releases
+ * with bdev_rbd_free_config(), or NULL when config is NULL or an allocation
+ * fails.
+ */
+char **
+bdev_rbd_dup_config(const char *const *config)
+{
+	size_t n_entries = 0;
+	size_t idx;
+	char **dup;
+
+	if (config == NULL) {
+		return NULL;
+	}
+
+	while (config[n_entries] != NULL) {
+		n_entries++;
+	}
+
+	/* One extra zeroed slot acts as the terminator. */
+	dup = calloc(n_entries + 1, sizeof(*dup));
+	if (dup == NULL) {
+		return NULL;
+	}
+
+	for (idx = 0; idx < n_entries; idx++) {
+		dup[idx] = strdup(config[idx]);
+		if (dup[idx] == NULL) {
+			/* The partially filled array is still NULL-terminated, so it can be freed as-is. */
+			bdev_rbd_free_config(dup);
+			return NULL;
+		}
+	}
+
+	return dup;
+}
+
+/*
+ * Create a rados cluster handle, configure and connect it, and open an I/O
+ * context on the given pool.
+ *
+ * \param user_id rados user id handed to rados_create(); may be NULL.
+ * \param rbd_pool_name pool to open the I/O context on.
+ * \param config optional NULL-terminated array of alternating key/value
+ * strings applied with rados_conf_set(); when NULL the default configuration
+ * file is read instead.
+ * \param cluster output: connected cluster handle on success, NULL on failure.
+ * \param io_ctx output: pool I/O context on success.
+ *
+ * \return 0 on success, -1 on failure. On failure everything created here has
+ * already been torn down and *cluster is reset to NULL, so callers that run
+ * their own cleanup (e.g. bdev_rbd_free_channel()) do not shut the same
+ * handle down a second time.
+ */
+static int
+bdev_rados_context_init(const char *user_id, const char *rbd_pool_name, const char *const *config,
+			rados_t *cluster, rados_ioctx_t *io_ctx)
+{
+	int ret;
+
+	ret = rados_create(cluster, user_id);
+	if (ret < 0) {
+		SPDK_ERRLOG("Failed to create rados_t struct\n");
+		*cluster = NULL;
+		return -1;
+	}
+
+	if (config) {
+		const char *const *entry = config;
+		while (*entry) {
+			ret = rados_conf_set(*cluster, entry[0], entry[1]);
+			if (ret < 0) {
+				SPDK_ERRLOG("Failed to set %s = %s\n", entry[0], entry[1]);
+				goto err;
+			}
+			entry += 2;
+		}
+	} else {
+		ret = rados_conf_read_file(*cluster, NULL);
+		if (ret < 0) {
+			SPDK_ERRLOG("Failed to read conf file\n");
+			goto err;
+		}
+	}
+
+	ret = rados_connect(*cluster);
+	if (ret < 0) {
+		SPDK_ERRLOG("Failed to connect to rbd_pool\n");
+		goto err;
+	}
+
+	ret = rados_ioctx_create(*cluster, rbd_pool_name, io_ctx);
+	if (ret < 0) {
+		SPDK_ERRLOG("Failed to create ioctx\n");
+		goto err;
+	}
+
+	return 0;
+
+err:
+	rados_shutdown(*cluster);
+	/* Do not leave a dangling handle behind for the caller's cleanup path. */
+	*cluster = NULL;
+	return -1;
+}
+
+/*
+ * Probe an RBD image once at bdev creation time: connect, open the image,
+ * read its rbd_image_info_t into *info and tear everything down again.
+ *
+ * \param user_id rados user id; may be NULL for the default.
+ * \param rbd_pool_name pool containing the image.
+ * \param config optional key/value configuration (see bdev_rados_context_init()).
+ * \param rbd_name name of the image to stat.
+ * \param info output: filled by rbd_stat() on success.
+ *
+ * \return 0 on success, -1 on failure.
+ */
+static int
+bdev_rbd_init(const char *user_id, const char *rbd_pool_name, const char *const *config,
+	      const char *rbd_name, rbd_image_info_t *info)
+{
+	int ret;
+	int rc = -1;
+	rados_t cluster = NULL;
+	rados_ioctx_t io_ctx = NULL;
+	rbd_image_t image = NULL;
+
+	ret = bdev_rados_context_init(user_id, rbd_pool_name, config, &cluster, &io_ctx);
+	if (ret < 0) {
+		SPDK_ERRLOG("Failed to create rados context for user_id=%s and rbd_pool=%s\n",
+			    user_id ? user_id : "admin (the default)", rbd_pool_name);
+		return -1;
+	}
+
+	ret = rbd_open(io_ctx, rbd_name, &image, NULL);
+	if (ret < 0) {
+		SPDK_ERRLOG("Failed to open specified rbd device\n");
+		goto out;
+	}
+
+	ret = rbd_stat(image, info, sizeof(*info));
+	rbd_close(image);
+	if (ret < 0) {
+		SPDK_ERRLOG("Failed to stat specified rbd device\n");
+		goto out;
+	}
+
+	rc = 0;
+
+out:
+	/*
+	 * The cluster handle is local to this probe and is not handed to anyone,
+	 * so it must be shut down on the success path as well; otherwise every
+	 * created bdev leaks one cluster connection.
+	 */
+	rados_ioctx_destroy(io_ctx);
+	rados_shutdown(cluster);
+	return rc;
+}
+
+/* Flush an open image and then close it; the results of both calls are ignored. */
+static void
+bdev_rbd_exit(rbd_image_t image)
+{
+	(void)rbd_flush(image);
+	(void)rbd_close(image);
+}
+
+/*
+ * Completion callback handed to rbd_aio_create_completion(). Intentionally
+ * empty: completions are reaped by bdev_rbd_io_poll() instead.
+ */
+static void
+bdev_rbd_finish_aiocb(rbd_completion_t cb, void *arg)
+{
+	(void)cb;
+	(void)arg;
+}
+
+/*
+ * Submit one asynchronous librbd operation on behalf of bdev_io.
+ *
+ * \param image open image to operate on.
+ * \param bdev_io I/O being served; its type selects read, write or flush and
+ * it is attached to the completion as the callback argument.
+ * \param buf data buffer (unused for flush).
+ * \param offset byte offset into the image (unused for flush).
+ * \param len number of bytes (unused for flush).
+ *
+ * \return 0 when the operation was queued, -1 otherwise. On failure the
+ * completion has been released and nothing is outstanding.
+ */
+static int
+bdev_rbd_start_aio(rbd_image_t image, struct spdk_bdev_io *bdev_io,
+		   void *buf, uint64_t offset, size_t len)
+{
+	int ret;
+	rbd_completion_t comp;
+
+	ret = rbd_aio_create_completion(bdev_io, bdev_rbd_finish_aiocb,
+					&comp);
+	if (ret < 0) {
+		return -1;
+	}
+
+	switch (bdev_io->type) {
+	case SPDK_BDEV_IO_TYPE_READ:
+		ret = rbd_aio_read(image, offset, len, buf, comp);
+		break;
+	case SPDK_BDEV_IO_TYPE_WRITE:
+		ret = rbd_aio_write(image, offset, len, buf, comp);
+		break;
+	case SPDK_BDEV_IO_TYPE_FLUSH:
+		ret = rbd_aio_flush(image, comp);
+		break;
+	default:
+		/*
+		 * Nothing was submitted. Report failure so the completion is
+		 * released below and the caller does not wait for an event
+		 * that will never arrive.
+		 */
+		ret = -1;
+		break;
+	}
+
+	if (ret < 0) {
+		rbd_aio_release(comp);
+		return -1;
+	}
+
+	return 0;
+}
+
+static int bdev_rbd_library_init(void);
+
+/* Module hook: size of the per-I/O driver context this module needs. */
+static int
+bdev_rbd_get_ctx_size(void)
+{
+	return (int)sizeof(struct bdev_rbd_io);
+}
+
+/*
+ * Module descriptor for the rbd bdev module. It is also used as an identity
+ * tag: bdev_rbd_delete() and bdev_rbd_resize() compare bdev->module against
+ * its address to make sure they were handed an rbd bdev.
+ */
+static struct spdk_bdev_module rbd_if = {
+ .name = "rbd",
+ .module_init = bdev_rbd_library_init,
+ .get_ctx_size = bdev_rbd_get_ctx_size,
+
+};
+SPDK_BDEV_MODULE_REGISTER(rbd, &rbd_if)
+
+/*
+ * Submit a read or write as one librbd aio per iovec element.
+ *
+ * The per-I/O context is reset first; each successfully queued segment bumps
+ * num_segments and adds its length to remaining_len.
+ *
+ * \return 0 when at least one segment is in flight (or there was nothing to
+ * do); the error from bdev_rbd_start_aio() when the very first segment could
+ * not be queued. When a later segment fails, the I/O is marked failed and 0
+ * is returned so the already queued segments can drain through the poller.
+ */
+static int64_t
+bdev_rbd_rw(struct bdev_rbd *disk, struct spdk_io_channel *ch,
+	    struct spdk_bdev_io *bdev_io, struct iovec *iov,
+	    int iovcnt, size_t len, uint64_t offset)
+{
+	struct bdev_rbd_io_channel *channel = spdk_io_channel_get_ctx(ch);
+	struct bdev_rbd_io *io_ctx = (struct bdev_rbd_io *)bdev_io->driver_ctx;
+	size_t bytes_left = len;
+	int idx = 0;
+
+	io_ctx->failed = false;
+	io_ctx->num_segments = 0;
+	io_ctx->remaining_len = 0;
+
+	while (idx < iovcnt && bytes_left > 0) {
+		size_t chunk = spdk_min(bytes_left, iov[idx].iov_len);
+		int rc = bdev_rbd_start_aio(channel->image, bdev_io, iov[idx].iov_base, offset, chunk);
+
+		if (rc != 0) {
+			if (io_ctx->num_segments != 0) {
+				/* Earlier segments are in flight: let them finish, then fail the I/O. */
+				io_ctx->failed = true;
+				return 0;
+			}
+
+			/* Nothing was queued, so the error can be reported right away. */
+			return rc;
+		}
+
+		io_ctx->num_segments++;
+		io_ctx->remaining_len += chunk;
+
+		offset += chunk;
+		bytes_left -= chunk;
+		idx++;
+	}
+
+	return 0;
+}
+
+/*
+ * Submit a flush as a single librbd aio.
+ *
+ * The per-I/O context is fully initialised here, the same way bdev_rbd_rw()
+ * does it. The previous code only incremented num_segments and never touched
+ * `failed`, so the outcome depended on whatever an earlier user of the same
+ * driver_ctx had left behind (NOTE(review): this assumes driver_ctx is not
+ * zeroed between I/Os -- bdev_rbd_rw() makes the same assumption).
+ *
+ * \return 0 when the flush was queued, a negative value otherwise.
+ */
+static int64_t
+bdev_rbd_flush(struct bdev_rbd *disk, struct spdk_io_channel *ch,
+	       struct spdk_bdev_io *bdev_io, uint64_t offset, uint64_t nbytes)
+{
+	struct bdev_rbd_io_channel *rbdio_ch = spdk_io_channel_get_ctx(ch);
+	struct bdev_rbd_io *rbd_io = (struct bdev_rbd_io *)bdev_io->driver_ctx;
+	int rc;
+
+	rbd_io->remaining_len = 0;
+	rbd_io->failed = false;
+	/* A flush is always exactly one outstanding aio. */
+	rbd_io->num_segments = 1;
+
+	rc = bdev_rbd_start_aio(rbdio_ch->image, bdev_io, NULL, offset, nbytes);
+	if (rc < 0) {
+		/* Nothing is in flight after a failed submission. */
+		rbd_io->num_segments = 0;
+	}
+
+	return rc;
+}
+
+/*
+ * Poller callback armed by bdev_rbd_reset(): completes the pending reset I/O
+ * successfully, unregisters itself and clears the pending-reset pointer.
+ *
+ * NOTE(review): reset_bdev_io is cleared only after spdk_bdev_io_complete()
+ * returns. If completion could synchronously submit another reset, that new
+ * reset's state would be clobbered by the two statements that follow --
+ * confirm whether that re-entrancy is possible before reordering.
+ */
+static int
+bdev_rbd_reset_timer(void *arg)
+{
+ struct bdev_rbd *disk = arg;
+
+ /*
+ * TODO: This should check if any I/O is still in flight before completing the reset.
+ * For now, just complete after the timer expires.
+ */
+ spdk_bdev_io_complete(disk->reset_bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
+ spdk_poller_unregister(&disk->reset_timer);
+ disk->reset_bdev_io = NULL;
+
+ return SPDK_POLLER_BUSY;
+}
+
+/*
+ * Start a reset. librbd offers no way to cancel outstanding aio, so the reset
+ * is simply parked and completed by bdev_rbd_reset_timer() once the timer
+ * fires. Only one reset may be pending at a time.
+ *
+ * \return always 0.
+ */
+static int
+bdev_rbd_reset(struct bdev_rbd *disk, struct spdk_bdev_io *bdev_io)
+{
+	assert(disk->reset_bdev_io == NULL);
+
+	disk->reset_bdev_io = bdev_io;
+	disk->reset_timer = SPDK_POLLER_REGISTER(bdev_rbd_reset_timer, disk, 1 * 1000 * 1000);
+
+	return 0;
+}
+
+/*
+ * io_device unregister callback: releases the bdev_rbd once the io_device has
+ * actually been unregistered.
+ */
+static void
+bdev_rbd_free_cb(void *io_device)
+{
+	bdev_rbd_free(io_device);
+}
+
+/*
+ * bdev destruct hook.
+ *
+ * The bdev_rbd doubles as the io_device key and is dereferenced by the
+ * channel create callback (bdev_rbd_create_cb() and bdev_rbd_handle() read
+ * disk->rbd_name and friends). Freeing it right after requesting the
+ * unregister, as the code used to do, can leave such users with a dangling
+ * pointer when the unregister is deferred. Handing the free to the unregister
+ * callback ties the object's lifetime to the io_device's.
+ *
+ * \return always 0.
+ */
+static int
+bdev_rbd_destruct(void *ctx)
+{
+	struct bdev_rbd *rbd = ctx;
+
+	spdk_io_device_unregister(rbd, bdev_rbd_free_cb);
+
+	return 0;
+}
+
+/*
+ * Buffer-allocation callback for reads: once a buffer is available the read
+ * is submitted through bdev_rbd_rw(). The I/O is failed immediately when the
+ * buffer could not be obtained or nothing could be submitted.
+ */
+static void
+bdev_rbd_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
+		    bool success)
+{
+	int rc;
+
+	if (success) {
+		rc = bdev_rbd_rw(bdev_io->bdev->ctxt,
+				 ch,
+				 bdev_io,
+				 bdev_io->u.bdev.iovs,
+				 bdev_io->u.bdev.iovcnt,
+				 bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen,
+				 bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen);
+		if (rc == 0) {
+			return;
+		}
+	}
+
+	spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
+}
+
+/*
+ * Dispatch one bdev I/O by type.
+ *
+ * Reads first request a data buffer and continue in bdev_rbd_get_buf_cb();
+ * writes, flushes and resets are started directly.
+ *
+ * \return 0 when the request was accepted, a negative value when it could not
+ * be started or its type is not handled here.
+ */
+static int _bdev_rbd_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
+{
+	int rc;
+
+	switch (bdev_io->type) {
+	case SPDK_BDEV_IO_TYPE_READ:
+		spdk_bdev_io_get_buf(bdev_io, bdev_rbd_get_buf_cb,
+				     bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
+		rc = 0;
+		break;
+
+	case SPDK_BDEV_IO_TYPE_WRITE:
+		rc = bdev_rbd_rw((struct bdev_rbd *)bdev_io->bdev->ctxt,
+				 ch,
+				 bdev_io,
+				 bdev_io->u.bdev.iovs,
+				 bdev_io->u.bdev.iovcnt,
+				 bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen,
+				 bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen);
+		break;
+
+	case SPDK_BDEV_IO_TYPE_FLUSH:
+		rc = bdev_rbd_flush((struct bdev_rbd *)bdev_io->bdev->ctxt,
+				    ch,
+				    bdev_io,
+				    bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen,
+				    bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
+		break;
+
+	case SPDK_BDEV_IO_TYPE_RESET:
+		rc = bdev_rbd_reset((struct bdev_rbd *)bdev_io->bdev->ctxt,
+				    bdev_io);
+		break;
+
+	default:
+		rc = -1;
+		break;
+	}
+
+	return rc;
+}
+
+/* fn_table submit hook: fail the I/O right away when it cannot be dispatched. */
+static void bdev_rbd_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
+{
+	int rc = _bdev_rbd_submit_request(ch, bdev_io);
+
+	if (rc >= 0) {
+		return;
+	}
+
+	spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
+}
+
+/* fn_table hook: only read, write, flush and reset are supported. */
+static bool
+bdev_rbd_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
+{
+	return io_type == SPDK_BDEV_IO_TYPE_READ ||
+	       io_type == SPDK_BDEV_IO_TYPE_WRITE ||
+	       io_type == SPDK_BDEV_IO_TYPE_FLUSH ||
+	       io_type == SPDK_BDEV_IO_TYPE_RESET;
+}
+
+/*
+ * Per-channel poller: reap finished librbd completions and complete the bdev
+ * I/Os they belong to.
+ *
+ * The channel's eventfd is checked without blocking; only when it is ready
+ * are up to SPDK_RBD_QUEUE_DEPTH completions fetched. Each completion
+ * decrements its I/O's outstanding segment count; the I/O is completed when
+ * the count reaches zero.
+ *
+ * \return SPDK_POLLER_BUSY when the eventfd was not ready or at least one
+ * completion was reaped, SPDK_POLLER_IDLE otherwise.
+ */
+static int
+bdev_rbd_io_poll(void *arg)
+{
+	struct bdev_rbd_io_channel *channel = arg;
+	rbd_completion_t done[SPDK_RBD_QUEUE_DEPTH];
+	int n_ready, n_done, idx;
+
+	n_ready = poll(&channel->pfd, 1, 0);
+
+	/* There is exactly one fd per channel, so anything but 1 means "nothing to reap". */
+	if (n_ready != 1) {
+		return SPDK_POLLER_BUSY;
+	}
+
+	n_done = rbd_poll_io_events(channel->image, done, SPDK_RBD_QUEUE_DEPTH);
+	for (idx = 0; idx < n_done; idx++) {
+		struct spdk_bdev_io *bdev_io = rbd_aio_get_arg(done[idx]);
+		struct bdev_rbd_io *io_ctx = (struct bdev_rbd_io *)bdev_io->driver_ctx;
+		int status = rbd_aio_get_return_value(done[idx]);
+
+		assert(io_ctx->num_segments > 0);
+		io_ctx->num_segments--;
+
+		if (bdev_io->type != SPDK_BDEV_IO_TYPE_READ) {
+			/* Writes and flushes report 0 on success. */
+			if (status != 0) {
+				io_ctx->failed = true;
+			}
+		} else {
+			/* Reads report the number of bytes transferred. */
+			if (status > 0) {
+				io_ctx->remaining_len -= status;
+			}
+
+			/* A read that finished short of the requested length is a failure. */
+			if (io_ctx->num_segments == 0 && io_ctx->remaining_len != 0) {
+				io_ctx->failed = true;
+			}
+		}
+
+		rbd_aio_release(done[idx]);
+
+		if (io_ctx->num_segments == 0) {
+			if (io_ctx->failed) {
+				spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
+			} else {
+				spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
+			}
+		}
+	}
+
+	return n_done > 0 ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
+}
+
+/*
+ * Release whatever a channel has acquired so far: the open image, the pool
+ * I/O context, the cluster handle and the eventfd, in that order. Safe to
+ * call on a partially initialised channel and on NULL.
+ */
+static void
+bdev_rbd_free_channel(struct bdev_rbd_io_channel *ch)
+{
+	if (ch == NULL) {
+		return;
+	}
+
+	if (ch->image != NULL) {
+		bdev_rbd_exit(ch->image);
+	}
+
+	if (ch->io_ctx != NULL) {
+		rados_ioctx_destroy(ch->io_ctx);
+	}
+
+	if (ch->cluster != NULL) {
+		rados_shutdown(ch->cluster);
+	}
+
+	/* -1 marks "no eventfd yet" (see bdev_rbd_create_cb()). */
+	if (ch->pfd.fd >= 0) {
+		close(ch->pfd.fd);
+	}
+}
+
+/*
+ * Open the channel's image. Invoked through spdk_call_unaffinitized() from
+ * bdev_rbd_create_cb().
+ *
+ * \return the channel pointer passed in on success, NULL when the image could
+ * not be opened.
+ */
+static void *
+bdev_rbd_handle(void *arg)
+{
+	struct bdev_rbd_io_channel *channel = arg;
+	int rc;
+
+	rc = rbd_open(channel->io_ctx, channel->disk->rbd_name, &channel->image, NULL);
+	if (rc >= 0) {
+		return arg;
+	}
+
+	SPDK_ERRLOG("Failed to open specified rbd device\n");
+	return NULL;
+}
+
+/*
+ * I/O channel constructor: connect to the cluster, open the image, hook an
+ * eventfd up for completion notification and start the completion poller.
+ *
+ * \return 0 on success; -1 on failure, after releasing whatever had been
+ * acquired.
+ */
+static int
+bdev_rbd_create_cb(void *io_device, void *ctx_buf)
+{
+	struct bdev_rbd_io_channel *channel = ctx_buf;
+	struct bdev_rbd *rbd = io_device;
+	int rc;
+
+	/* Known "empty" values so the failure path can tell what needs releasing. */
+	channel->pfd.fd = -1;
+	channel->io_ctx = NULL;
+	channel->image = NULL;
+	channel->disk = rbd;
+
+	rc = bdev_rados_context_init(rbd->user_id, rbd->pool_name,
+				     (const char *const *)rbd->config,
+				     &channel->cluster, &channel->io_ctx);
+	if (rc < 0) {
+		SPDK_ERRLOG("Failed to create rados context for user_id %s and rbd_pool=%s\n",
+			    rbd->user_id ? rbd->user_id : "admin (the default)", rbd->pool_name);
+		goto fail;
+	}
+
+	if (spdk_call_unaffinitized(bdev_rbd_handle, channel) == NULL) {
+		goto fail;
+	}
+
+	channel->pfd.fd = eventfd(0, EFD_NONBLOCK);
+	if (channel->pfd.fd < 0) {
+		SPDK_ERRLOG("Failed to get eventfd\n");
+		goto fail;
+	}
+	channel->pfd.events = POLLIN;
+
+	rc = rbd_set_image_notification(channel->image, channel->pfd.fd, EVENT_TYPE_EVENTFD);
+	if (rc < 0) {
+		SPDK_ERRLOG("Failed to set rbd image notification\n");
+		goto fail;
+	}
+
+	channel->poller = SPDK_POLLER_REGISTER(bdev_rbd_io_poll, channel, BDEV_RBD_POLL_US);
+
+	return 0;
+
+fail:
+	bdev_rbd_free_channel(channel);
+	return -1;
+}
+
+/* I/O channel destructor: release the channel's resources, then stop its poller. */
+static void
+bdev_rbd_destroy_cb(void *io_device, void *ctx_buf)
+{
+	struct bdev_rbd_io_channel *channel = ctx_buf;
+
+	bdev_rbd_free_channel(channel);
+	spdk_poller_unregister(&channel->poller);
+}
+
+/*
+ * fn_table hook: obtain an I/O channel for the bdev. ctx is the bdev_rbd,
+ * which is also the io_device registered in bdev_rbd_create().
+ */
+static struct spdk_io_channel *
+bdev_rbd_get_io_channel(void *ctx)
+{
+	struct bdev_rbd *io_device = ctx;
+	struct spdk_io_channel *ch = spdk_get_io_channel(io_device);
+
+	return ch;
+}
+
+/*
+ * fn_table hook: write an "rbd" object describing this bdev (pool name, image
+ * name, and -- when set -- user id and key/value config).
+ *
+ * \return always 0.
+ */
+static int
+bdev_rbd_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
+{
+	struct bdev_rbd *rbd = ctx;
+	char **kv;
+
+	spdk_json_write_named_object_begin(w, "rbd");
+	spdk_json_write_named_string(w, "pool_name", rbd->pool_name);
+	spdk_json_write_named_string(w, "rbd_name", rbd->rbd_name);
+
+	if (rbd->user_id != NULL) {
+		spdk_json_write_named_string(w, "user_id", rbd->user_id);
+	}
+
+	if (rbd->config != NULL) {
+		spdk_json_write_named_object_begin(w, "config");
+		/* The array holds alternating keys and values, terminated by NULL. */
+		for (kv = rbd->config; *kv != NULL; kv += 2) {
+			spdk_json_write_named_string(w, kv[0], kv[1]);
+		}
+		spdk_json_write_object_end(w);
+	}
+
+	spdk_json_write_object_end(w);
+
+	return 0;
+}
+
+/*
+ * fn_table hook: emit the "bdev_rbd_create" RPC call, with its parameters,
+ * that would recreate this bdev.
+ */
+static void
+bdev_rbd_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
+{
+	struct bdev_rbd *rbd_bdev = bdev->ctxt;
+	char **kv;
+
+	spdk_json_write_object_begin(w);
+	spdk_json_write_named_string(w, "method", "bdev_rbd_create");
+
+	spdk_json_write_named_object_begin(w, "params");
+	spdk_json_write_named_string(w, "name", bdev->name);
+	spdk_json_write_named_string(w, "pool_name", rbd_bdev->pool_name);
+	spdk_json_write_named_string(w, "rbd_name", rbd_bdev->rbd_name);
+	spdk_json_write_named_uint32(w, "block_size", bdev->blocklen);
+
+	if (rbd_bdev->user_id != NULL) {
+		spdk_json_write_named_string(w, "user_id", rbd_bdev->user_id);
+	}
+
+	if (rbd_bdev->config != NULL) {
+		spdk_json_write_named_object_begin(w, "config");
+		/* Alternating keys and values, terminated by NULL. */
+		for (kv = rbd_bdev->config; *kv != NULL; kv += 2) {
+			spdk_json_write_named_string(w, kv[0], kv[1]);
+		}
+		spdk_json_write_object_end(w);
+	}
+
+	/* Close "params", then the outer object. */
+	spdk_json_write_object_end(w);
+	spdk_json_write_object_end(w);
+}
+
+/* bdev operations table installed on every rbd bdev in bdev_rbd_create(). */
+static const struct spdk_bdev_fn_table rbd_fn_table = {
+ .destruct = bdev_rbd_destruct,
+ .submit_request = bdev_rbd_submit_request,
+ .io_type_supported = bdev_rbd_io_type_supported,
+ .get_io_channel = bdev_rbd_get_io_channel,
+ .dump_info_json = bdev_rbd_dump_info_json,
+ .write_config_json = bdev_rbd_write_config_json,
+};
+
+/*
+ * Create and register an rbd bdev.
+ *
+ * \param bdev output: the registered bdev on success.
+ * \param name bdev name; when NULL a "Ceph<N>" name is generated.
+ * \param user_id rados user id; may be NULL.
+ * \param pool_name pool containing the image (required).
+ * \param config optional NULL-terminated key/value array; it is copied.
+ * \param rbd_name image name (required).
+ * \param block_size logical block size in bytes; must be non-zero because the
+ * block count is derived by dividing the image size by it.
+ *
+ * \return 0 on success; -EINVAL for missing names or a zero block size,
+ * -ENOMEM on allocation failure, the bdev_rbd_init() result when the image
+ * cannot be probed, or the spdk_bdev_register() error.
+ */
+int
+bdev_rbd_create(struct spdk_bdev **bdev, const char *name, const char *user_id,
+		const char *pool_name,
+		const char *const *config,
+		const char *rbd_name,
+		uint32_t block_size)
+{
+	struct bdev_rbd *rbd;
+	int ret;
+
+	if ((pool_name == NULL) || (rbd_name == NULL)) {
+		return -EINVAL;
+	}
+
+	/* Reject a zero block size up front: it would divide by zero below. */
+	if (block_size == 0) {
+		SPDK_ERRLOG("Block size must be non-zero\n");
+		return -EINVAL;
+	}
+
+	rbd = calloc(1, sizeof(struct bdev_rbd));
+	if (rbd == NULL) {
+		SPDK_ERRLOG("Failed to allocate bdev_rbd struct\n");
+		return -ENOMEM;
+	}
+
+	rbd->rbd_name = strdup(rbd_name);
+	if (!rbd->rbd_name) {
+		bdev_rbd_free(rbd);
+		return -ENOMEM;
+	}
+
+	if (user_id) {
+		rbd->user_id = strdup(user_id);
+		if (!rbd->user_id) {
+			bdev_rbd_free(rbd);
+			return -ENOMEM;
+		}
+	}
+
+	rbd->pool_name = strdup(pool_name);
+	if (!rbd->pool_name) {
+		bdev_rbd_free(rbd);
+		return -ENOMEM;
+	}
+
+	if (config && !(rbd->config = bdev_rbd_dup_config(config))) {
+		bdev_rbd_free(rbd);
+		return -ENOMEM;
+	}
+
+	/* Probe the image once to learn its size. */
+	ret = bdev_rbd_init(rbd->user_id, rbd->pool_name,
+			    (const char *const *)rbd->config,
+			    rbd_name, &rbd->info);
+	if (ret < 0) {
+		bdev_rbd_free(rbd);
+		SPDK_ERRLOG("Failed to init rbd device\n");
+		return ret;
+	}
+
+	if (name) {
+		rbd->disk.name = strdup(name);
+	} else {
+		rbd->disk.name = spdk_sprintf_alloc("Ceph%d", bdev_rbd_count);
+	}
+	if (!rbd->disk.name) {
+		bdev_rbd_free(rbd);
+		return -ENOMEM;
+	}
+	rbd->disk.product_name = "Ceph Rbd Disk";
+	bdev_rbd_count++;
+
+	rbd->disk.write_cache = 0;
+	rbd->disk.blocklen = block_size;
+	rbd->disk.blockcnt = rbd->info.size / rbd->disk.blocklen;
+	rbd->disk.ctxt = rbd;
+	rbd->disk.fn_table = &rbd_fn_table;
+	rbd->disk.module = &rbd_if;
+
+	SPDK_NOTICELOG("Add %s rbd disk to lun\n", rbd->disk.name);
+
+	/* The bdev_rbd itself serves as the io_device key for channel lookup. */
+	spdk_io_device_register(rbd, bdev_rbd_create_cb,
+				bdev_rbd_destroy_cb,
+				sizeof(struct bdev_rbd_io_channel),
+				rbd_name);
+	ret = spdk_bdev_register(&rbd->disk);
+	if (ret) {
+		spdk_io_device_unregister(rbd, NULL);
+		bdev_rbd_free(rbd);
+		return ret;
+	}
+
+	*bdev = &(rbd->disk);
+
+	return ret;
+}
+
+/*
+ * Unregister an rbd bdev. When bdev is NULL or does not belong to this
+ * module, cb_fn is invoked immediately with -ENODEV; otherwise cb_fn is
+ * passed on to spdk_bdev_unregister().
+ */
+void
+bdev_rbd_delete(struct spdk_bdev *bdev, spdk_delete_rbd_complete cb_fn, void *cb_arg)
+{
+	if (bdev != NULL && bdev->module == &rbd_if) {
+		spdk_bdev_unregister(bdev, cb_fn, cb_arg);
+		return;
+	}
+
+	cb_fn(cb_arg, -ENODEV);
+}
+
+/*
+ * Grow an rbd bdev.
+ *
+ * \param bdev bdev to resize; must belong to this module.
+ * \param new_size_in_mb requested size in MiB; must not be smaller than the
+ * current size.
+ *
+ * \return 0 on success; -EINVAL when bdev is not an rbd bdev, the size would
+ * shrink, or the size in bytes does not fit in 64 bits; -EIO when no I/O
+ * channel could be obtained; otherwise the error from rbd_resize() or
+ * spdk_bdev_notify_blockcnt_change().
+ */
+int
+bdev_rbd_resize(struct spdk_bdev *bdev, const uint64_t new_size_in_mb)
+{
+	const uint64_t bytes_per_mb = 1024 * 1024;
+	struct spdk_io_channel *ch;
+	struct bdev_rbd_io_channel *rbd_io_ch;
+	int rc;
+	uint64_t new_size_in_byte;
+	uint64_t current_size_in_mb;
+
+	if (bdev->module != &rbd_if) {
+		return -EINVAL;
+	}
+
+	current_size_in_mb = bdev->blocklen * bdev->blockcnt / bytes_per_mb;
+	if (current_size_in_mb > new_size_in_mb) {
+		SPDK_ERRLOG("The new bdev size must be lager than current bdev size.\n");
+		return -EINVAL;
+	}
+
+	/* Guard the MiB -> bytes conversion against 64-bit wrap-around. */
+	if (new_size_in_mb > UINT64_MAX / bytes_per_mb) {
+		SPDK_ERRLOG("The new bdev size is too large.\n");
+		return -EINVAL;
+	}
+	new_size_in_byte = new_size_in_mb * bytes_per_mb;
+
+	/* The io_device is the bdev_rbd stored in bdev->ctxt, not the bdev itself. */
+	ch = bdev_rbd_get_io_channel(bdev->ctxt);
+	if (ch == NULL) {
+		SPDK_ERRLOG("failed to get an I/O channel for the ceph bdev.\n");
+		return -EIO;
+	}
+	rbd_io_ch = spdk_io_channel_get_ctx(ch);
+
+	rc = rbd_resize(rbd_io_ch->image, new_size_in_byte);
+	/* The channel was only needed for its image handle; drop the reference taken above. */
+	spdk_put_io_channel(ch);
+	if (rc != 0) {
+		SPDK_ERRLOG("failed to resize the ceph bdev.\n");
+		return rc;
+	}
+
+	rc = spdk_bdev_notify_blockcnt_change(bdev, new_size_in_byte / bdev->blocklen);
+	if (rc != 0) {
+		SPDK_ERRLOG("failed to notify block cnt change.\n");
+		return rc;
+	}
+
+	return rc;
+}
+
+/*
+ * Module init hook: create one rbd bdev for every "Ceph" entry of the legacy
+ * [Ceph] configuration section. Each entry supplies a pool name, an image
+ * name and an optional block size (default 512, must be a positive multiple
+ * of 512).
+ *
+ * \return 0 when the section is absent or every entry was created; -1 on a
+ * malformed entry; otherwise the bdev_rbd_create() error. Processing stops at
+ * the first failure.
+ */
+static int
+bdev_rbd_library_init(void)
+{
+	struct spdk_conf_section *section = spdk_conf_find_section(NULL, "Ceph");
+	struct spdk_bdev *bdev;
+	int idx;
+
+	if (section == NULL) {
+		/* No Ceph section: there is nothing to create. */
+		return 0;
+	}
+
+	for (idx = 0; spdk_conf_section_get_nval(section, "Ceph", idx) != NULL; idx++) {
+		const char *pool_name;
+		const char *rbd_name;
+		const char *bs_str;
+		uint32_t block_size = 512; /* default value */
+		int rc;
+
+		pool_name = spdk_conf_section_get_nmval(section, "Ceph", idx, 0);
+		if (pool_name == NULL) {
+			SPDK_ERRLOG("Ceph%d: rbd pool name needs to be provided\n", idx);
+			return -1;
+		}
+
+		rbd_name = spdk_conf_section_get_nmval(section, "Ceph", idx, 1);
+		if (rbd_name == NULL) {
+			SPDK_ERRLOG("Ceph%d: format error\n", idx);
+			return -1;
+		}
+
+		bs_str = spdk_conf_section_get_nmval(section, "Ceph", idx, 2);
+		if (bs_str != NULL) {
+			long int parsed = spdk_strtol(bs_str, 10);
+
+			if (parsed <= 0) {
+				SPDK_ERRLOG("Invalid block size\n");
+				return -1;
+			}
+			if (parsed & 0x1ff) {
+				SPDK_ERRLOG("current block_size = %ld, it should be multiple of 512\n",
+					    parsed);
+				return -1;
+			}
+			block_size = (uint32_t)parsed;
+		}
+
+		/* TODO(?): user_id and rbd config values */
+		rc = bdev_rbd_create(&bdev, NULL, NULL, pool_name, NULL, rbd_name, block_size);
+		if (rc) {
+			return rc;
+		}
+	}
+
+	return 0;
+}
+
+/* Registers the "bdev_rbd" log component; SPDK_LOG_BDEV_RBD is the flag used by SPDK_DEBUGLOG() in bdev_rbd_rpc.c. */
+SPDK_LOG_REGISTER_COMPONENT("bdev_rbd", SPDK_LOG_BDEV_RBD)
diff --git a/src/spdk/module/bdev/rbd/bdev_rbd.h b/src/spdk/module/bdev/rbd/bdev_rbd.h
new file mode 100644
index 000000000..1d16a02db
--- /dev/null
+++ b/src/spdk/module/bdev/rbd/bdev_rbd.h
@@ -0,0 +1,68 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef SPDK_BDEV_RBD_H
+#define SPDK_BDEV_RBD_H
+
+#include "spdk/stdinc.h"
+
+#include "spdk/bdev.h"
+
+/**
+ * Free a NULL-terminated array of heap-allocated strings and the array itself.
+ *
+ * \param config Array to free; NULL is accepted.
+ */
+void bdev_rbd_free_config(char **config);
+/**
+ * Deep-copy a NULL-terminated array of strings.
+ *
+ * \param config Array to copy; may be NULL.
+ * \return Newly allocated copy to be released with bdev_rbd_free_config(), or
+ * NULL when config is NULL or an allocation fails.
+ */
+char **bdev_rbd_dup_config(const char *const *config);
+
+/** Completion callback for bdev_rbd_delete(); bdeverrno is 0 on success. */
+typedef void (*spdk_delete_rbd_complete)(void *cb_arg, int bdeverrno);
+
+/**
+ * Create and register an rbd bdev.
+ *
+ * \param bdev Output: the created bdev on success.
+ * \param name Name for the bdev; when NULL a "Ceph<N>" name is generated.
+ * \param user_id Rados user id; may be NULL.
+ * \param pool_name Pool containing the image; must not be NULL.
+ * \param config Optional NULL-terminated array of alternating key/value strings; copied.
+ * \param rbd_name Image name; must not be NULL.
+ * \param block_size Logical block size in bytes.
+ * \return 0 on success, a negative value on failure.
+ */
+int bdev_rbd_create(struct spdk_bdev **bdev, const char *name, const char *user_id,
+ const char *pool_name,
+ const char *const *config,
+ const char *rbd_name, uint32_t block_size);
+/**
+ * Delete rbd bdev.
+ *
+ * \param bdev Pointer to rbd bdev.
+ * \param cb_fn Function to call after deletion.
+ * \param cb_arg Argument to pass to cb_fn.
+ */
+void bdev_rbd_delete(struct spdk_bdev *bdev, spdk_delete_rbd_complete cb_fn,
+ void *cb_arg);
+
+/**
+ * Resize rbd bdev.
+ *
+ * \param bdev Pointer to rbd bdev.
+ * \param new_size_in_mb The new size in MiB for this bdev.
+ * \return 0 on success, a negative value on failure (e.g. -EINVAL when bdev is
+ * not an rbd bdev or the new size is smaller than the current one).
+ */
+int bdev_rbd_resize(struct spdk_bdev *bdev, const uint64_t new_size_in_mb);
+
+#endif /* SPDK_BDEV_RBD_H */
diff --git a/src/spdk/module/bdev/rbd/bdev_rbd_rpc.c b/src/spdk/module/bdev/rbd/bdev_rbd_rpc.c
new file mode 100644
index 000000000..c60c83a58
--- /dev/null
+++ b/src/spdk/module/bdev/rbd/bdev_rbd_rpc.c
@@ -0,0 +1,252 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "bdev_rbd.h"
+#include "spdk/rpc.h"
+#include "spdk/util.h"
+#include "spdk/string.h"
+#include "spdk_internal/log.h"
+
+/* Decoded parameters of the bdev_rbd_create RPC; every pointer is heap-owned and
+ * released by free_rpc_create_rbd(). */
+struct rpc_create_rbd {
+ char *name;
+ char *user_id;
+ char *pool_name;
+ char *rbd_name;
+ uint32_t block_size;
+ /* NULL-terminated array of alternating key/value strings built by bdev_rbd_decode_config(). */
+ char **config;
+};
+
+/* Release everything the decoder allocated for a bdev_rbd_create request. */
+static void
+free_rpc_create_rbd(struct rpc_create_rbd *req)
+{
+	bdev_rbd_free_config(req->config);
+	free(req->rbd_name);
+	free(req->pool_name);
+	free(req->user_id);
+	free(req->name);
+}
+
+/*
+ * JSON decoder for the "config" parameter: converts a JSON object of string
+ * members into a NULL-terminated array of alternating key/value strings.
+ *
+ * A JSON null is treated like an empty object and yields an array holding
+ * only the terminator. Any member that cannot be duplicated as a string
+ * fails the whole decode.
+ *
+ * \param values JSON value to decode.
+ * \param out points at the char ** that receives the array; set to NULL when
+ * member decoding fails.
+ * \return 0 on success, -1 on failure.
+ */
+static int
+bdev_rbd_decode_config(const struct spdk_json_val *values, void *out)
+{
+	char ***out_map = out;
+	char **slot;
+	uint32_t pos = 0;
+
+	if (values->type == SPDK_JSON_VAL_NULL) {
+		/* treated like empty object: empty config */
+		*out_map = calloc(1, sizeof(**out_map));
+		return (*out_map != NULL) ? 0 : -1;
+	}
+
+	if (values->type != SPDK_JSON_VAL_OBJECT_BEGIN) {
+		return -1;
+	}
+
+	/* One slot per contained JSON value plus the terminator. */
+	*out_map = calloc(values->len + 1, sizeof(**out_map));
+	if (*out_map == NULL) {
+		return -1;
+	}
+
+	slot = *out_map;
+	while (pos < values->len) {
+		const struct spdk_json_val *key = &values[pos + 1];
+		const struct spdk_json_val *val = &values[pos + 2];
+
+		/* Here we catch errors like invalid types. */
+		slot[0] = spdk_json_strdup(key);
+		if (slot[0] != NULL) {
+			slot[1] = spdk_json_strdup(val);
+		}
+		if (slot[0] == NULL || slot[1] == NULL) {
+			bdev_rbd_free_config(*out_map);
+			*out_map = NULL;
+			return -1;
+		}
+
+		/* Step over the name and however many values the member's value spans. */
+		pos += 1 + spdk_json_val_len(val);
+		slot += 2;
+	}
+
+	return 0;
+}
+
+/*
+ * Field decoders for bdev_rbd_create. Entries carrying a trailing `true` are
+ * presumably optional (the decoder's optional flag) -- confirm against
+ * struct spdk_json_object_decoder.
+ */
+static const struct spdk_json_object_decoder rpc_create_rbd_decoders[] = {
+ {"name", offsetof(struct rpc_create_rbd, name), spdk_json_decode_string, true},
+ {"user_id", offsetof(struct rpc_create_rbd, user_id), spdk_json_decode_string, true},
+ {"pool_name", offsetof(struct rpc_create_rbd, pool_name), spdk_json_decode_string},
+ {"rbd_name", offsetof(struct rpc_create_rbd, rbd_name), spdk_json_decode_string},
+ {"block_size", offsetof(struct rpc_create_rbd, block_size), spdk_json_decode_uint32},
+ {"config", offsetof(struct rpc_create_rbd, config), bdev_rbd_decode_config, true}
+};
+
+/*
+ * RPC handler for bdev_rbd_create: decode the parameters, create the bdev and
+ * reply with the new bdev's name, or with an error response on failure.
+ */
+static void
+rpc_bdev_rbd_create(struct spdk_jsonrpc_request *request,
+		    const struct spdk_json_val *params)
+{
+	struct rpc_create_rbd req = {};
+	struct spdk_bdev *bdev;
+	struct spdk_json_write_ctx *w;
+	int rc;
+
+	if (spdk_json_decode_object(params, rpc_create_rbd_decoders,
+				    SPDK_COUNTOF(rpc_create_rbd_decoders),
+				    &req) != 0) {
+		SPDK_DEBUGLOG(SPDK_LOG_BDEV_RBD, "spdk_json_decode_object failed\n");
+		spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+						 "spdk_json_decode_object failed");
+	} else {
+		rc = bdev_rbd_create(&bdev, req.name, req.user_id, req.pool_name,
+				     (const char *const *)req.config,
+				     req.rbd_name,
+				     req.block_size);
+		if (rc != 0) {
+			spdk_jsonrpc_send_error_response(request, rc, spdk_strerror(-rc));
+		} else {
+			w = spdk_jsonrpc_begin_result(request);
+			spdk_json_write_string(w, spdk_bdev_get_name(bdev));
+			spdk_jsonrpc_end_result(request, w);
+		}
+	}
+
+	/* The request strings are only needed during creation; bdev_rbd_create() keeps its own copies. */
+	free_rpc_create_rbd(&req);
+}
+SPDK_RPC_REGISTER("bdev_rbd_create", rpc_bdev_rbd_create, SPDK_RPC_RUNTIME)
+SPDK_RPC_REGISTER_ALIAS_DEPRECATED(bdev_rbd_create, construct_rbd_bdev)
+
+/* Decoded parameters of the bdev_rbd_delete RPC. */
+struct rpc_bdev_rbd_delete {
+ /* Name of the bdev to delete; heap-owned, released by free_rpc_bdev_rbd_delete(). */
+ char *name;
+};
+
+/* Release the strings decoded for a bdev_rbd_delete request. */
+static void
+free_rpc_bdev_rbd_delete(struct rpc_bdev_rbd_delete *req)
+{
+	char *bdev_name = req->name;
+
+	free(bdev_name);
+}
+
+/* Field decoders for bdev_rbd_delete: a single "name" string. */
+static const struct spdk_json_object_decoder rpc_bdev_rbd_delete_decoders[] = {
+ {"name", offsetof(struct rpc_bdev_rbd_delete, name), spdk_json_decode_string},
+};
+
+/*
+ * Deletion callback: replies to the pending RPC request (passed as cb_arg)
+ * with a boolean that is true when bdeverrno is 0.
+ */
+static void
+_rpc_bdev_rbd_delete_cb(void *cb_arg, int bdeverrno)
+{
+	struct spdk_jsonrpc_request *request = cb_arg;
+	struct spdk_json_write_ctx *w = spdk_jsonrpc_begin_result(request);
+	bool deleted = (bdeverrno == 0);
+
+	spdk_json_write_bool(w, deleted);
+	spdk_jsonrpc_end_result(request, w);
+}
+
+/*
+ * RPC handler for bdev_rbd_delete: look the bdev up by name and start its
+ * deletion; the reply is sent from _rpc_bdev_rbd_delete_cb(). Decode failures
+ * and unknown names are answered with an error response right away.
+ */
+static void
+rpc_bdev_rbd_delete(struct spdk_jsonrpc_request *request,
+		    const struct spdk_json_val *params)
+{
+	struct rpc_bdev_rbd_delete req = {NULL};
+	struct spdk_bdev *bdev;
+
+	if (spdk_json_decode_object(params, rpc_bdev_rbd_delete_decoders,
+				    SPDK_COUNTOF(rpc_bdev_rbd_delete_decoders),
+				    &req) != 0) {
+		spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+						 "spdk_json_decode_object failed");
+	} else {
+		bdev = spdk_bdev_get_by_name(req.name);
+		if (bdev != NULL) {
+			bdev_rbd_delete(bdev, _rpc_bdev_rbd_delete_cb, request);
+		} else {
+			spdk_jsonrpc_send_error_response(request, -ENODEV, spdk_strerror(ENODEV));
+		}
+	}
+
+	free_rpc_bdev_rbd_delete(&req);
+}
+SPDK_RPC_REGISTER("bdev_rbd_delete", rpc_bdev_rbd_delete, SPDK_RPC_RUNTIME)
+SPDK_RPC_REGISTER_ALIAS_DEPRECATED(bdev_rbd_delete, delete_rbd_bdev)
+
+/* Decoded parameters of the bdev_rbd_resize RPC. */
+struct rpc_bdev_rbd_resize {
+ /* Name of the bdev to resize; heap-owned, released by free_rpc_bdev_rbd_resize(). */
+ char *name;
+ /* Requested size; passed to bdev_rbd_resize() as its new_size_in_mb argument. */
+ uint64_t new_size;
+};
+
+/* Field decoders for bdev_rbd_resize: the bdev name and the new size. */
+static const struct spdk_json_object_decoder rpc_bdev_rbd_resize_decoders[] = {
+ {"name", offsetof(struct rpc_bdev_rbd_resize, name), spdk_json_decode_string},
+ {"new_size", offsetof(struct rpc_bdev_rbd_resize, new_size), spdk_json_decode_uint64}
+};
+
+/* Release the strings decoded for a bdev_rbd_resize request. */
+static void
+free_rpc_bdev_rbd_resize(struct rpc_bdev_rbd_resize *req)
+{
+	char *bdev_name = req->name;
+
+	free(bdev_name);
+}
+
+/*
+ * RPC handler for bdev_rbd_resize: look the bdev up by name, resize it and
+ * reply with `true`, or with an error response when decoding, lookup or the
+ * resize itself fails.
+ */
+static void
+rpc_bdev_rbd_resize(struct spdk_jsonrpc_request *request,
+		    const struct spdk_json_val *params)
+{
+	struct rpc_bdev_rbd_resize req = {};
+	struct spdk_json_write_ctx *w;
+	struct spdk_bdev *bdev;
+	int rc;
+
+	if (spdk_json_decode_object(params, rpc_bdev_rbd_resize_decoders,
+				    SPDK_COUNTOF(rpc_bdev_rbd_resize_decoders),
+				    &req) != 0) {
+		spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
+						 "spdk_json_decode_object failed");
+		free_rpc_bdev_rbd_resize(&req);
+		return;
+	}
+
+	bdev = spdk_bdev_get_by_name(req.name);
+	if (bdev == NULL) {
+		spdk_jsonrpc_send_error_response(request, -ENODEV, spdk_strerror(ENODEV));
+		free_rpc_bdev_rbd_resize(&req);
+		return;
+	}
+
+	rc = bdev_rbd_resize(bdev, req.new_size);
+	if (rc != 0) {
+		spdk_jsonrpc_send_error_response(request, rc, spdk_strerror(-rc));
+	} else {
+		w = spdk_jsonrpc_begin_result(request);
+		spdk_json_write_bool(w, true);
+		spdk_jsonrpc_end_result(request, w);
+	}
+
+	free_rpc_bdev_rbd_resize(&req);
+}
+SPDK_RPC_REGISTER("bdev_rbd_resize", rpc_bdev_rbd_resize, SPDK_RPC_RUNTIME)