diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
commit | e6918187568dbd01842d8d1d2c808ce16a894239 (patch) | |
tree | 64f88b554b444a49f656b6c656111a145cbbaa28 /src/spdk/module/bdev/rbd | |
parent | Initial commit. (diff) | |
download | ceph-e6918187568dbd01842d8d1d2c808ce16a894239.tar.xz ceph-e6918187568dbd01842d8d1d2c808ce16a894239.zip |
Adding upstream version 18.2.2. (tag: upstream/18.2.2)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/spdk/module/bdev/rbd')
-rw-r--r-- | src/spdk/module/bdev/rbd/Makefile | 45 | ||||
-rw-r--r-- | src/spdk/module/bdev/rbd/bdev_rbd.c | 898 | ||||
-rw-r--r-- | src/spdk/module/bdev/rbd/bdev_rbd.h | 68 | ||||
-rw-r--r-- | src/spdk/module/bdev/rbd/bdev_rbd_rpc.c | 252 |
4 files changed, 1263 insertions, 0 deletions
diff --git a/src/spdk/module/bdev/rbd/Makefile b/src/spdk/module/bdev/rbd/Makefile new file mode 100644 index 000000000..055e14dac --- /dev/null +++ b/src/spdk/module/bdev/rbd/Makefile @@ -0,0 +1,45 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..) 
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +SO_VER := 3 +SO_MINOR := 0 + +C_SRCS = bdev_rbd.c bdev_rbd_rpc.c +LIBNAME = bdev_rbd + +SPDK_MAP_FILE = $(SPDK_ROOT_DIR)/mk/spdk_blank.map + +include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk diff --git a/src/spdk/module/bdev/rbd/bdev_rbd.c b/src/spdk/module/bdev/rbd/bdev_rbd.c new file mode 100644 index 000000000..f3b2547c4 --- /dev/null +++ b/src/spdk/module/bdev/rbd/bdev_rbd.c @@ -0,0 +1,898 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spdk/stdinc.h" + +#include "bdev_rbd.h" + +#include <rbd/librbd.h> +#include <rados/librados.h> +#include <sys/eventfd.h> + +#include "spdk/conf.h" +#include "spdk/env.h" +#include "spdk/bdev.h" +#include "spdk/thread.h" +#include "spdk/json.h" +#include "spdk/string.h" +#include "spdk/util.h" + +#include "spdk/bdev_module.h" +#include "spdk_internal/log.h" + +#define SPDK_RBD_QUEUE_DEPTH 128 + +static int bdev_rbd_count = 0; + +#define BDEV_RBD_POLL_US 50 + +struct bdev_rbd { + struct spdk_bdev disk; + char *rbd_name; + char *user_id; + char *pool_name; + char **config; + rbd_image_info_t info; + TAILQ_ENTRY(bdev_rbd) tailq; + struct spdk_poller *reset_timer; + struct spdk_bdev_io *reset_bdev_io; +}; + +struct bdev_rbd_io_channel { + rados_ioctx_t io_ctx; + rados_t cluster; + struct pollfd pfd; + rbd_image_t image; + struct bdev_rbd *disk; + struct spdk_poller *poller; +}; + +struct bdev_rbd_io { + uint64_t remaining_len; + int num_segments; + bool failed; +}; + +static void +bdev_rbd_free(struct bdev_rbd *rbd) +{ + if (!rbd) { + return; + } + + free(rbd->disk.name); + free(rbd->rbd_name); + free(rbd->user_id); + free(rbd->pool_name); + bdev_rbd_free_config(rbd->config); + free(rbd); +} + +void +bdev_rbd_free_config(char **config) +{ + char **entry; + + if (config) { + for (entry = config; *entry; entry++) { + free(*entry); + } + free(config); + } +} + +char ** +bdev_rbd_dup_config(const char *const *config) +{ + 
size_t count; + char **copy; + + if (!config) { + return NULL; + } + for (count = 0; config[count]; count++) {} + copy = calloc(count + 1, sizeof(*copy)); + if (!copy) { + return NULL; + } + for (count = 0; config[count]; count++) { + if (!(copy[count] = strdup(config[count]))) { + bdev_rbd_free_config(copy); + return NULL; + } + } + return copy; +} + +static int +bdev_rados_context_init(const char *user_id, const char *rbd_pool_name, const char *const *config, + rados_t *cluster, rados_ioctx_t *io_ctx) +{ + int ret; + + ret = rados_create(cluster, user_id); + if (ret < 0) { + SPDK_ERRLOG("Failed to create rados_t struct\n"); + return -1; + } + + if (config) { + const char *const *entry = config; + while (*entry) { + ret = rados_conf_set(*cluster, entry[0], entry[1]); + if (ret < 0) { + SPDK_ERRLOG("Failed to set %s = %s\n", entry[0], entry[1]); + rados_shutdown(*cluster); + return -1; + } + entry += 2; + } + } else { + ret = rados_conf_read_file(*cluster, NULL); + if (ret < 0) { + SPDK_ERRLOG("Failed to read conf file\n"); + rados_shutdown(*cluster); + return -1; + } + } + + ret = rados_connect(*cluster); + if (ret < 0) { + SPDK_ERRLOG("Failed to connect to rbd_pool\n"); + rados_shutdown(*cluster); + return -1; + } + + ret = rados_ioctx_create(*cluster, rbd_pool_name, io_ctx); + + if (ret < 0) { + SPDK_ERRLOG("Failed to create ioctx\n"); + rados_shutdown(*cluster); + return -1; + } + + return 0; +} + +static int +bdev_rbd_init(const char *user_id, const char *rbd_pool_name, const char *const *config, + const char *rbd_name, rbd_image_info_t *info) +{ + int ret; + rados_t cluster = NULL; + rados_ioctx_t io_ctx = NULL; + rbd_image_t image = NULL; + + ret = bdev_rados_context_init(user_id, rbd_pool_name, config, &cluster, &io_ctx); + if (ret < 0) { + SPDK_ERRLOG("Failed to create rados context for user_id=%s and rbd_pool=%s\n", + user_id ? 
user_id : "admin (the default)", rbd_pool_name); + return -1; + } + + ret = rbd_open(io_ctx, rbd_name, &image, NULL); + if (ret < 0) { + SPDK_ERRLOG("Failed to open specified rbd device\n"); + goto err; + } + ret = rbd_stat(image, info, sizeof(*info)); + rbd_close(image); + if (ret < 0) { + SPDK_ERRLOG("Failed to stat specified rbd device\n"); + goto err; + } + + rados_ioctx_destroy(io_ctx); + return 0; +err: + rados_ioctx_destroy(io_ctx); + rados_shutdown(cluster); + return -1; +} + +static void +bdev_rbd_exit(rbd_image_t image) +{ + rbd_flush(image); + rbd_close(image); +} + +static void +bdev_rbd_finish_aiocb(rbd_completion_t cb, void *arg) +{ + /* Doing nothing here */ +} + +static int +bdev_rbd_start_aio(rbd_image_t image, struct spdk_bdev_io *bdev_io, + void *buf, uint64_t offset, size_t len) +{ + int ret; + rbd_completion_t comp; + + ret = rbd_aio_create_completion(bdev_io, bdev_rbd_finish_aiocb, + &comp); + if (ret < 0) { + return -1; + } + + if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ) { + ret = rbd_aio_read(image, offset, len, + buf, comp); + } else if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) { + ret = rbd_aio_write(image, offset, len, + buf, comp); + } else if (bdev_io->type == SPDK_BDEV_IO_TYPE_FLUSH) { + ret = rbd_aio_flush(image, comp); + } + + if (ret < 0) { + rbd_aio_release(comp); + return -1; + } + + return 0; +} + +static int bdev_rbd_library_init(void); + +static int +bdev_rbd_get_ctx_size(void) +{ + return sizeof(struct bdev_rbd_io); +} + +static struct spdk_bdev_module rbd_if = { + .name = "rbd", + .module_init = bdev_rbd_library_init, + .get_ctx_size = bdev_rbd_get_ctx_size, + +}; +SPDK_BDEV_MODULE_REGISTER(rbd, &rbd_if) + +static int64_t +bdev_rbd_rw(struct bdev_rbd *disk, struct spdk_io_channel *ch, + struct spdk_bdev_io *bdev_io, struct iovec *iov, + int iovcnt, size_t len, uint64_t offset) +{ + struct bdev_rbd_io *rbd_io = (struct bdev_rbd_io *)bdev_io->driver_ctx; + struct bdev_rbd_io_channel *rbdio_ch = spdk_io_channel_get_ctx(ch); + 
size_t remaining = len; + int i, rc; + + rbd_io->remaining_len = 0; + rbd_io->num_segments = 0; + rbd_io->failed = false; + + for (i = 0; i < iovcnt && remaining > 0; i++) { + size_t seg_len = spdk_min(remaining, iov[i].iov_len); + + rc = bdev_rbd_start_aio(rbdio_ch->image, bdev_io, iov[i].iov_base, offset, seg_len); + if (rc) { + /* + * This bdev_rbd_start_aio() call failed, but if any previous ones were + * submitted, we need to wait for them to finish. + */ + if (rbd_io->num_segments == 0) { + /* No previous I/O submitted - return error code immediately. */ + return rc; + } + + /* Return and wait for outstanding I/O to complete. */ + rbd_io->failed = true; + return 0; + } + + rbd_io->num_segments++; + rbd_io->remaining_len += seg_len; + + offset += seg_len; + remaining -= seg_len; + } + + return 0; +} + +static int64_t +bdev_rbd_flush(struct bdev_rbd *disk, struct spdk_io_channel *ch, + struct spdk_bdev_io *bdev_io, uint64_t offset, uint64_t nbytes) +{ + struct bdev_rbd_io_channel *rbdio_ch = spdk_io_channel_get_ctx(ch); + struct bdev_rbd_io *rbd_io = (struct bdev_rbd_io *)bdev_io->driver_ctx; + + rbd_io->num_segments++; + return bdev_rbd_start_aio(rbdio_ch->image, bdev_io, NULL, offset, nbytes); +} + +static int +bdev_rbd_reset_timer(void *arg) +{ + struct bdev_rbd *disk = arg; + + /* + * TODO: This should check if any I/O is still in flight before completing the reset. + * For now, just complete after the timer expires. + */ + spdk_bdev_io_complete(disk->reset_bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS); + spdk_poller_unregister(&disk->reset_timer); + disk->reset_bdev_io = NULL; + + return SPDK_POLLER_BUSY; +} + +static int +bdev_rbd_reset(struct bdev_rbd *disk, struct spdk_bdev_io *bdev_io) +{ + /* + * HACK: Since librbd doesn't provide any way to cancel outstanding aio, just kick off a + * timer to wait for in-flight I/O to complete. 
+ */ + assert(disk->reset_bdev_io == NULL); + disk->reset_bdev_io = bdev_io; + disk->reset_timer = SPDK_POLLER_REGISTER(bdev_rbd_reset_timer, disk, 1 * 1000 * 1000); + + return 0; +} + +static int +bdev_rbd_destruct(void *ctx) +{ + struct bdev_rbd *rbd = ctx; + + spdk_io_device_unregister(rbd, NULL); + + bdev_rbd_free(rbd); + return 0; +} + +static void +bdev_rbd_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io, + bool success) +{ + int ret; + + if (!success) { + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); + return; + } + + ret = bdev_rbd_rw(bdev_io->bdev->ctxt, + ch, + bdev_io, + bdev_io->u.bdev.iovs, + bdev_io->u.bdev.iovcnt, + bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen, + bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen); + + if (ret != 0) { + spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); + } +} + +static int _bdev_rbd_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) +{ + switch (bdev_io->type) { + case SPDK_BDEV_IO_TYPE_READ: + spdk_bdev_io_get_buf(bdev_io, bdev_rbd_get_buf_cb, + bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen); + return 0; + + case SPDK_BDEV_IO_TYPE_WRITE: + return bdev_rbd_rw((struct bdev_rbd *)bdev_io->bdev->ctxt, + ch, + bdev_io, + bdev_io->u.bdev.iovs, + bdev_io->u.bdev.iovcnt, + bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen, + bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen); + + case SPDK_BDEV_IO_TYPE_FLUSH: + return bdev_rbd_flush((struct bdev_rbd *)bdev_io->bdev->ctxt, + ch, + bdev_io, + bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen, + bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen); + + case SPDK_BDEV_IO_TYPE_RESET: + return bdev_rbd_reset((struct bdev_rbd *)bdev_io->bdev->ctxt, + bdev_io); + + default: + return -1; + } + return 0; +} + +static void bdev_rbd_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) +{ + if (_bdev_rbd_submit_request(ch, bdev_io) < 0) { + 
spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); + } +} + +static bool +bdev_rbd_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type) +{ + switch (io_type) { + case SPDK_BDEV_IO_TYPE_READ: + case SPDK_BDEV_IO_TYPE_WRITE: + case SPDK_BDEV_IO_TYPE_FLUSH: + case SPDK_BDEV_IO_TYPE_RESET: + return true; + + default: + return false; + } +} + +static int +bdev_rbd_io_poll(void *arg) +{ + struct bdev_rbd_io_channel *ch = arg; + int i, io_status, rc; + rbd_completion_t comps[SPDK_RBD_QUEUE_DEPTH]; + struct spdk_bdev_io *bdev_io; + struct bdev_rbd_io *rbd_io; + + rc = poll(&ch->pfd, 1, 0); + + /* check the return value of poll since we have only one fd for each channel */ + if (rc != 1) { + return SPDK_POLLER_BUSY; + } + + rc = rbd_poll_io_events(ch->image, comps, SPDK_RBD_QUEUE_DEPTH); + for (i = 0; i < rc; i++) { + bdev_io = rbd_aio_get_arg(comps[i]); + rbd_io = (struct bdev_rbd_io *)bdev_io->driver_ctx; + io_status = rbd_aio_get_return_value(comps[i]); + + assert(rbd_io->num_segments > 0); + rbd_io->num_segments--; + + if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ) { + if (io_status > 0) { + /* For reads, io_status is the length */ + rbd_io->remaining_len -= io_status; + } + + if (rbd_io->num_segments == 0 && rbd_io->remaining_len != 0) { + rbd_io->failed = true; + } + } else { + /* For others, 0 means success */ + if (io_status != 0) { + rbd_io->failed = true; + } + } + + rbd_aio_release(comps[i]); + + if (rbd_io->num_segments == 0) { + spdk_bdev_io_complete(bdev_io, + rbd_io->failed ? SPDK_BDEV_IO_STATUS_FAILED : SPDK_BDEV_IO_STATUS_SUCCESS); + } + } + + return rc > 0 ? 
SPDK_POLLER_BUSY : SPDK_POLLER_IDLE; +} + +static void +bdev_rbd_free_channel(struct bdev_rbd_io_channel *ch) +{ + if (!ch) { + return; + } + + if (ch->image) { + bdev_rbd_exit(ch->image); + } + + if (ch->io_ctx) { + rados_ioctx_destroy(ch->io_ctx); + } + + if (ch->cluster) { + rados_shutdown(ch->cluster); + } + + if (ch->pfd.fd >= 0) { + close(ch->pfd.fd); + } +} + +static void * +bdev_rbd_handle(void *arg) +{ + struct bdev_rbd_io_channel *ch = arg; + void *ret = arg; + + if (rbd_open(ch->io_ctx, ch->disk->rbd_name, &ch->image, NULL) < 0) { + SPDK_ERRLOG("Failed to open specified rbd device\n"); + ret = NULL; + } + + return ret; +} + +static int +bdev_rbd_create_cb(void *io_device, void *ctx_buf) +{ + struct bdev_rbd_io_channel *ch = ctx_buf; + int ret; + + ch->disk = io_device; + ch->image = NULL; + ch->io_ctx = NULL; + ch->pfd.fd = -1; + + ret = bdev_rados_context_init(ch->disk->user_id, ch->disk->pool_name, + (const char *const *)ch->disk->config, + &ch->cluster, &ch->io_ctx); + if (ret < 0) { + SPDK_ERRLOG("Failed to create rados context for user_id %s and rbd_pool=%s\n", + ch->disk->user_id ? 
ch->disk->user_id : "admin (the default)", ch->disk->pool_name); + goto err; + } + + if (spdk_call_unaffinitized(bdev_rbd_handle, ch) == NULL) { + goto err; + } + + ch->pfd.fd = eventfd(0, EFD_NONBLOCK); + if (ch->pfd.fd < 0) { + SPDK_ERRLOG("Failed to get eventfd\n"); + goto err; + } + + ch->pfd.events = POLLIN; + ret = rbd_set_image_notification(ch->image, ch->pfd.fd, EVENT_TYPE_EVENTFD); + if (ret < 0) { + SPDK_ERRLOG("Failed to set rbd image notification\n"); + goto err; + } + + ch->poller = SPDK_POLLER_REGISTER(bdev_rbd_io_poll, ch, BDEV_RBD_POLL_US); + + return 0; + +err: + bdev_rbd_free_channel(ch); + return -1; +} + +static void +bdev_rbd_destroy_cb(void *io_device, void *ctx_buf) +{ + struct bdev_rbd_io_channel *io_channel = ctx_buf; + + bdev_rbd_free_channel(io_channel); + + spdk_poller_unregister(&io_channel->poller); +} + +static struct spdk_io_channel * +bdev_rbd_get_io_channel(void *ctx) +{ + struct bdev_rbd *rbd_bdev = ctx; + + return spdk_get_io_channel(rbd_bdev); +} + +static int +bdev_rbd_dump_info_json(void *ctx, struct spdk_json_write_ctx *w) +{ + struct bdev_rbd *rbd_bdev = ctx; + + spdk_json_write_named_object_begin(w, "rbd"); + + spdk_json_write_named_string(w, "pool_name", rbd_bdev->pool_name); + + spdk_json_write_named_string(w, "rbd_name", rbd_bdev->rbd_name); + + if (rbd_bdev->user_id) { + spdk_json_write_named_string(w, "user_id", rbd_bdev->user_id); + } + + if (rbd_bdev->config) { + char **entry = rbd_bdev->config; + + spdk_json_write_named_object_begin(w, "config"); + while (*entry) { + spdk_json_write_named_string(w, entry[0], entry[1]); + entry += 2; + } + spdk_json_write_object_end(w); + } + + spdk_json_write_object_end(w); + + return 0; +} + +static void +bdev_rbd_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w) +{ + struct bdev_rbd *rbd = bdev->ctxt; + + spdk_json_write_object_begin(w); + + spdk_json_write_named_string(w, "method", "bdev_rbd_create"); + + spdk_json_write_named_object_begin(w, "params"); + 
spdk_json_write_named_string(w, "name", bdev->name); + spdk_json_write_named_string(w, "pool_name", rbd->pool_name); + spdk_json_write_named_string(w, "rbd_name", rbd->rbd_name); + spdk_json_write_named_uint32(w, "block_size", bdev->blocklen); + if (rbd->user_id) { + spdk_json_write_named_string(w, "user_id", rbd->user_id); + } + + if (rbd->config) { + char **entry = rbd->config; + + spdk_json_write_named_object_begin(w, "config"); + while (*entry) { + spdk_json_write_named_string(w, entry[0], entry[1]); + entry += 2; + } + spdk_json_write_object_end(w); + } + + spdk_json_write_object_end(w); + + spdk_json_write_object_end(w); +} + +static const struct spdk_bdev_fn_table rbd_fn_table = { + .destruct = bdev_rbd_destruct, + .submit_request = bdev_rbd_submit_request, + .io_type_supported = bdev_rbd_io_type_supported, + .get_io_channel = bdev_rbd_get_io_channel, + .dump_info_json = bdev_rbd_dump_info_json, + .write_config_json = bdev_rbd_write_config_json, +}; + +int +bdev_rbd_create(struct spdk_bdev **bdev, const char *name, const char *user_id, + const char *pool_name, + const char *const *config, + const char *rbd_name, + uint32_t block_size) +{ + struct bdev_rbd *rbd; + int ret; + + if ((pool_name == NULL) || (rbd_name == NULL)) { + return -EINVAL; + } + + rbd = calloc(1, sizeof(struct bdev_rbd)); + if (rbd == NULL) { + SPDK_ERRLOG("Failed to allocate bdev_rbd struct\n"); + return -ENOMEM; + } + + rbd->rbd_name = strdup(rbd_name); + if (!rbd->rbd_name) { + bdev_rbd_free(rbd); + return -ENOMEM; + } + + if (user_id) { + rbd->user_id = strdup(user_id); + if (!rbd->user_id) { + bdev_rbd_free(rbd); + return -ENOMEM; + } + } + + rbd->pool_name = strdup(pool_name); + if (!rbd->pool_name) { + bdev_rbd_free(rbd); + return -ENOMEM; + } + + if (config && !(rbd->config = bdev_rbd_dup_config(config))) { + bdev_rbd_free(rbd); + return -ENOMEM; + } + + ret = bdev_rbd_init(rbd->user_id, rbd->pool_name, + (const char *const *)rbd->config, + rbd_name, &rbd->info); + if (ret < 0) { + 
bdev_rbd_free(rbd); + SPDK_ERRLOG("Failed to init rbd device\n"); + return ret; + } + + if (name) { + rbd->disk.name = strdup(name); + } else { + rbd->disk.name = spdk_sprintf_alloc("Ceph%d", bdev_rbd_count); + } + if (!rbd->disk.name) { + bdev_rbd_free(rbd); + return -ENOMEM; + } + rbd->disk.product_name = "Ceph Rbd Disk"; + bdev_rbd_count++; + + rbd->disk.write_cache = 0; + rbd->disk.blocklen = block_size; + rbd->disk.blockcnt = rbd->info.size / rbd->disk.blocklen; + rbd->disk.ctxt = rbd; + rbd->disk.fn_table = &rbd_fn_table; + rbd->disk.module = &rbd_if; + + SPDK_NOTICELOG("Add %s rbd disk to lun\n", rbd->disk.name); + + spdk_io_device_register(rbd, bdev_rbd_create_cb, + bdev_rbd_destroy_cb, + sizeof(struct bdev_rbd_io_channel), + rbd_name); + ret = spdk_bdev_register(&rbd->disk); + if (ret) { + spdk_io_device_unregister(rbd, NULL); + bdev_rbd_free(rbd); + return ret; + } + + *bdev = &(rbd->disk); + + return ret; +} + +void +bdev_rbd_delete(struct spdk_bdev *bdev, spdk_delete_rbd_complete cb_fn, void *cb_arg) +{ + if (!bdev || bdev->module != &rbd_if) { + cb_fn(cb_arg, -ENODEV); + return; + } + + spdk_bdev_unregister(bdev, cb_fn, cb_arg); +} + +int +bdev_rbd_resize(struct spdk_bdev *bdev, const uint64_t new_size_in_mb) +{ + struct spdk_io_channel *ch; + struct bdev_rbd_io_channel *rbd_io_ch; + int rc; + uint64_t new_size_in_byte; + uint64_t current_size_in_mb; + + if (bdev->module != &rbd_if) { + return -EINVAL; + } + + current_size_in_mb = bdev->blocklen * bdev->blockcnt / (1024 * 1024); + if (current_size_in_mb > new_size_in_mb) { + SPDK_ERRLOG("The new bdev size must be lager than current bdev size.\n"); + return -EINVAL; + } + + ch = bdev_rbd_get_io_channel(bdev); + rbd_io_ch = spdk_io_channel_get_ctx(ch); + new_size_in_byte = new_size_in_mb * 1024 * 1024; + + rc = rbd_resize(rbd_io_ch->image, new_size_in_byte); + if (rc != 0) { + SPDK_ERRLOG("failed to resize the ceph bdev.\n"); + return rc; + } + + rc = spdk_bdev_notify_blockcnt_change(bdev, 
new_size_in_byte / bdev->blocklen); + if (rc != 0) { + SPDK_ERRLOG("failed to notify block cnt change.\n"); + return rc; + } + + return rc; +} + +static int +bdev_rbd_library_init(void) +{ + int i, rc = 0; + const char *val; + const char *pool_name; + const char *rbd_name; + struct spdk_bdev *bdev; + uint32_t block_size; + long int tmp; + + struct spdk_conf_section *sp = spdk_conf_find_section(NULL, "Ceph"); + + if (sp == NULL) { + /* + * Ceph section not found. Do not initialize any rbd LUNS. + */ + goto end; + } + + /* Init rbd block devices */ + for (i = 0; ; i++) { + val = spdk_conf_section_get_nval(sp, "Ceph", i); + if (val == NULL) { + break; + } + + /* get the Rbd_pool name */ + pool_name = spdk_conf_section_get_nmval(sp, "Ceph", i, 0); + if (pool_name == NULL) { + SPDK_ERRLOG("Ceph%d: rbd pool name needs to be provided\n", i); + rc = -1; + goto end; + } + + rbd_name = spdk_conf_section_get_nmval(sp, "Ceph", i, 1); + if (rbd_name == NULL) { + SPDK_ERRLOG("Ceph%d: format error\n", i); + rc = -1; + goto end; + } + + val = spdk_conf_section_get_nmval(sp, "Ceph", i, 2); + + if (val == NULL) { + block_size = 512; /* default value */ + } else { + tmp = spdk_strtol(val, 10); + if (tmp <= 0) { + SPDK_ERRLOG("Invalid block size\n"); + rc = -1; + goto end; + } else if (tmp & 0x1ff) { + SPDK_ERRLOG("current block_size = %ld, it should be multiple of 512\n", + tmp); + rc = -1; + goto end; + } + block_size = (uint32_t)tmp; + } + + /* TODO(?): user_id and rbd config values */ + rc = bdev_rbd_create(&bdev, NULL, NULL, pool_name, NULL, rbd_name, block_size); + if (rc) { + goto end; + } + } + +end: + return rc; +} + +SPDK_LOG_REGISTER_COMPONENT("bdev_rbd", SPDK_LOG_BDEV_RBD) diff --git a/src/spdk/module/bdev/rbd/bdev_rbd.h b/src/spdk/module/bdev/rbd/bdev_rbd.h new file mode 100644 index 000000000..1d16a02db --- /dev/null +++ b/src/spdk/module/bdev/rbd/bdev_rbd.h @@ -0,0 +1,68 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef SPDK_BDEV_RBD_H +#define SPDK_BDEV_RBD_H + +#include "spdk/stdinc.h" + +#include "spdk/bdev.h" + /* Config arrays are NULL-terminated lists of heap-allocated strings. bdev_rbd_free_config() frees every entry and the array itself (NULL is accepted); bdev_rbd_dup_config() returns a deep copy, or NULL when config is NULL or an allocation fails. */ +void bdev_rbd_free_config(char **config); +char **bdev_rbd_dup_config(const char *const *config); + /* Completion callback for bdev_rbd_delete(): receives the caller's cb_arg and a status in bdeverrno (0 is treated as success by the RPC layer; -ENODEV is passed for an unknown bdev). */ +typedef void (*spdk_delete_rbd_complete)(void *cb_arg, int bdeverrno); + /* Create and register an rbd-backed bdev. On success returns 0 and stores the new bdev in *bdev. name, user_id and config may be NULL; pool_name and rbd_name are required (-EINVAL otherwise). Returns -ENOMEM on allocation failure. */ +int bdev_rbd_create(struct spdk_bdev **bdev, const char *name, const char *user_id, + const char *pool_name, + const char *const *config, + const char *rbd_name, uint32_t block_size); +/** + * Delete rbd bdev. + * + * \param bdev Pointer to rbd bdev. + * \param cb_fn Function to call after deletion. + * \param cb_arg Argument to pass to cb_fn. + */ +void bdev_rbd_delete(struct spdk_bdev *bdev, spdk_delete_rbd_complete cb_fn, + void *cb_arg); + +/** + * Resize rbd bdev. + * + * \param bdev Pointer to rbd bdev. + * \param new_size_in_mb The new size in MiB for this bdev. + */ +int bdev_rbd_resize(struct spdk_bdev *bdev, const uint64_t new_size_in_mb); + +#endif /* SPDK_BDEV_RBD_H */ diff --git a/src/spdk/module/bdev/rbd/bdev_rbd_rpc.c b/src/spdk/module/bdev/rbd/bdev_rbd_rpc.c new file mode 100644 index 000000000..c60c83a58 --- /dev/null +++ b/src/spdk/module/bdev/rbd/bdev_rbd_rpc.c @@ -0,0 +1,252 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "bdev_rbd.h" +#include "spdk/rpc.h" +#include "spdk/util.h" +#include "spdk/string.h" +#include "spdk_internal/log.h" + /* Decoded parameters of the bdev_rbd_create RPC. */ +struct rpc_create_rbd { + char *name; + char *user_id; + char *pool_name; + char *rbd_name; + uint32_t block_size; + char **config; +}; + /* Release everything the JSON decoders allocated inside req (req itself is not freed). */ +static void +free_rpc_create_rbd(struct rpc_create_rbd *req) +{ + free(req->name); + free(req->user_id); + free(req->pool_name); + free(req->rbd_name); + bdev_rbd_free_config(req->config); +} + /* JSON decoder for the config parameter: converts a flat JSON object into a NULL-terminated array of alternating name/value strings stored through out. JSON null yields an empty array. Returns 0 on success, -1 for a non-object value, a name or value that cannot be duplicated as a string, or an allocation failure. */ +static int +bdev_rbd_decode_config(const struct spdk_json_val *values, void *out) +{ + char ***map = out; + char **entry; + uint32_t i; + + if (values->type == SPDK_JSON_VAL_NULL) { + /* treated like empty object: empty config */ + *map = calloc(1, sizeof(**map)); + if (!*map) { + return -1; + } + return 0; + } + + if (values->type != SPDK_JSON_VAL_OBJECT_BEGIN) { + return -1; + } + + *map = calloc(values->len + 1, sizeof(**map)); + if (!*map) { + return -1; + } + + for (i = 0, entry = *map; i < values->len;) { + const struct spdk_json_val *name = &values[i + 1]; + const struct spdk_json_val *v = &values[i + 2]; + /* Here we catch errors like invalid types. 
*/ + if (!(entry[0] = spdk_json_strdup(name)) || + !(entry[1] = spdk_json_strdup(v))) { + bdev_rbd_free_config(*map); + *map = NULL; + return -1; + } + i += 1 + spdk_json_val_len(v); + entry += 2; + } + + return 0; +} + /* Parameter table for bdev_rbd_create. NOTE(review): the trailing true presumably marks a parameter as optional - confirm against spdk/json.h. */ +static const struct spdk_json_object_decoder rpc_create_rbd_decoders[] = { + {"name", offsetof(struct rpc_create_rbd, name), spdk_json_decode_string, true}, + {"user_id", offsetof(struct rpc_create_rbd, user_id), spdk_json_decode_string, true}, + {"pool_name", offsetof(struct rpc_create_rbd, pool_name), spdk_json_decode_string}, + {"rbd_name", offsetof(struct rpc_create_rbd, rbd_name), spdk_json_decode_string}, + {"block_size", offsetof(struct rpc_create_rbd, block_size), spdk_json_decode_uint32}, + {"config", offsetof(struct rpc_create_rbd, config), bdev_rbd_decode_config, true} +}; + /* RPC handler for bdev_rbd_create: decodes params, creates the bdev and replies with its name, or sends an error response on failure. The decoded request is released at cleanup on every path. */ +static void +rpc_bdev_rbd_create(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_create_rbd req = {}; + struct spdk_json_write_ctx *w; + struct spdk_bdev *bdev; + int rc = 0; + + if (spdk_json_decode_object(params, rpc_create_rbd_decoders, + SPDK_COUNTOF(rpc_create_rbd_decoders), + &req)) { + SPDK_DEBUGLOG(SPDK_LOG_BDEV_RBD, "spdk_json_decode_object failed\n"); + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "spdk_json_decode_object failed"); + goto cleanup; + } + + rc = bdev_rbd_create(&bdev, req.name, req.user_id, req.pool_name, + (const char *const *)req.config, + req.rbd_name, + req.block_size); + if (rc) { + spdk_jsonrpc_send_error_response(request, rc, spdk_strerror(-rc)); + goto cleanup; + } + + w = spdk_jsonrpc_begin_result(request); + spdk_json_write_string(w, spdk_bdev_get_name(bdev)); + spdk_jsonrpc_end_result(request, w); + +cleanup: + free_rpc_create_rbd(&req); +} +SPDK_RPC_REGISTER("bdev_rbd_create", rpc_bdev_rbd_create, SPDK_RPC_RUNTIME) +SPDK_RPC_REGISTER_ALIAS_DEPRECATED(bdev_rbd_create, construct_rbd_bdev) + /* Decoded parameters of the bdev_rbd_delete RPC. */ +struct rpc_bdev_rbd_delete { + char *name; +}; + /* Release the string owned by a decoded delete request. */ +static void 
+free_rpc_bdev_rbd_delete(struct rpc_bdev_rbd_delete *req) +{ + free(req->name); +} + +static const struct spdk_json_object_decoder rpc_bdev_rbd_delete_decoders[] = { + {"name", offsetof(struct rpc_bdev_rbd_delete, name), spdk_json_decode_string}, +}; + /* Deletion completion: replies to the pending request (passed as cb_arg) with true when bdeverrno is 0 and false otherwise. */ +static void +_rpc_bdev_rbd_delete_cb(void *cb_arg, int bdeverrno) +{ + struct spdk_jsonrpc_request *request = cb_arg; + struct spdk_json_write_ctx *w; + + w = spdk_jsonrpc_begin_result(request); + spdk_json_write_bool(w, bdeverrno == 0); + spdk_jsonrpc_end_result(request, w); +} + /* RPC handler for bdev_rbd_delete: looks the bdev up by name and starts its deletion; the result is sent from _rpc_bdev_rbd_delete_cb. An unknown name gets an -ENODEV error response. */ +static void +rpc_bdev_rbd_delete(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_bdev_rbd_delete req = {NULL}; + struct spdk_bdev *bdev; + + if (spdk_json_decode_object(params, rpc_bdev_rbd_delete_decoders, + SPDK_COUNTOF(rpc_bdev_rbd_delete_decoders), + &req)) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "spdk_json_decode_object failed"); + goto cleanup; + } + + bdev = spdk_bdev_get_by_name(req.name); + if (bdev == NULL) { + spdk_jsonrpc_send_error_response(request, -ENODEV, spdk_strerror(ENODEV)); + goto cleanup; + } + + bdev_rbd_delete(bdev, _rpc_bdev_rbd_delete_cb, request); + +cleanup: + free_rpc_bdev_rbd_delete(&req); +} +SPDK_RPC_REGISTER("bdev_rbd_delete", rpc_bdev_rbd_delete, SPDK_RPC_RUNTIME) +SPDK_RPC_REGISTER_ALIAS_DEPRECATED(bdev_rbd_delete, delete_rbd_bdev) + /* Decoded parameters of the bdev_rbd_resize RPC; new_size is handed to bdev_rbd_resize() as a size in MiB. */ +struct rpc_bdev_rbd_resize { + char *name; + uint64_t new_size; +}; + +static const struct spdk_json_object_decoder rpc_bdev_rbd_resize_decoders[] = { + {"name", offsetof(struct rpc_bdev_rbd_resize, name), spdk_json_decode_string}, + {"new_size", offsetof(struct rpc_bdev_rbd_resize, new_size), spdk_json_decode_uint64} +}; + +static void +free_rpc_bdev_rbd_resize(struct rpc_bdev_rbd_resize *req) +{ + free(req->name); +} + /* RPC handler for bdev_rbd_resize: resizes the named bdev and replies true, or sends an error response when decoding, lookup or the resize itself fails. */ +static void +rpc_bdev_rbd_resize(struct spdk_jsonrpc_request *request, + const struct spdk_json_val *params) +{ + struct rpc_bdev_rbd_resize req = {}; + struct 
spdk_bdev *bdev; + struct spdk_json_write_ctx *w; + int rc; + + if (spdk_json_decode_object(params, rpc_bdev_rbd_resize_decoders, + SPDK_COUNTOF(rpc_bdev_rbd_resize_decoders), + &req)) { + spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, + "spdk_json_decode_object failed"); + goto cleanup; + } + + bdev = spdk_bdev_get_by_name(req.name); + if (bdev == NULL) { + spdk_jsonrpc_send_error_response(request, -ENODEV, spdk_strerror(ENODEV)); + goto cleanup; + } + + rc = bdev_rbd_resize(bdev, req.new_size); + if (rc) { + spdk_jsonrpc_send_error_response(request, rc, spdk_strerror(-rc)); + goto cleanup; + } + + w = spdk_jsonrpc_begin_result(request); + spdk_json_write_bool(w, true); + spdk_jsonrpc_end_result(request, w); +cleanup: + free_rpc_bdev_rbd_resize(&req); +} +SPDK_RPC_REGISTER("bdev_rbd_resize", rpc_bdev_rbd_resize, SPDK_RPC_RUNTIME) |