summaryrefslogtreecommitdiffstats
path: root/src/spdk/lib/bdev/part.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/spdk/lib/bdev/part.c')
-rw-r--r--src/spdk/lib/bdev/part.c524
1 files changed, 524 insertions, 0 deletions
diff --git a/src/spdk/lib/bdev/part.c b/src/spdk/lib/bdev/part.c
new file mode 100644
index 000000000..01a395591
--- /dev/null
+++ b/src/spdk/lib/bdev/part.c
@@ -0,0 +1,524 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Common code for partition-like virtual bdevs.
+ */
+
+#include "spdk/bdev.h"
+#include "spdk/likely.h"
+#include "spdk/log.h"
+#include "spdk/string.h"
+#include "spdk/thread.h"
+
+#include "spdk/bdev_module.h"
+
+struct spdk_bdev_part_base {
+ struct spdk_bdev *bdev;
+ struct spdk_bdev_desc *desc;
+ uint32_t ref;
+ uint32_t channel_size;
+ spdk_bdev_part_base_free_fn base_free_fn;
+ void *ctx;
+ bool claimed;
+ struct spdk_bdev_module *module;
+ struct spdk_bdev_fn_table *fn_table;
+ struct bdev_part_tailq *tailq;
+ spdk_io_channel_create_cb ch_create_cb;
+ spdk_io_channel_destroy_cb ch_destroy_cb;
+ struct spdk_thread *thread;
+};
+
+struct spdk_bdev *
+spdk_bdev_part_base_get_bdev(struct spdk_bdev_part_base *part_base)
+{
+ return part_base->bdev;
+}
+
+struct spdk_bdev_desc *
+spdk_bdev_part_base_get_desc(struct spdk_bdev_part_base *part_base)
+{
+ return part_base->desc;
+}
+
+struct bdev_part_tailq *
+spdk_bdev_part_base_get_tailq(struct spdk_bdev_part_base *part_base)
+{
+ return part_base->tailq;
+}
+
+void *
+spdk_bdev_part_base_get_ctx(struct spdk_bdev_part_base *part_base)
+{
+ return part_base->ctx;
+}
+
+const char *
+spdk_bdev_part_base_get_bdev_name(struct spdk_bdev_part_base *part_base)
+{
+ return part_base->bdev->name;
+}
+
+static void
+bdev_part_base_free(void *ctx)
+{
+ struct spdk_bdev_desc *desc = ctx;
+
+ spdk_bdev_close(desc);
+}
+
+void
+spdk_bdev_part_base_free(struct spdk_bdev_part_base *base)
+{
+ if (base->desc) {
+ /* Close the underlying bdev on its same opened thread. */
+ if (base->thread && base->thread != spdk_get_thread()) {
+ spdk_thread_send_msg(base->thread, bdev_part_base_free, base->desc);
+ } else {
+ spdk_bdev_close(base->desc);
+ }
+ }
+
+ if (base->base_free_fn != NULL) {
+ base->base_free_fn(base->ctx);
+ }
+
+ free(base);
+}
+
+static void
+bdev_part_free_cb(void *io_device)
+{
+ struct spdk_bdev_part *part = io_device;
+ struct spdk_bdev_part_base *base;
+
+ assert(part);
+ assert(part->internal.base);
+
+ base = part->internal.base;
+
+ TAILQ_REMOVE(base->tailq, part, tailq);
+
+ if (--base->ref == 0) {
+ spdk_bdev_module_release_bdev(base->bdev);
+ spdk_bdev_part_base_free(base);
+ }
+
+ spdk_bdev_destruct_done(&part->internal.bdev, 0);
+ free(part->internal.bdev.name);
+ free(part->internal.bdev.product_name);
+ free(part);
+}
+
+int
+spdk_bdev_part_free(struct spdk_bdev_part *part)
+{
+ spdk_io_device_unregister(part, bdev_part_free_cb);
+
+ /* Return 1 to indicate that this is an asynchronous operation that isn't complete
+ * until spdk_bdev_destruct_done is called */
+ return 1;
+}
+
+void
+spdk_bdev_part_base_hotremove(struct spdk_bdev_part_base *part_base, struct bdev_part_tailq *tailq)
+{
+ struct spdk_bdev_part *part, *tmp;
+
+ TAILQ_FOREACH_SAFE(part, tailq, tailq, tmp) {
+ if (part->internal.base == part_base) {
+ spdk_bdev_unregister(&part->internal.bdev, NULL, NULL);
+ }
+ }
+}
+
+static bool
+bdev_part_io_type_supported(void *_part, enum spdk_bdev_io_type io_type)
+{
+ struct spdk_bdev_part *part = _part;
+
+ /* We can't decode/modify passthrough NVMe commands, so don't report
+ * that a partition supports these io types, even if the underlying
+ * bdev does.
+ */
+ switch (io_type) {
+ case SPDK_BDEV_IO_TYPE_NVME_ADMIN:
+ case SPDK_BDEV_IO_TYPE_NVME_IO:
+ case SPDK_BDEV_IO_TYPE_NVME_IO_MD:
+ return false;
+ default:
+ break;
+ }
+
+ return part->internal.base->bdev->fn_table->io_type_supported(part->internal.base->bdev->ctxt,
+ io_type);
+}
+
+static struct spdk_io_channel *
+bdev_part_get_io_channel(void *_part)
+{
+ struct spdk_bdev_part *part = _part;
+
+ return spdk_get_io_channel(part);
+}
+
+struct spdk_bdev *
+spdk_bdev_part_get_bdev(struct spdk_bdev_part *part)
+{
+ return &part->internal.bdev;
+}
+
+struct spdk_bdev_part_base *
+spdk_bdev_part_get_base(struct spdk_bdev_part *part)
+{
+ return part->internal.base;
+}
+
+struct spdk_bdev *
+spdk_bdev_part_get_base_bdev(struct spdk_bdev_part *part)
+{
+ return part->internal.base->bdev;
+}
+
+uint64_t
+spdk_bdev_part_get_offset_blocks(struct spdk_bdev_part *part)
+{
+ return part->internal.offset_blocks;
+}
+
+static int
+bdev_part_remap_dif(struct spdk_bdev_io *bdev_io, uint32_t offset,
+ uint32_t remapped_offset)
+{
+ struct spdk_bdev *bdev = bdev_io->bdev;
+ struct spdk_dif_ctx dif_ctx;
+ struct spdk_dif_error err_blk = {};
+ int rc;
+
+ if (spdk_likely(!(bdev->dif_check_flags & SPDK_DIF_FLAGS_REFTAG_CHECK))) {
+ return 0;
+ }
+
+ rc = spdk_dif_ctx_init(&dif_ctx,
+ bdev->blocklen, bdev->md_len, bdev->md_interleave,
+ bdev->dif_is_head_of_md, bdev->dif_type, bdev->dif_check_flags,
+ offset, 0, 0, 0, 0);
+ if (rc != 0) {
+ SPDK_ERRLOG("Initialization of DIF context failed\n");
+ return rc;
+ }
+
+ spdk_dif_ctx_set_remapped_init_ref_tag(&dif_ctx, remapped_offset);
+
+ if (bdev->md_interleave) {
+ rc = spdk_dif_remap_ref_tag(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
+ bdev_io->u.bdev.num_blocks, &dif_ctx, &err_blk);
+ } else {
+ struct iovec md_iov = {
+ .iov_base = bdev_io->u.bdev.md_buf,
+ .iov_len = bdev_io->u.bdev.num_blocks * bdev->md_len,
+ };
+
+ rc = spdk_dix_remap_ref_tag(&md_iov, bdev_io->u.bdev.num_blocks, &dif_ctx, &err_blk);
+ }
+
+ if (rc != 0) {
+ SPDK_ERRLOG("Remapping reference tag failed. type=%d, offset=%" PRIu32 "\n",
+ err_blk.err_type, err_blk.err_offset);
+ }
+
+ return rc;
+}
+
+static void
+bdev_part_complete_read_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
+{
+ struct spdk_bdev_io *part_io = cb_arg;
+ uint32_t offset, remapped_offset;
+ int rc, status;
+
+ offset = bdev_io->u.bdev.offset_blocks;
+ remapped_offset = part_io->u.bdev.offset_blocks;
+
+ if (success) {
+ rc = bdev_part_remap_dif(bdev_io, offset, remapped_offset);
+ if (rc != 0) {
+ success = false;
+ }
+ }
+
+ status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED;
+
+ spdk_bdev_io_complete(part_io, status);
+ spdk_bdev_free_io(bdev_io);
+}
+
+static void
+bdev_part_complete_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
+{
+ struct spdk_bdev_io *part_io = cb_arg;
+ int status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED;
+
+ spdk_bdev_io_complete(part_io, status);
+ spdk_bdev_free_io(bdev_io);
+}
+
+static void
+bdev_part_complete_zcopy_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
+{
+ struct spdk_bdev_io *part_io = cb_arg;
+ int status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED;
+
+ spdk_bdev_io_set_buf(part_io, bdev_io->u.bdev.iovs[0].iov_base, bdev_io->u.bdev.iovs[0].iov_len);
+ spdk_bdev_io_complete(part_io, status);
+ spdk_bdev_free_io(bdev_io);
+}
+
+int
+spdk_bdev_part_submit_request(struct spdk_bdev_part_channel *ch, struct spdk_bdev_io *bdev_io)
+{
+ struct spdk_bdev_part *part = ch->part;
+ struct spdk_io_channel *base_ch = ch->base_ch;
+ struct spdk_bdev_desc *base_desc = part->internal.base->desc;
+ uint64_t offset, remapped_offset;
+ int rc = 0;
+
+ offset = bdev_io->u.bdev.offset_blocks;
+ remapped_offset = offset + part->internal.offset_blocks;
+
+ /* Modify the I/O to adjust for the offset within the base bdev. */
+ switch (bdev_io->type) {
+ case SPDK_BDEV_IO_TYPE_READ:
+ if (bdev_io->u.bdev.md_buf == NULL) {
+ rc = spdk_bdev_readv_blocks(base_desc, base_ch, bdev_io->u.bdev.iovs,
+ bdev_io->u.bdev.iovcnt, remapped_offset,
+ bdev_io->u.bdev.num_blocks,
+ bdev_part_complete_read_io, bdev_io);
+ } else {
+ rc = spdk_bdev_readv_blocks_with_md(base_desc, base_ch,
+ bdev_io->u.bdev.iovs,
+ bdev_io->u.bdev.iovcnt,
+ bdev_io->u.bdev.md_buf, remapped_offset,
+ bdev_io->u.bdev.num_blocks,
+ bdev_part_complete_read_io, bdev_io);
+ }
+ break;
+ case SPDK_BDEV_IO_TYPE_WRITE:
+ rc = bdev_part_remap_dif(bdev_io, offset, remapped_offset);
+ if (rc != 0) {
+ return SPDK_BDEV_IO_STATUS_FAILED;
+ }
+
+ if (bdev_io->u.bdev.md_buf == NULL) {
+ rc = spdk_bdev_writev_blocks(base_desc, base_ch, bdev_io->u.bdev.iovs,
+ bdev_io->u.bdev.iovcnt, remapped_offset,
+ bdev_io->u.bdev.num_blocks,
+ bdev_part_complete_io, bdev_io);
+ } else {
+ rc = spdk_bdev_writev_blocks_with_md(base_desc, base_ch,
+ bdev_io->u.bdev.iovs,
+ bdev_io->u.bdev.iovcnt,
+ bdev_io->u.bdev.md_buf, remapped_offset,
+ bdev_io->u.bdev.num_blocks,
+ bdev_part_complete_io, bdev_io);
+ }
+ break;
+ case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
+ rc = spdk_bdev_write_zeroes_blocks(base_desc, base_ch, remapped_offset,
+ bdev_io->u.bdev.num_blocks, bdev_part_complete_io,
+ bdev_io);
+ break;
+ case SPDK_BDEV_IO_TYPE_UNMAP:
+ rc = spdk_bdev_unmap_blocks(base_desc, base_ch, remapped_offset,
+ bdev_io->u.bdev.num_blocks, bdev_part_complete_io,
+ bdev_io);
+ break;
+ case SPDK_BDEV_IO_TYPE_FLUSH:
+ rc = spdk_bdev_flush_blocks(base_desc, base_ch, remapped_offset,
+ bdev_io->u.bdev.num_blocks, bdev_part_complete_io,
+ bdev_io);
+ break;
+ case SPDK_BDEV_IO_TYPE_RESET:
+ rc = spdk_bdev_reset(base_desc, base_ch,
+ bdev_part_complete_io, bdev_io);
+ break;
+ case SPDK_BDEV_IO_TYPE_ZCOPY:
+ rc = spdk_bdev_zcopy_start(base_desc, base_ch, remapped_offset,
+ bdev_io->u.bdev.num_blocks, bdev_io->u.bdev.zcopy.populate,
+ bdev_part_complete_zcopy_io, bdev_io);
+ break;
+ default:
+ SPDK_ERRLOG("unknown I/O type %d\n", bdev_io->type);
+ return SPDK_BDEV_IO_STATUS_FAILED;
+ }
+
+ return rc;
+}
+
+static int
+bdev_part_channel_create_cb(void *io_device, void *ctx_buf)
+{
+ struct spdk_bdev_part *part = (struct spdk_bdev_part *)io_device;
+ struct spdk_bdev_part_channel *ch = ctx_buf;
+
+ ch->part = part;
+ ch->base_ch = spdk_bdev_get_io_channel(part->internal.base->desc);
+ if (ch->base_ch == NULL) {
+ return -1;
+ }
+
+ if (part->internal.base->ch_create_cb) {
+ return part->internal.base->ch_create_cb(io_device, ctx_buf);
+ } else {
+ return 0;
+ }
+}
+
+static void
+bdev_part_channel_destroy_cb(void *io_device, void *ctx_buf)
+{
+ struct spdk_bdev_part *part = (struct spdk_bdev_part *)io_device;
+ struct spdk_bdev_part_channel *ch = ctx_buf;
+
+ if (part->internal.base->ch_destroy_cb) {
+ part->internal.base->ch_destroy_cb(io_device, ctx_buf);
+ }
+ spdk_put_io_channel(ch->base_ch);
+}
+
+struct spdk_bdev_part_base *
+ spdk_bdev_part_base_construct(struct spdk_bdev *bdev,
+ spdk_bdev_remove_cb_t remove_cb, struct spdk_bdev_module *module,
+ struct spdk_bdev_fn_table *fn_table, struct bdev_part_tailq *tailq,
+ spdk_bdev_part_base_free_fn free_fn, void *ctx,
+ uint32_t channel_size, spdk_io_channel_create_cb ch_create_cb,
+ spdk_io_channel_destroy_cb ch_destroy_cb)
+{
+ int rc;
+ struct spdk_bdev_part_base *base;
+
+ base = calloc(1, sizeof(*base));
+ if (!base) {
+ SPDK_ERRLOG("Memory allocation failure\n");
+ return NULL;
+ }
+ fn_table->get_io_channel = bdev_part_get_io_channel;
+ fn_table->io_type_supported = bdev_part_io_type_supported;
+
+ base->bdev = bdev;
+ base->desc = NULL;
+ base->ref = 0;
+ base->module = module;
+ base->fn_table = fn_table;
+ base->tailq = tailq;
+ base->base_free_fn = free_fn;
+ base->ctx = ctx;
+ base->claimed = false;
+ base->channel_size = channel_size;
+ base->ch_create_cb = ch_create_cb;
+ base->ch_destroy_cb = ch_destroy_cb;
+
+ rc = spdk_bdev_open(bdev, false, remove_cb, base, &base->desc);
+ if (rc) {
+ spdk_bdev_part_base_free(base);
+ SPDK_ERRLOG("could not open bdev %s: %s\n", spdk_bdev_get_name(bdev),
+ spdk_strerror(-rc));
+ return NULL;
+ }
+
+ /* Save the thread where the base device is opened */
+ base->thread = spdk_get_thread();
+
+ return base;
+}
+
+int
+spdk_bdev_part_construct(struct spdk_bdev_part *part, struct spdk_bdev_part_base *base,
+ char *name, uint64_t offset_blocks, uint64_t num_blocks,
+ char *product_name)
+{
+ part->internal.bdev.blocklen = base->bdev->blocklen;
+ part->internal.bdev.blockcnt = num_blocks;
+ part->internal.offset_blocks = offset_blocks;
+
+ part->internal.bdev.write_cache = base->bdev->write_cache;
+ part->internal.bdev.required_alignment = base->bdev->required_alignment;
+ part->internal.bdev.ctxt = part;
+ part->internal.bdev.module = base->module;
+ part->internal.bdev.fn_table = base->fn_table;
+
+ part->internal.bdev.md_interleave = base->bdev->md_interleave;
+ part->internal.bdev.md_len = base->bdev->md_len;
+ part->internal.bdev.dif_type = base->bdev->dif_type;
+ part->internal.bdev.dif_is_head_of_md = base->bdev->dif_is_head_of_md;
+ part->internal.bdev.dif_check_flags = base->bdev->dif_check_flags;
+
+ part->internal.bdev.name = strdup(name);
+ part->internal.bdev.product_name = strdup(product_name);
+
+ if (part->internal.bdev.name == NULL) {
+ SPDK_ERRLOG("Failed to allocate name for new part of bdev %s\n", spdk_bdev_get_name(base->bdev));
+ return -1;
+ } else if (part->internal.bdev.product_name == NULL) {
+ free(part->internal.bdev.name);
+ SPDK_ERRLOG("Failed to allocate product name for new part of bdev %s\n",
+ spdk_bdev_get_name(base->bdev));
+ return -1;
+ }
+
+ base->ref++;
+ part->internal.base = base;
+
+ if (!base->claimed) {
+ int rc;
+
+ rc = spdk_bdev_module_claim_bdev(base->bdev, base->desc, base->module);
+ if (rc) {
+ SPDK_ERRLOG("could not claim bdev %s\n", spdk_bdev_get_name(base->bdev));
+ free(part->internal.bdev.name);
+ free(part->internal.bdev.product_name);
+ return -1;
+ }
+ base->claimed = true;
+ }
+
+ spdk_io_device_register(part, bdev_part_channel_create_cb,
+ bdev_part_channel_destroy_cb,
+ base->channel_size,
+ name);
+
+ spdk_bdev_register(&part->internal.bdev);
+ TAILQ_INSERT_TAIL(base->tailq, part, tailq);
+
+ return 0;
+}