diff options
Diffstat (limited to 'src/spdk/lib/bdev/part.c')
-rw-r--r-- | src/spdk/lib/bdev/part.c | 524 |
1 files changed, 524 insertions, 0 deletions
diff --git a/src/spdk/lib/bdev/part.c b/src/spdk/lib/bdev/part.c new file mode 100644 index 000000000..01a395591 --- /dev/null +++ b/src/spdk/lib/bdev/part.c @@ -0,0 +1,524 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Common code for partition-like virtual bdevs. + */ + +#include "spdk/bdev.h" +#include "spdk/likely.h" +#include "spdk/log.h" +#include "spdk/string.h" +#include "spdk/thread.h" + +#include "spdk/bdev_module.h" + +struct spdk_bdev_part_base { + struct spdk_bdev *bdev; + struct spdk_bdev_desc *desc; + uint32_t ref; + uint32_t channel_size; + spdk_bdev_part_base_free_fn base_free_fn; + void *ctx; + bool claimed; + struct spdk_bdev_module *module; + struct spdk_bdev_fn_table *fn_table; + struct bdev_part_tailq *tailq; + spdk_io_channel_create_cb ch_create_cb; + spdk_io_channel_destroy_cb ch_destroy_cb; + struct spdk_thread *thread; +}; + +struct spdk_bdev * +spdk_bdev_part_base_get_bdev(struct spdk_bdev_part_base *part_base) +{ + return part_base->bdev; +} + +struct spdk_bdev_desc * +spdk_bdev_part_base_get_desc(struct spdk_bdev_part_base *part_base) +{ + return part_base->desc; +} + +struct bdev_part_tailq * +spdk_bdev_part_base_get_tailq(struct spdk_bdev_part_base *part_base) +{ + return part_base->tailq; +} + +void * +spdk_bdev_part_base_get_ctx(struct spdk_bdev_part_base *part_base) +{ + return part_base->ctx; +} + +const char * +spdk_bdev_part_base_get_bdev_name(struct spdk_bdev_part_base *part_base) +{ + return part_base->bdev->name; +} + +static void +bdev_part_base_free(void *ctx) +{ + struct spdk_bdev_desc *desc = ctx; + + spdk_bdev_close(desc); +} + +void +spdk_bdev_part_base_free(struct spdk_bdev_part_base *base) +{ + if (base->desc) { + /* Close the underlying bdev on its same opened thread. */ + if (base->thread && base->thread != spdk_get_thread()) { + spdk_thread_send_msg(base->thread, bdev_part_base_free, base->desc); + } else { + spdk_bdev_close(base->desc); + } + } + + if (base->base_free_fn != NULL) { + base->base_free_fn(base->ctx); + } + + free(base); +} + +static void +bdev_part_free_cb(void *io_device) +{ + struct spdk_bdev_part *part = io_device; + struct spdk_bdev_part_base *base; + + assert(part); + assert(part->internal.base); + + base = part->internal.base; + + TAILQ_REMOVE(base->tailq, part, tailq); + + if (--base->ref == 0) { + spdk_bdev_module_release_bdev(base->bdev); + spdk_bdev_part_base_free(base); + } + + spdk_bdev_destruct_done(&part->internal.bdev, 0); + free(part->internal.bdev.name); + free(part->internal.bdev.product_name); + free(part); +} + +int +spdk_bdev_part_free(struct spdk_bdev_part *part) +{ + spdk_io_device_unregister(part, bdev_part_free_cb); + + /* Return 1 to indicate that this is an asynchronous operation that isn't complete + * until spdk_bdev_destruct_done is called */ + return 1; +} + +void +spdk_bdev_part_base_hotremove(struct spdk_bdev_part_base *part_base, struct bdev_part_tailq *tailq) +{ + struct spdk_bdev_part *part, *tmp; + + TAILQ_FOREACH_SAFE(part, tailq, tailq, tmp) { + if (part->internal.base == part_base) { + spdk_bdev_unregister(&part->internal.bdev, NULL, NULL); + } + } +} + +static bool +bdev_part_io_type_supported(void *_part, enum spdk_bdev_io_type io_type) +{ + struct spdk_bdev_part *part = _part; + + /* We can't decode/modify passthrough NVMe commands, so don't report + * that a partition supports these io types, even if the underlying + * bdev does. + */ + switch (io_type) { + case SPDK_BDEV_IO_TYPE_NVME_ADMIN: + case SPDK_BDEV_IO_TYPE_NVME_IO: + case SPDK_BDEV_IO_TYPE_NVME_IO_MD: + return false; + default: + break; + } + + return part->internal.base->bdev->fn_table->io_type_supported(part->internal.base->bdev->ctxt, + io_type); +} + +static struct spdk_io_channel * +bdev_part_get_io_channel(void *_part) +{ + struct spdk_bdev_part *part = _part; + + return spdk_get_io_channel(part); +} + +struct spdk_bdev * +spdk_bdev_part_get_bdev(struct spdk_bdev_part *part) +{ + return &part->internal.bdev; +} + +struct spdk_bdev_part_base * +spdk_bdev_part_get_base(struct spdk_bdev_part *part) +{ + return part->internal.base; +} + +struct spdk_bdev * +spdk_bdev_part_get_base_bdev(struct spdk_bdev_part *part) +{ + return part->internal.base->bdev; +} + +uint64_t +spdk_bdev_part_get_offset_blocks(struct spdk_bdev_part *part) +{ + return part->internal.offset_blocks; +} + +static int +bdev_part_remap_dif(struct spdk_bdev_io *bdev_io, uint32_t offset, + uint32_t remapped_offset) +{ + struct spdk_bdev *bdev = bdev_io->bdev; + struct spdk_dif_ctx dif_ctx; + struct spdk_dif_error err_blk = {}; + int rc; + + if (spdk_likely(!(bdev->dif_check_flags & SPDK_DIF_FLAGS_REFTAG_CHECK))) { + return 0; + } + + rc = spdk_dif_ctx_init(&dif_ctx, + bdev->blocklen, bdev->md_len, bdev->md_interleave, + bdev->dif_is_head_of_md, bdev->dif_type, bdev->dif_check_flags, + offset, 0, 0, 0, 0); + if (rc != 0) { + SPDK_ERRLOG("Initialization of DIF context failed\n"); + return rc; + } + + spdk_dif_ctx_set_remapped_init_ref_tag(&dif_ctx, remapped_offset); + + if (bdev->md_interleave) { + rc = spdk_dif_remap_ref_tag(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, + bdev_io->u.bdev.num_blocks, &dif_ctx, &err_blk); + } else { + struct iovec md_iov = { + .iov_base = bdev_io->u.bdev.md_buf, + .iov_len = bdev_io->u.bdev.num_blocks * bdev->md_len, + }; + + rc = spdk_dix_remap_ref_tag(&md_iov, bdev_io->u.bdev.num_blocks, &dif_ctx, &err_blk); + } + + if (rc != 0) { + SPDK_ERRLOG("Remapping reference tag failed. type=%d, offset=%" PRIu32 "\n", + err_blk.err_type, err_blk.err_offset); + } + + return rc; +} + +static void +bdev_part_complete_read_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) +{ + struct spdk_bdev_io *part_io = cb_arg; + uint32_t offset, remapped_offset; + int rc, status; + + offset = bdev_io->u.bdev.offset_blocks; + remapped_offset = part_io->u.bdev.offset_blocks; + + if (success) { + rc = bdev_part_remap_dif(bdev_io, offset, remapped_offset); + if (rc != 0) { + success = false; + } + } + + status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED; + + spdk_bdev_io_complete(part_io, status); + spdk_bdev_free_io(bdev_io); +} + +static void +bdev_part_complete_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) +{ + struct spdk_bdev_io *part_io = cb_arg; + int status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED; + + spdk_bdev_io_complete(part_io, status); + spdk_bdev_free_io(bdev_io); +} + +static void +bdev_part_complete_zcopy_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) +{ + struct spdk_bdev_io *part_io = cb_arg; + int status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED; + + spdk_bdev_io_set_buf(part_io, bdev_io->u.bdev.iovs[0].iov_base, bdev_io->u.bdev.iovs[0].iov_len); + spdk_bdev_io_complete(part_io, status); + spdk_bdev_free_io(bdev_io); +} + +int +spdk_bdev_part_submit_request(struct spdk_bdev_part_channel *ch, struct spdk_bdev_io *bdev_io) +{ + struct spdk_bdev_part *part = ch->part; + struct spdk_io_channel *base_ch = ch->base_ch; + struct spdk_bdev_desc *base_desc = part->internal.base->desc; + uint64_t offset, remapped_offset; + int rc = 0; + + offset = bdev_io->u.bdev.offset_blocks; + remapped_offset = offset + part->internal.offset_blocks; + + /* Modify the I/O to adjust for the offset within the base bdev. */ + switch (bdev_io->type) { + case SPDK_BDEV_IO_TYPE_READ: + if (bdev_io->u.bdev.md_buf == NULL) { + rc = spdk_bdev_readv_blocks(base_desc, base_ch, bdev_io->u.bdev.iovs, + bdev_io->u.bdev.iovcnt, remapped_offset, + bdev_io->u.bdev.num_blocks, + bdev_part_complete_read_io, bdev_io); + } else { + rc = spdk_bdev_readv_blocks_with_md(base_desc, base_ch, + bdev_io->u.bdev.iovs, + bdev_io->u.bdev.iovcnt, + bdev_io->u.bdev.md_buf, remapped_offset, + bdev_io->u.bdev.num_blocks, + bdev_part_complete_read_io, bdev_io); + } + break; + case SPDK_BDEV_IO_TYPE_WRITE: + rc = bdev_part_remap_dif(bdev_io, offset, remapped_offset); + if (rc != 0) { + return SPDK_BDEV_IO_STATUS_FAILED; + } + + if (bdev_io->u.bdev.md_buf == NULL) { + rc = spdk_bdev_writev_blocks(base_desc, base_ch, bdev_io->u.bdev.iovs, + bdev_io->u.bdev.iovcnt, remapped_offset, + bdev_io->u.bdev.num_blocks, + bdev_part_complete_io, bdev_io); + } else { + rc = spdk_bdev_writev_blocks_with_md(base_desc, base_ch, + bdev_io->u.bdev.iovs, + bdev_io->u.bdev.iovcnt, + bdev_io->u.bdev.md_buf, remapped_offset, + bdev_io->u.bdev.num_blocks, + bdev_part_complete_io, bdev_io); + } + break; + case SPDK_BDEV_IO_TYPE_WRITE_ZEROES: + rc = spdk_bdev_write_zeroes_blocks(base_desc, base_ch, remapped_offset, + bdev_io->u.bdev.num_blocks, bdev_part_complete_io, + bdev_io); + break; + case SPDK_BDEV_IO_TYPE_UNMAP: + rc = spdk_bdev_unmap_blocks(base_desc, base_ch, remapped_offset, + bdev_io->u.bdev.num_blocks, bdev_part_complete_io, + bdev_io); + break; + case SPDK_BDEV_IO_TYPE_FLUSH: + rc = spdk_bdev_flush_blocks(base_desc, base_ch, remapped_offset, + bdev_io->u.bdev.num_blocks, bdev_part_complete_io, + bdev_io); + break; + case SPDK_BDEV_IO_TYPE_RESET: + rc = spdk_bdev_reset(base_desc, base_ch, + bdev_part_complete_io, bdev_io); + break; + case SPDK_BDEV_IO_TYPE_ZCOPY: + rc = spdk_bdev_zcopy_start(base_desc, base_ch, remapped_offset, + bdev_io->u.bdev.num_blocks, bdev_io->u.bdev.zcopy.populate, + bdev_part_complete_zcopy_io, bdev_io); + break; + default: + SPDK_ERRLOG("unknown I/O type %d\n", bdev_io->type); + return SPDK_BDEV_IO_STATUS_FAILED; + } + + return rc; +} + +static int +bdev_part_channel_create_cb(void *io_device, void *ctx_buf) +{ + struct spdk_bdev_part *part = (struct spdk_bdev_part *)io_device; + struct spdk_bdev_part_channel *ch = ctx_buf; + + ch->part = part; + ch->base_ch = spdk_bdev_get_io_channel(part->internal.base->desc); + if (ch->base_ch == NULL) { + return -1; + } + + if (part->internal.base->ch_create_cb) { + return part->internal.base->ch_create_cb(io_device, ctx_buf); + } else { + return 0; + } +} + +static void +bdev_part_channel_destroy_cb(void *io_device, void *ctx_buf) +{ + struct spdk_bdev_part *part = (struct spdk_bdev_part *)io_device; + struct spdk_bdev_part_channel *ch = ctx_buf; + + if (part->internal.base->ch_destroy_cb) { + part->internal.base->ch_destroy_cb(io_device, ctx_buf); + } + spdk_put_io_channel(ch->base_ch); +} + +struct spdk_bdev_part_base * + spdk_bdev_part_base_construct(struct spdk_bdev *bdev, + spdk_bdev_remove_cb_t remove_cb, struct spdk_bdev_module *module, + struct spdk_bdev_fn_table *fn_table, struct bdev_part_tailq *tailq, + spdk_bdev_part_base_free_fn free_fn, void *ctx, + uint32_t channel_size, spdk_io_channel_create_cb ch_create_cb, + spdk_io_channel_destroy_cb ch_destroy_cb) +{ + int rc; + struct spdk_bdev_part_base *base; + + base = calloc(1, sizeof(*base)); + if (!base) { + SPDK_ERRLOG("Memory allocation failure\n"); + return NULL; + } + fn_table->get_io_channel = bdev_part_get_io_channel; + fn_table->io_type_supported = bdev_part_io_type_supported; + + base->bdev = bdev; + base->desc = NULL; + base->ref = 0; + base->module = module; + base->fn_table = fn_table; + base->tailq = tailq; + base->base_free_fn = free_fn; + base->ctx = ctx; + base->claimed = false; + base->channel_size = channel_size; + base->ch_create_cb = ch_create_cb; + base->ch_destroy_cb = ch_destroy_cb; + + rc = spdk_bdev_open(bdev, false, remove_cb, base, &base->desc); + if (rc) { + spdk_bdev_part_base_free(base); + SPDK_ERRLOG("could not open bdev %s: %s\n", spdk_bdev_get_name(bdev), + spdk_strerror(-rc)); + return NULL; + } + + /* Save the thread where the base device is opened */ + base->thread = spdk_get_thread(); + + return base; +} + +int +spdk_bdev_part_construct(struct spdk_bdev_part *part, struct spdk_bdev_part_base *base, + char *name, uint64_t offset_blocks, uint64_t num_blocks, + char *product_name) +{ + part->internal.bdev.blocklen = base->bdev->blocklen; + part->internal.bdev.blockcnt = num_blocks; + part->internal.offset_blocks = offset_blocks; + + part->internal.bdev.write_cache = base->bdev->write_cache; + part->internal.bdev.required_alignment = base->bdev->required_alignment; + part->internal.bdev.ctxt = part; + part->internal.bdev.module = base->module; + part->internal.bdev.fn_table = base->fn_table; + + part->internal.bdev.md_interleave = base->bdev->md_interleave; + part->internal.bdev.md_len = base->bdev->md_len; + part->internal.bdev.dif_type = base->bdev->dif_type; + part->internal.bdev.dif_is_head_of_md = base->bdev->dif_is_head_of_md; + part->internal.bdev.dif_check_flags = base->bdev->dif_check_flags; + + part->internal.bdev.name = strdup(name); + part->internal.bdev.product_name = strdup(product_name); + + if (part->internal.bdev.name == NULL) { + SPDK_ERRLOG("Failed to allocate name for new part of bdev %s\n", spdk_bdev_get_name(base->bdev)); + return -1; + } else if (part->internal.bdev.product_name == NULL) { + free(part->internal.bdev.name); + SPDK_ERRLOG("Failed to allocate product name for new part of bdev %s\n", + spdk_bdev_get_name(base->bdev)); + return -1; + } + + base->ref++; + part->internal.base = base; + + if (!base->claimed) { + int rc; + + rc = spdk_bdev_module_claim_bdev(base->bdev, base->desc, base->module); + if (rc) { + SPDK_ERRLOG("could not claim bdev %s\n", spdk_bdev_get_name(base->bdev)); + free(part->internal.bdev.name); + free(part->internal.bdev.product_name); + return -1; + } + base->claimed = true; + } + + spdk_io_device_register(part, bdev_part_channel_create_cb, + bdev_part_channel_destroy_cb, + base->channel_size, + name); + + spdk_bdev_register(&part->internal.bdev); + TAILQ_INSERT_TAIL(base->tailq, part, tailq); + + return 0; +} |