summaryrefslogtreecommitdiffstats
path: root/drivers/md/dm-vdo/vio.c
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-08-07 13:11:40 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-08-07 13:11:40 +0000
commit8b0a8165cdad0f4133837d753649ef4682e42c3b (patch)
tree5c58f869f31ddb1f7bd6e8bdea269b680b36c5b6 /drivers/md/dm-vdo/vio.c
parentReleasing progress-linux version 6.8.12-1~progress7.99u1. (diff)
downloadlinux-8b0a8165cdad0f4133837d753649ef4682e42c3b.tar.xz
linux-8b0a8165cdad0f4133837d753649ef4682e42c3b.zip
Merging upstream version 6.9.7.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'drivers/md/dm-vdo/vio.c')
-rw-r--r--drivers/md/dm-vdo/vio.c500
1 files changed, 500 insertions, 0 deletions
diff --git a/drivers/md/dm-vdo/vio.c b/drivers/md/dm-vdo/vio.c
new file mode 100644
index 0000000000..b291578f72
--- /dev/null
+++ b/drivers/md/dm-vdo/vio.c
@@ -0,0 +1,500 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2023 Red Hat
+ */
+
+#include "vio.h"
+
+#include <linux/bio.h>
+#include <linux/blkdev.h>
+#include <linux/kernel.h>
+#include <linux/ratelimit.h>
+
+#include "logger.h"
+#include "memory-alloc.h"
+#include "permassert.h"
+
+#include "constants.h"
+#include "io-submitter.h"
+#include "vdo.h"
+
+/* A vio_pool is a collection of preallocated vios. */
+struct vio_pool {
+ /* The number of objects managed by the pool */
+ size_t size;
+ /* The list of objects which are available */
+ struct list_head available;
+ /* The queue of requestors waiting for objects from the pool */
+ struct vdo_wait_queue waiting;
+ /* The number of objects currently in use */
+ size_t busy_count;
+ /* The list of objects which are in use */
+ struct list_head busy;
+ /* The ID of the thread on which this pool may be used */
+ thread_id_t thread_id;
+ /* The buffer backing the pool's vios */
+ char *buffer;
+ /* The pool entries */
+ struct pooled_vio vios[];
+};
+
+physical_block_number_t pbn_from_vio_bio(struct bio *bio)
+{
+ struct vio *vio = bio->bi_private;
+ struct vdo *vdo = vio->completion.vdo;
+ physical_block_number_t pbn = bio->bi_iter.bi_sector / VDO_SECTORS_PER_BLOCK;
+
+ return ((pbn == VDO_GEOMETRY_BLOCK_LOCATION) ? pbn : pbn + vdo->geometry.bio_offset);
+}
+
+static int create_multi_block_bio(block_count_t size, struct bio **bio_ptr)
+{
+ struct bio *bio = NULL;
+ int result;
+
+ result = vdo_allocate_extended(struct bio, size + 1, struct bio_vec,
+ "bio", &bio);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ *bio_ptr = bio;
+ return VDO_SUCCESS;
+}
+
+int vdo_create_bio(struct bio **bio_ptr)
+{
+ return create_multi_block_bio(1, bio_ptr);
+}
+
+void vdo_free_bio(struct bio *bio)
+{
+ if (bio == NULL)
+ return;
+
+ bio_uninit(bio);
+ vdo_free(vdo_forget(bio));
+}
+
+int allocate_vio_components(struct vdo *vdo, enum vio_type vio_type,
+ enum vio_priority priority, void *parent,
+ unsigned int block_count, char *data, struct vio *vio)
+{
+ struct bio *bio;
+ int result;
+
+ result = VDO_ASSERT(block_count <= MAX_BLOCKS_PER_VIO,
+ "block count %u does not exceed maximum %u", block_count,
+ MAX_BLOCKS_PER_VIO);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ result = VDO_ASSERT(((vio_type != VIO_TYPE_UNINITIALIZED) && (vio_type != VIO_TYPE_DATA)),
+ "%d is a metadata type", vio_type);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ result = create_multi_block_bio(block_count, &bio);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ initialize_vio(vio, bio, block_count, vio_type, priority, vdo);
+ vio->completion.parent = parent;
+ vio->data = data;
+ return VDO_SUCCESS;
+}
+
+/**
+ * create_multi_block_metadata_vio() - Create a vio.
+ * @vdo: The vdo on which the vio will operate.
+ * @vio_type: The type of vio to create.
+ * @priority: The relative priority to assign to the vio.
+ * @parent: The parent of the vio.
+ * @block_count: The size of the vio in blocks.
+ * @data: The buffer.
+ * @vio_ptr: A pointer to hold the new vio.
+ *
+ * Return: VDO_SUCCESS or an error.
+ */
+int create_multi_block_metadata_vio(struct vdo *vdo, enum vio_type vio_type,
+ enum vio_priority priority, void *parent,
+ unsigned int block_count, char *data,
+ struct vio **vio_ptr)
+{
+ struct vio *vio;
+ int result;
+
+ BUILD_BUG_ON(sizeof(struct vio) > 256);
+
+ /*
+ * Metadata vios should use direct allocation and not use the buffer pool, which is
+ * reserved for submissions from the linux block layer.
+ */
+ result = vdo_allocate(1, struct vio, __func__, &vio);
+ if (result != VDO_SUCCESS) {
+ vdo_log_error("metadata vio allocation failure %d", result);
+ return result;
+ }
+
+ result = allocate_vio_components(vdo, vio_type, priority, parent, block_count,
+ data, vio);
+ if (result != VDO_SUCCESS) {
+ vdo_free(vio);
+ return result;
+ }
+
+ *vio_ptr = vio;
+ return VDO_SUCCESS;
+}
+
+/**
+ * free_vio_components() - Free the components of a vio embedded in a larger structure.
+ * @vio: The vio to destroy
+ */
+void free_vio_components(struct vio *vio)
+{
+ if (vio == NULL)
+ return;
+
+ BUG_ON(is_data_vio(vio));
+ vdo_free_bio(vdo_forget(vio->bio));
+}
+
+/**
+ * free_vio() - Destroy a vio.
+ * @vio: The vio to destroy.
+ */
+void free_vio(struct vio *vio)
+{
+ free_vio_components(vio);
+ vdo_free(vio);
+}
+
+/* Set bio properties for a VDO read or write. */
+void vdo_set_bio_properties(struct bio *bio, struct vio *vio, bio_end_io_t callback,
+ blk_opf_t bi_opf, physical_block_number_t pbn)
+{
+ struct vdo *vdo = vio->completion.vdo;
+ struct device_config *config = vdo->device_config;
+
+ pbn -= vdo->geometry.bio_offset;
+ vio->bio_zone = ((pbn / config->thread_counts.bio_rotation_interval) %
+ config->thread_counts.bio_threads);
+
+ bio->bi_private = vio;
+ bio->bi_end_io = callback;
+ bio->bi_opf = bi_opf;
+ bio->bi_iter.bi_sector = pbn * VDO_SECTORS_PER_BLOCK;
+}
+
+/*
+ * Prepares the bio to perform IO with the specified buffer. May only be used on a VDO-allocated
+ * bio, as it assumes the bio wraps a 4k buffer that is 4k aligned, but there does not have to be a
+ * vio associated with the bio.
+ */
+int vio_reset_bio(struct vio *vio, char *data, bio_end_io_t callback,
+ blk_opf_t bi_opf, physical_block_number_t pbn)
+{
+ int bvec_count, offset, len, i;
+ struct bio *bio = vio->bio;
+
+ bio_reset(bio, bio->bi_bdev, bi_opf);
+ vdo_set_bio_properties(bio, vio, callback, bi_opf, pbn);
+ if (data == NULL)
+ return VDO_SUCCESS;
+
+ bio->bi_io_vec = bio->bi_inline_vecs;
+ bio->bi_max_vecs = vio->block_count + 1;
+ len = VDO_BLOCK_SIZE * vio->block_count;
+ offset = offset_in_page(data);
+ bvec_count = DIV_ROUND_UP(offset + len, PAGE_SIZE);
+
+ /*
+ * If we knew that data was always on one page, or contiguous pages, we wouldn't need the
+ * loop. But if we're using vmalloc, it's not impossible that the data is in different
+ * pages that can't be merged in bio_add_page...
+ */
+ for (i = 0; (i < bvec_count) && (len > 0); i++) {
+ struct page *page;
+ int bytes_added;
+ int bytes = PAGE_SIZE - offset;
+
+ if (bytes > len)
+ bytes = len;
+
+ page = is_vmalloc_addr(data) ? vmalloc_to_page(data) : virt_to_page(data);
+ bytes_added = bio_add_page(bio, page, bytes, offset);
+
+ if (bytes_added != bytes) {
+ return vdo_log_error_strerror(VDO_BIO_CREATION_FAILED,
+ "Could only add %i bytes to bio",
+ bytes_added);
+ }
+
+ data += bytes;
+ len -= bytes;
+ offset = 0;
+ }
+
+ return VDO_SUCCESS;
+}
+
+/**
+ * update_vio_error_stats() - Update per-vio error stats and log the error.
+ * @vio: The vio which got an error.
+ * @format: The format of the message to log (a printf style format).
+ */
+void update_vio_error_stats(struct vio *vio, const char *format, ...)
+{
+ static DEFINE_RATELIMIT_STATE(error_limiter, DEFAULT_RATELIMIT_INTERVAL,
+ DEFAULT_RATELIMIT_BURST);
+ va_list args;
+ int priority;
+ struct vdo *vdo = vio->completion.vdo;
+
+ switch (vio->completion.result) {
+ case VDO_READ_ONLY:
+ atomic64_inc(&vdo->stats.read_only_error_count);
+ return;
+
+ case VDO_NO_SPACE:
+ atomic64_inc(&vdo->stats.no_space_error_count);
+ priority = VDO_LOG_DEBUG;
+ break;
+
+ default:
+ priority = VDO_LOG_ERR;
+ }
+
+ if (!__ratelimit(&error_limiter))
+ return;
+
+ va_start(args, format);
+ vdo_vlog_strerror(priority, vio->completion.result, VDO_LOGGING_MODULE_NAME,
+ format, args);
+ va_end(args);
+}
+
+void vio_record_metadata_io_error(struct vio *vio)
+{
+ const char *description;
+ physical_block_number_t pbn = pbn_from_vio_bio(vio->bio);
+
+ if (bio_op(vio->bio) == REQ_OP_READ) {
+ description = "read";
+ } else if ((vio->bio->bi_opf & REQ_PREFLUSH) == REQ_PREFLUSH) {
+ description = (((vio->bio->bi_opf & REQ_FUA) == REQ_FUA) ?
+ "write+preflush+fua" :
+ "write+preflush");
+ } else if ((vio->bio->bi_opf & REQ_FUA) == REQ_FUA) {
+ description = "write+fua";
+ } else {
+ description = "write";
+ }
+
+ update_vio_error_stats(vio,
+ "Completing %s vio of type %u for physical block %llu with error",
+ description, vio->type, (unsigned long long) pbn);
+}
+
+/**
+ * make_vio_pool() - Create a new vio pool.
+ * @vdo: The vdo.
+ * @pool_size: The number of vios in the pool.
+ * @thread_id: The ID of the thread using this pool.
+ * @vio_type: The type of vios in the pool.
+ * @priority: The priority with which vios from the pool should be enqueued.
+ * @context: The context that each entry will have.
+ * @pool_ptr: The resulting pool.
+ *
+ * Return: A success or error code.
+ */
+int make_vio_pool(struct vdo *vdo, size_t pool_size, thread_id_t thread_id,
+ enum vio_type vio_type, enum vio_priority priority, void *context,
+ struct vio_pool **pool_ptr)
+{
+ struct vio_pool *pool;
+ char *ptr;
+ int result;
+
+ result = vdo_allocate_extended(struct vio_pool, pool_size, struct pooled_vio,
+ __func__, &pool);
+ if (result != VDO_SUCCESS)
+ return result;
+
+ pool->thread_id = thread_id;
+ INIT_LIST_HEAD(&pool->available);
+ INIT_LIST_HEAD(&pool->busy);
+
+ result = vdo_allocate(pool_size * VDO_BLOCK_SIZE, char,
+ "VIO pool buffer", &pool->buffer);
+ if (result != VDO_SUCCESS) {
+ free_vio_pool(pool);
+ return result;
+ }
+
+ ptr = pool->buffer;
+ for (pool->size = 0; pool->size < pool_size; pool->size++, ptr += VDO_BLOCK_SIZE) {
+ struct pooled_vio *pooled = &pool->vios[pool->size];
+
+ result = allocate_vio_components(vdo, vio_type, priority, NULL, 1, ptr,
+ &pooled->vio);
+ if (result != VDO_SUCCESS) {
+ free_vio_pool(pool);
+ return result;
+ }
+
+ pooled->context = context;
+ list_add_tail(&pooled->pool_entry, &pool->available);
+ }
+
+ *pool_ptr = pool;
+ return VDO_SUCCESS;
+}
+
+/**
+ * free_vio_pool() - Destroy a vio pool.
+ * @pool: The pool to free.
+ */
+void free_vio_pool(struct vio_pool *pool)
+{
+ struct pooled_vio *pooled, *tmp;
+
+ if (pool == NULL)
+ return;
+
+ /* Remove all available vios from the object pool. */
+ VDO_ASSERT_LOG_ONLY(!vdo_waitq_has_waiters(&pool->waiting),
+ "VIO pool must not have any waiters when being freed");
+ VDO_ASSERT_LOG_ONLY((pool->busy_count == 0),
+ "VIO pool must not have %zu busy entries when being freed",
+ pool->busy_count);
+ VDO_ASSERT_LOG_ONLY(list_empty(&pool->busy),
+ "VIO pool must not have busy entries when being freed");
+
+ list_for_each_entry_safe(pooled, tmp, &pool->available, pool_entry) {
+ list_del(&pooled->pool_entry);
+ free_vio_components(&pooled->vio);
+ pool->size--;
+ }
+
+ VDO_ASSERT_LOG_ONLY(pool->size == 0,
+ "VIO pool must not have missing entries when being freed");
+
+ vdo_free(vdo_forget(pool->buffer));
+ vdo_free(pool);
+}
+
+/**
+ * is_vio_pool_busy() - Check whether an vio pool has outstanding entries.
+ *
+ * Return: true if the pool is busy.
+ */
+bool is_vio_pool_busy(struct vio_pool *pool)
+{
+ return (pool->busy_count != 0);
+}
+
+/**
+ * acquire_vio_from_pool() - Acquire a vio and buffer from the pool (asynchronous).
+ * @pool: The vio pool.
+ * @waiter: Object that is requesting a vio.
+ */
+void acquire_vio_from_pool(struct vio_pool *pool, struct vdo_waiter *waiter)
+{
+ struct pooled_vio *pooled;
+
+ VDO_ASSERT_LOG_ONLY((pool->thread_id == vdo_get_callback_thread_id()),
+ "acquire from active vio_pool called from correct thread");
+
+ if (list_empty(&pool->available)) {
+ vdo_waitq_enqueue_waiter(&pool->waiting, waiter);
+ return;
+ }
+
+ pooled = list_first_entry(&pool->available, struct pooled_vio, pool_entry);
+ pool->busy_count++;
+ list_move_tail(&pooled->pool_entry, &pool->busy);
+ (*waiter->callback)(waiter, pooled);
+}
+
+/**
+ * return_vio_to_pool() - Return a vio to the pool
+ * @pool: The vio pool.
+ * @vio: The pooled vio to return.
+ */
+void return_vio_to_pool(struct vio_pool *pool, struct pooled_vio *vio)
+{
+ VDO_ASSERT_LOG_ONLY((pool->thread_id == vdo_get_callback_thread_id()),
+ "vio pool entry returned on same thread as it was acquired");
+
+ vio->vio.completion.error_handler = NULL;
+ vio->vio.completion.parent = NULL;
+ if (vdo_waitq_has_waiters(&pool->waiting)) {
+ vdo_waitq_notify_next_waiter(&pool->waiting, NULL, vio);
+ return;
+ }
+
+ list_move_tail(&vio->pool_entry, &pool->available);
+ --pool->busy_count;
+}
+
+/*
+ * Various counting functions for statistics.
+ * These are used for bios coming into VDO, as well as bios generated by VDO.
+ */
+void vdo_count_bios(struct atomic_bio_stats *bio_stats, struct bio *bio)
+{
+ if (((bio->bi_opf & REQ_PREFLUSH) != 0) && (bio->bi_iter.bi_size == 0)) {
+ atomic64_inc(&bio_stats->empty_flush);
+ atomic64_inc(&bio_stats->flush);
+ return;
+ }
+
+ switch (bio_op(bio)) {
+ case REQ_OP_WRITE:
+ atomic64_inc(&bio_stats->write);
+ break;
+ case REQ_OP_READ:
+ atomic64_inc(&bio_stats->read);
+ break;
+ case REQ_OP_DISCARD:
+ atomic64_inc(&bio_stats->discard);
+ break;
+ /*
+ * All other operations are filtered out in dmvdo.c, or not created by VDO, so
+ * shouldn't exist.
+ */
+ default:
+ VDO_ASSERT_LOG_ONLY(0, "Bio operation %d not a write, read, discard, or empty flush",
+ bio_op(bio));
+ }
+
+ if ((bio->bi_opf & REQ_PREFLUSH) != 0)
+ atomic64_inc(&bio_stats->flush);
+ if (bio->bi_opf & REQ_FUA)
+ atomic64_inc(&bio_stats->fua);
+}
+
+static void count_all_bios_completed(struct vio *vio, struct bio *bio)
+{
+ struct atomic_statistics *stats = &vio->completion.vdo->stats;
+
+ if (is_data_vio(vio)) {
+ vdo_count_bios(&stats->bios_out_completed, bio);
+ return;
+ }
+
+ vdo_count_bios(&stats->bios_meta_completed, bio);
+ if (vio->type == VIO_TYPE_RECOVERY_JOURNAL)
+ vdo_count_bios(&stats->bios_journal_completed, bio);
+ else if (vio->type == VIO_TYPE_BLOCK_MAP)
+ vdo_count_bios(&stats->bios_page_cache_completed, bio);
+}
+
+void vdo_count_completed_bios(struct bio *bio)
+{
+ struct vio *vio = (struct vio *) bio->bi_private;
+
+ atomic64_inc(&vio->completion.vdo->stats.bios_completed);
+ count_all_bios_completed(vio, bio);
+}