author     Daniel Baumann <daniel.baumann@progress-linux.org>  2024-08-07 13:11:27 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>  2024-08-07 13:11:27 +0000
commit     34996e42f82bfd60bc2c191e5cae3c6ab233ec6c (patch)
tree       62db60558cbf089714b48daeabca82bf2b20b20e /drivers/md/dm-vdo/physical-zone.c
parent     Adding debian version 6.8.12-1. (diff)
Merging upstream version 6.9.7.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'drivers/md/dm-vdo/physical-zone.c')
-rw-r--r--	drivers/md/dm-vdo/physical-zone.c | 644
1 file changed, 644 insertions, 0 deletions
diff --git a/drivers/md/dm-vdo/physical-zone.c b/drivers/md/dm-vdo/physical-zone.c
new file mode 100644
index 0000000000..2fee3a7c11
--- /dev/null
+++ b/drivers/md/dm-vdo/physical-zone.c
@@ -0,0 +1,644 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright 2023 Red Hat
 */

#include "physical-zone.h"

#include <linux/list.h>

#include "logger.h"
#include "memory-alloc.h"
#include "permassert.h"

#include "block-map.h"
#include "completion.h"
#include "constants.h"
#include "data-vio.h"
#include "dedupe.h"
#include "encodings.h"
#include "flush.h"
#include "int-map.h"
#include "slab-depot.h"
#include "status-codes.h"
#include "vdo.h"

/* Each user data_vio needs a PBN read lock and write lock. */
#define LOCK_POOL_CAPACITY (2 * MAXIMUM_VDO_USER_VIOS)

struct pbn_lock_implementation {
	enum pbn_lock_type type;
	const char *name;
	const char *release_reason;
};

/* This array must have an entry for every pbn_lock_type value. */
static const struct pbn_lock_implementation LOCK_IMPLEMENTATIONS[] = {
	[VIO_READ_LOCK] = {
		.type = VIO_READ_LOCK,
		.name = "read",
		.release_reason = "candidate duplicate",
	},
	[VIO_WRITE_LOCK] = {
		.type = VIO_WRITE_LOCK,
		.name = "write",
		.release_reason = "newly allocated",
	},
	[VIO_BLOCK_MAP_WRITE_LOCK] = {
		.type = VIO_BLOCK_MAP_WRITE_LOCK,
		.name = "block map write",
		.release_reason = "block map write",
	},
};

static inline bool has_lock_type(const struct pbn_lock *lock, enum pbn_lock_type type)
{
	return (lock->implementation == &LOCK_IMPLEMENTATIONS[type]);
}

/**
 * vdo_is_pbn_read_lock() - Check whether a pbn_lock is a read lock.
 * @lock: The lock to check.
 *
 * Return: true if the lock is a read lock.
 */
bool vdo_is_pbn_read_lock(const struct pbn_lock *lock)
{
	return has_lock_type(lock, VIO_READ_LOCK);
}

static inline void set_pbn_lock_type(struct pbn_lock *lock, enum pbn_lock_type type)
{
	lock->implementation = &LOCK_IMPLEMENTATIONS[type];
}

/**
 * vdo_downgrade_pbn_write_lock() - Downgrade a PBN write lock to a PBN read lock.
 * @lock: The PBN write lock to downgrade.
 *
 * The lock holder count is cleared and the caller is responsible for setting the new count.
 */
void vdo_downgrade_pbn_write_lock(struct pbn_lock *lock, bool compressed_write)
{
	VDO_ASSERT_LOG_ONLY(!vdo_is_pbn_read_lock(lock),
			    "PBN lock must not already have been downgraded");
	VDO_ASSERT_LOG_ONLY(!has_lock_type(lock, VIO_BLOCK_MAP_WRITE_LOCK),
			    "must not downgrade block map write locks");
	VDO_ASSERT_LOG_ONLY(lock->holder_count == 1,
			    "PBN write lock should have one holder but has %u",
			    lock->holder_count);
	/*
	 * data_vio write locks are downgraded in place--the writer retains the hold on the lock.
	 * If this was a compressed write, the holder has not yet journaled its own inc ref;
	 * otherwise, it has.
	 */
	lock->increment_limit =
		(compressed_write ? MAXIMUM_REFERENCE_COUNT : MAXIMUM_REFERENCE_COUNT - 1);
	set_pbn_lock_type(lock, VIO_READ_LOCK);
}

/**
 * vdo_claim_pbn_lock_increment() - Try to claim one of the available reference count increments
 *                                  on a read lock.
 * @lock: The PBN read lock from which to claim an increment.
 *
 * Claims may be attempted from any thread. A claim is only valid until the PBN lock is released.
 *
 * Return: true if the claim succeeded, guaranteeing one increment can be made without overflowing
 *         the PBN's reference count.
 */
bool vdo_claim_pbn_lock_increment(struct pbn_lock *lock)
{
	/*
	 * Claim the next free reference atomically since hash locks from multiple hash zone
	 * threads might be concurrently deduplicating against a single PBN lock on a compressed
	 * block. As long as hitting the increment limit will lead to the PBN lock being released
	 * in a sane time-frame, we won't overflow a 32-bit claim counter, allowing a simple add
	 * instead of a compare-and-swap.
	 */
	u32 claim_number = (u32) atomic_add_return(1, &lock->increments_claimed);

	return (claim_number <= lock->increment_limit);
}

/**
 * vdo_assign_pbn_lock_provisional_reference() - Inform a PBN lock that it is responsible for a
 *                                               provisional reference.
 * @lock: The PBN lock.
 */
void vdo_assign_pbn_lock_provisional_reference(struct pbn_lock *lock)
{
	VDO_ASSERT_LOG_ONLY(!lock->has_provisional_reference,
			    "lock does not have a provisional reference");
	lock->has_provisional_reference = true;
}

/**
 * vdo_unassign_pbn_lock_provisional_reference() - Inform a PBN lock that it is no longer
 *                                                 responsible for a provisional reference.
 * @lock: The PBN lock.
 */
void vdo_unassign_pbn_lock_provisional_reference(struct pbn_lock *lock)
{
	lock->has_provisional_reference = false;
}

/**
 * release_pbn_lock_provisional_reference() - If the lock is responsible for a provisional
 *                                            reference, release that reference.
 * @lock: The lock.
 * @locked_pbn: The PBN covered by the lock.
 * @allocator: The block allocator from which to release the reference.
 *
 * This method is called when the lock is released.
 */
static void release_pbn_lock_provisional_reference(struct pbn_lock *lock,
						   physical_block_number_t locked_pbn,
						   struct block_allocator *allocator)
{
	int result;

	if (!vdo_pbn_lock_has_provisional_reference(lock))
		return;

	result = vdo_release_block_reference(allocator, locked_pbn);
	if (result != VDO_SUCCESS) {
		vdo_log_error_strerror(result,
				       "Failed to release reference to %s physical block %llu",
				       lock->implementation->release_reason,
				       (unsigned long long) locked_pbn);
	}

	vdo_unassign_pbn_lock_provisional_reference(lock);
}

/**
 * union idle_pbn_lock - PBN lock list entries.
 *
 * Unused (idle) PBN locks are kept in a list. Just like in a malloc implementation, the lock
 * structure is unused memory, so we can save a bit of space (and not pollute the lock structure
 * proper) by using a union to overlay the lock structure with the free list.
 */
typedef union {
	/** @entry: Only used while locks are in the pool. */
	struct list_head entry;
	/** @lock: Only used while locks are not in the pool. */
	struct pbn_lock lock;
} idle_pbn_lock;

/**
 * struct pbn_lock_pool - list of PBN locks.
 *
 * The lock pool is little more than the memory allocated for the locks.
 */
struct pbn_lock_pool {
	/** @capacity: The number of locks allocated for the pool. */
	size_t capacity;
	/** @borrowed: The number of locks currently borrowed from the pool. */
	size_t borrowed;
	/** @idle_list: A list containing all idle PBN lock instances. */
	struct list_head idle_list;
	/** @locks: The memory for all the locks allocated by this pool. */
	idle_pbn_lock locks[];
};

/**
 * return_pbn_lock_to_pool() - Return a pbn lock to its pool.
 * @pool: The pool from which the lock was borrowed.
 * @lock: The last reference to the lock being returned.
 *
 * It must be the last live reference, as if the memory were being freed (the lock memory will be
 * re-initialized or zeroed).
 */
static void return_pbn_lock_to_pool(struct pbn_lock_pool *pool, struct pbn_lock *lock)
{
	idle_pbn_lock *idle;

	/* A bit expensive, but will promptly catch some use-after-free errors. */
	memset(lock, 0, sizeof(*lock));

	idle = container_of(lock, idle_pbn_lock, lock);
	INIT_LIST_HEAD(&idle->entry);
	list_add_tail(&idle->entry, &pool->idle_list);

	VDO_ASSERT_LOG_ONLY(pool->borrowed > 0, "shouldn't return more than borrowed");
	pool->borrowed -= 1;
}

/**
 * make_pbn_lock_pool() - Create a new PBN lock pool and all the lock instances it can loan out.
 *
 * @capacity: The number of PBN locks to allocate for the pool.
 * @pool_ptr: A pointer to receive the new pool.
 *
 * Return: VDO_SUCCESS or an error code.
 */
static int make_pbn_lock_pool(size_t capacity, struct pbn_lock_pool **pool_ptr)
{
	size_t i;
	struct pbn_lock_pool *pool;
	int result;

	result = vdo_allocate_extended(struct pbn_lock_pool, capacity, idle_pbn_lock,
				       __func__, &pool);
	if (result != VDO_SUCCESS)
		return result;

	pool->capacity = capacity;
	pool->borrowed = capacity;
	INIT_LIST_HEAD(&pool->idle_list);

	for (i = 0; i < capacity; i++)
		return_pbn_lock_to_pool(pool, &pool->locks[i].lock);

	*pool_ptr = pool;
	return VDO_SUCCESS;
}

/**
 * free_pbn_lock_pool() - Free a PBN lock pool.
 * @pool: The lock pool to free.
 *
 * This also frees all the PBN locks it allocated, so the caller must ensure that all locks have
 * been returned to the pool.
 */
static void free_pbn_lock_pool(struct pbn_lock_pool *pool)
{
	if (pool == NULL)
		return;

	VDO_ASSERT_LOG_ONLY(pool->borrowed == 0,
			    "All PBN locks must be returned to the pool before it is freed, but %zu locks are still on loan",
			    pool->borrowed);
	vdo_free(pool);
}

/**
 * borrow_pbn_lock_from_pool() - Borrow a PBN lock from the pool and initialize it with the
 *                               provided type.
 * @pool: The pool from which to borrow.
 * @type: The type with which to initialize the lock.
 * @lock_ptr: A pointer to receive the borrowed lock.
 *
 * Pools do not grow on demand or allocate memory, so this will fail if the pool is empty.
 * Borrowed locks are still associated with this pool and must be returned to only this pool.
 *
 * Return: VDO_SUCCESS, or VDO_LOCK_ERROR if the pool is empty.
 */
static int __must_check borrow_pbn_lock_from_pool(struct pbn_lock_pool *pool,
						  enum pbn_lock_type type,
						  struct pbn_lock **lock_ptr)
{
	int result;
	struct list_head *idle_entry;
	idle_pbn_lock *idle;

	if (pool->borrowed >= pool->capacity)
		return vdo_log_error_strerror(VDO_LOCK_ERROR,
					      "no free PBN locks left to borrow");
	pool->borrowed += 1;

	result = VDO_ASSERT(!list_empty(&pool->idle_list),
			    "idle list should not be empty if pool not at capacity");
	if (result != VDO_SUCCESS)
		return result;

	idle_entry = pool->idle_list.prev;
	list_del(idle_entry);
	memset(idle_entry, 0, sizeof(*idle_entry));

	idle = list_entry(idle_entry, idle_pbn_lock, entry);
	idle->lock.holder_count = 0;
	set_pbn_lock_type(&idle->lock, type);

	*lock_ptr = &idle->lock;
	return VDO_SUCCESS;
}

/**
 * initialize_zone() - Initialize a physical zone.
 * @vdo: The vdo to which the zone will belong.
 * @zones: The physical_zones to which the zone being initialized belongs.
 *
 * Return: VDO_SUCCESS or an error code.
 */
static int initialize_zone(struct vdo *vdo, struct physical_zones *zones)
{
	int result;
	zone_count_t zone_number = zones->zone_count;
	struct physical_zone *zone = &zones->zones[zone_number];

	result = vdo_int_map_create(VDO_LOCK_MAP_CAPACITY, &zone->pbn_operations);
	if (result != VDO_SUCCESS)
		return result;

	result = make_pbn_lock_pool(LOCK_POOL_CAPACITY, &zone->lock_pool);
	if (result != VDO_SUCCESS) {
		vdo_int_map_free(zone->pbn_operations);
		return result;
	}

	zone->zone_number = zone_number;
	zone->thread_id = vdo->thread_config.physical_threads[zone_number];
	zone->allocator = &vdo->depot->allocators[zone_number];
	zone->next = &zones->zones[(zone_number + 1) % vdo->thread_config.physical_zone_count];
	result = vdo_make_default_thread(vdo, zone->thread_id);
	if (result != VDO_SUCCESS) {
		free_pbn_lock_pool(vdo_forget(zone->lock_pool));
		vdo_int_map_free(zone->pbn_operations);
		return result;
	}
	return result;
}

/**
 * vdo_make_physical_zones() - Make the physical zones for a vdo.
 * @vdo: The vdo being constructed.
 * @zones_ptr: A pointer to hold the zones.
 *
 * Return: VDO_SUCCESS or an error code.
 */
int vdo_make_physical_zones(struct vdo *vdo, struct physical_zones **zones_ptr)
{
	struct physical_zones *zones;
	int result;
	zone_count_t zone_count = vdo->thread_config.physical_zone_count;

	if (zone_count == 0)
		return VDO_SUCCESS;

	result = vdo_allocate_extended(struct physical_zones, zone_count,
				       struct physical_zone, __func__, &zones);
	if (result != VDO_SUCCESS)
		return result;

	for (zones->zone_count = 0; zones->zone_count < zone_count; zones->zone_count++) {
		result = initialize_zone(vdo, zones);
		if (result != VDO_SUCCESS) {
			vdo_free_physical_zones(zones);
			return result;
		}
	}

	*zones_ptr = zones;
	return VDO_SUCCESS;
}

/**
 * vdo_free_physical_zones() - Destroy the physical zones.
 * @zones: The zones to free.
 */
void vdo_free_physical_zones(struct physical_zones *zones)
{
	zone_count_t index;

	if (zones == NULL)
		return;

	for (index = 0; index < zones->zone_count; index++) {
		struct physical_zone *zone = &zones->zones[index];

		free_pbn_lock_pool(vdo_forget(zone->lock_pool));
		vdo_int_map_free(vdo_forget(zone->pbn_operations));
	}

	vdo_free(zones);
}

/**
 * vdo_get_physical_zone_pbn_lock() - Get the lock on a PBN if one exists.
 * @zone: The physical zone responsible for the PBN.
 * @pbn: The physical block number whose lock is desired.
 *
 * Return: The lock or NULL if the PBN is not locked.
 */
struct pbn_lock *vdo_get_physical_zone_pbn_lock(struct physical_zone *zone,
						physical_block_number_t pbn)
{
	return ((zone == NULL) ? NULL : vdo_int_map_get(zone->pbn_operations, pbn));
}

/**
 * vdo_attempt_physical_zone_pbn_lock() - Attempt to lock a physical block in the zone
 *                                        responsible for it.
 * @zone: The physical zone responsible for the PBN.
 * @pbn: The physical block number to lock.
 * @type: The type with which to initialize a new lock.
 * @lock_ptr: A pointer to receive the lock, existing or new.
 *
 * If the PBN is already locked, the existing lock will be returned. Otherwise, a new lock
 * instance will be borrowed from the pool, initialized, and returned. The lock owner will be NULL
 * for a new lock acquired by the caller, who is responsible for setting that field promptly. The
 * lock owner will be non-NULL when there is already an existing lock on the PBN.
 *
 * Return: VDO_SUCCESS or an error.
 */
int vdo_attempt_physical_zone_pbn_lock(struct physical_zone *zone,
				       physical_block_number_t pbn,
				       enum pbn_lock_type type,
				       struct pbn_lock **lock_ptr)
{
	/*
	 * Borrow and prepare a lock from the pool so we don't have to do two int_map accesses in
	 * the common case of no lock contention.
	 */
	struct pbn_lock *lock, *new_lock = NULL;
	int result;

	result = borrow_pbn_lock_from_pool(zone->lock_pool, type, &new_lock);
	if (result != VDO_SUCCESS) {
		VDO_ASSERT_LOG_ONLY(false, "must always be able to borrow a PBN lock");
		return result;
	}

	result = vdo_int_map_put(zone->pbn_operations, pbn, new_lock, false,
				 (void **) &lock);
	if (result != VDO_SUCCESS) {
		return_pbn_lock_to_pool(zone->lock_pool, new_lock);
		return result;
	}

	if (lock != NULL) {
		/* The lock is already held, so we don't need the borrowed one. */
		return_pbn_lock_to_pool(zone->lock_pool, vdo_forget(new_lock));
		result = VDO_ASSERT(lock->holder_count > 0, "physical block %llu lock held",
				    (unsigned long long) pbn);
		if (result != VDO_SUCCESS)
			return result;
		*lock_ptr = lock;
	} else {
		*lock_ptr = new_lock;
	}
	return VDO_SUCCESS;
}

/**
 * allocate_and_lock_block() - Attempt to allocate a block from this zone.
 * @allocation: The struct allocation of the data_vio attempting to allocate.
 *
 * If a block is allocated, the recipient will also hold a lock on it.
 *
 * Return: VDO_SUCCESS if a block was allocated, or an error code.
 */
static int allocate_and_lock_block(struct allocation *allocation)
{
	int result;
	struct pbn_lock *lock;

	VDO_ASSERT_LOG_ONLY(allocation->lock == NULL,
			    "must not allocate a block while already holding a lock on one");

	result = vdo_allocate_block(allocation->zone->allocator, &allocation->pbn);
	if (result != VDO_SUCCESS)
		return result;

	result = vdo_attempt_physical_zone_pbn_lock(allocation->zone, allocation->pbn,
						    allocation->write_lock_type, &lock);
	if (result != VDO_SUCCESS)
		return result;

	if (lock->holder_count > 0) {
		/* This block is already locked, which should be impossible. */
		return vdo_log_error_strerror(VDO_LOCK_ERROR,
					      "Newly allocated block %llu was spuriously locked (holder_count=%u)",
					      (unsigned long long) allocation->pbn,
					      lock->holder_count);
	}

	/* We've successfully acquired a new lock, so mark it as ours. */
	lock->holder_count += 1;
	allocation->lock = lock;
	vdo_assign_pbn_lock_provisional_reference(lock);
	return VDO_SUCCESS;
}

/**
 * retry_allocation() - Retry allocating a block now that we're done waiting for scrubbing.
 * @waiter: The allocating_vio that was waiting to allocate.
 * @context: The context (unused).
 */
static void retry_allocation(struct vdo_waiter *waiter, void *context __always_unused)
{
	struct data_vio *data_vio = vdo_waiter_as_data_vio(waiter);

	/* Now that some slab has scrubbed, restart the allocation process. */
	data_vio->allocation.wait_for_clean_slab = false;
	data_vio->allocation.first_allocation_zone = data_vio->allocation.zone->zone_number;
	continue_data_vio(data_vio);
}

/**
 * continue_allocating() - Continue searching for an allocation by enqueuing to wait for scrubbing
 *                         or switching to the next zone.
 * @data_vio: The data_vio attempting to get an allocation.
 *
 * This method should only be called from the error handler set in data_vio_allocate_data_block.
 *
 * Return: true if the allocation process has continued in another zone.
 */
static bool continue_allocating(struct data_vio *data_vio)
{
	struct allocation *allocation = &data_vio->allocation;
	struct physical_zone *zone = allocation->zone;
	struct vdo_completion *completion = &data_vio->vio.completion;
	int result = VDO_SUCCESS;
	bool was_waiting = allocation->wait_for_clean_slab;
	bool tried_all = (allocation->first_allocation_zone == zone->next->zone_number);

	vdo_reset_completion(completion);

	if (tried_all && !was_waiting) {
		/*
		 * We've already looked in all the zones, and found nothing. So go through the
		 * zones again, and wait for each to scrub before trying to allocate.
		 */
		allocation->wait_for_clean_slab = true;
		allocation->first_allocation_zone = zone->zone_number;
	}

	if (allocation->wait_for_clean_slab) {
		data_vio->waiter.callback = retry_allocation;
		result = vdo_enqueue_clean_slab_waiter(zone->allocator,
						       &data_vio->waiter);
		if (result == VDO_SUCCESS) {
			/* We've enqueued to wait for a slab to be scrubbed. */
			return true;
		}

		if ((result != VDO_NO_SPACE) || (was_waiting && tried_all)) {
			vdo_set_completion_result(completion, result);
			return false;
		}
	}

	allocation->zone = zone->next;
	completion->callback_thread_id = allocation->zone->thread_id;
	vdo_launch_completion(completion);
	return true;
}

/**
 * vdo_allocate_block_in_zone() - Attempt to allocate a block in the current physical zone, and if
 *                                that fails try the next if possible.
 * @data_vio: The data_vio needing an allocation.
 *
 * Return: true if a block was allocated; if not, the data_vio will have been dispatched so the
 *         caller must not touch it.
 */
bool vdo_allocate_block_in_zone(struct data_vio *data_vio)
{
	int result = allocate_and_lock_block(&data_vio->allocation);

	if (result == VDO_SUCCESS)
		return true;

	if ((result != VDO_NO_SPACE) || !continue_allocating(data_vio))
		continue_data_vio_with_error(data_vio, result);

	return false;
}

/**
 * vdo_release_physical_zone_pbn_lock() - Release a physical block lock if it is held and return
 *                                        it to the lock pool.
 * @zone: The physical zone in which the lock was obtained.
 * @locked_pbn: The physical block number to unlock.
 * @lock: The lock being released.
 *
 * It must be the last live reference, as if the memory were being freed (the lock memory will be
 * re-initialized or zeroed).
 */
void vdo_release_physical_zone_pbn_lock(struct physical_zone *zone,
					physical_block_number_t locked_pbn,
					struct pbn_lock *lock)
{
	struct pbn_lock *holder;

	if (lock == NULL)
		return;

	VDO_ASSERT_LOG_ONLY(lock->holder_count > 0,
			    "should not be releasing a lock that is not held");

	lock->holder_count -= 1;
	if (lock->holder_count > 0) {
		/* The lock was shared and is still referenced, so don't release it yet. */
		return;
	}

	holder = vdo_int_map_remove(zone->pbn_operations, locked_pbn);
	VDO_ASSERT_LOG_ONLY((lock == holder), "physical block lock mismatch for block %llu",
			    (unsigned long long) locked_pbn);

	release_pbn_lock_provisional_reference(lock, locked_pbn, zone->allocator);
	return_pbn_lock_to_pool(zone->lock_pool, lock);
}

/**
 * vdo_dump_physical_zone() - Dump information about a physical zone to the log for debugging.
 * @zone: The zone to dump.
 */
void vdo_dump_physical_zone(const struct physical_zone *zone)
{
	vdo_dump_block_allocator(zone->allocator);
}
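
A note on the pool idiom used above: while a pbn_lock is idle, its own memory doubles as its free-list linkage (the idle_pbn_lock union), so the pool never allocates after construction and borrowing or returning a lock is constant time. The following is a minimal userspace sketch of that borrow/return accounting only, not part of the kernel file; the names (fake_lock, fake_idle, fake_pool, pool_create, pool_borrow, pool_return) are hypothetical stand-ins, and a plain singly linked free list replaces <linux/list.h>.

	/* Minimal userspace sketch of the idle-lock pool pattern (hypothetical types). */
	#include <assert.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	struct fake_lock {                    /* stand-in for struct pbn_lock */
		unsigned int holder_count;
	};

	typedef union fake_idle {
		union fake_idle *next;        /* free-list linkage while idle */
		struct fake_lock lock;        /* live lock state while borrowed */
	} fake_idle;

	struct fake_pool {
		size_t capacity;
		size_t borrowed;
		fake_idle *idle_head;         /* LIFO list of idle entries */
		fake_idle locks[];            /* all lock memory lives here */
	};

	static void pool_return(struct fake_pool *pool, struct fake_lock *lock)
	{
		/* The lock is the union member, so its address is the union's address. */
		fake_idle *idle = (fake_idle *) lock;

		memset(lock, 0, sizeof(*lock));      /* catch use-after-free early */
		idle->next = pool->idle_head;
		pool->idle_head = idle;
		assert(pool->borrowed > 0);
		pool->borrowed--;
	}

	static struct fake_lock *pool_borrow(struct fake_pool *pool)
	{
		fake_idle *idle = pool->idle_head;

		if (idle == NULL)
			return NULL;                 /* pool is exhausted; it never grows */
		pool->idle_head = idle->next;
		pool->borrowed++;
		memset(&idle->lock, 0, sizeof(idle->lock));
		return &idle->lock;
	}

	static struct fake_pool *pool_create(size_t capacity)
	{
		struct fake_pool *pool = calloc(1, sizeof(*pool) + capacity * sizeof(fake_idle));
		size_t i;

		if (pool == NULL)
			return NULL;
		pool->capacity = capacity;
		pool->borrowed = capacity;           /* "return" each slot once to populate */
		for (i = 0; i < capacity; i++)
			pool_return(pool, &pool->locks[i].lock);
		return pool;
	}

	int main(void)
	{
		struct fake_pool *pool = pool_create(4);
		struct fake_lock *lock = pool_borrow(pool);

		lock->holder_count = 1;
		printf("borrowed=%zu\n", pool->borrowed);    /* prints 1 */
		lock->holder_count = 0;
		pool_return(pool, lock);
		printf("borrowed=%zu\n", pool->borrowed);    /* prints 0 */
		free(pool);
		return 0;
	}

Like the kernel's make_pbn_lock_pool(), the sketch initializes borrowed to capacity and then "returns" every slot once, so the same return path both populates the free list and exercises the accounting assertion.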