diff options
Diffstat (limited to 'lib/locks')
-rw-r--r-- | lib/locks/bakery/bakery_lock_coherent.c | 168 | ||||
-rw-r--r-- | lib/locks/bakery/bakery_lock_normal.c | 250 | ||||
-rw-r--r-- | lib/locks/exclusive/aarch32/spinlock.S | 43 | ||||
-rw-r--r-- | lib/locks/exclusive/aarch64/spinlock.S | 75 |
4 files changed, 536 insertions, 0 deletions
diff --git a/lib/locks/bakery/bakery_lock_coherent.c b/lib/locks/bakery/bakery_lock_coherent.c new file mode 100644 index 0000000..bcd89ef --- /dev/null +++ b/lib/locks/bakery/bakery_lock_coherent.c @@ -0,0 +1,168 @@ +/* + * Copyright (c) 2013-2018, Arm Limited and Contributors. All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include <assert.h> +#include <string.h> + +#include <arch_helpers.h> +#include <lib/bakery_lock.h> +#include <lib/el3_runtime/cpu_data.h> +#include <plat/common/platform.h> + +/* + * Functions in this file implement Bakery Algorithm for mutual exclusion with the + * bakery lock data structures in coherent memory. + * + * ARM architecture offers a family of exclusive access instructions to + * efficiently implement mutual exclusion with hardware support. However, as + * well as depending on external hardware, the these instructions have defined + * behavior only on certain memory types (cacheable and Normal memory in + * particular; see ARMv8 Architecture Reference Manual section B2.10). Use cases + * in trusted firmware are such that mutual exclusion implementation cannot + * expect that accesses to the lock have the specific type required by the + * architecture for these primitives to function (for example, not all + * contenders may have address translation enabled). + * + * This implementation does not use mutual exclusion primitives. It expects + * memory regions where the locks reside to be fully ordered and coherent + * (either by disabling address translation, or by assigning proper attributes + * when translation is enabled). + * + * Note that the ARM architecture guarantees single-copy atomicity for aligned + * accesses regardless of status of address translation. + */ + +#define assert_bakery_entry_valid(_entry, _bakery) do { \ + assert((_bakery) != NULL); \ + assert((_entry) < BAKERY_LOCK_MAX_CPUS); \ +} while (false) + +/* Obtain a ticket for a given CPU */ +static unsigned int bakery_get_ticket(bakery_lock_t *bakery, unsigned int me) +{ + unsigned int my_ticket, their_ticket; + unsigned int they; + + /* Prevent recursive acquisition */ + assert(bakery_ticket_number(bakery->lock_data[me]) == 0U); + + /* + * Flag that we're busy getting our ticket. All CPUs are iterated in the + * order of their ordinal position to decide the maximum ticket value + * observed so far. Our priority is set to be greater than the maximum + * observed priority + * + * Note that it's possible that more than one contender gets the same + * ticket value. That's OK as the lock is acquired based on the priority + * value, not the ticket value alone. + */ + my_ticket = 0U; + bakery->lock_data[me] = make_bakery_data(CHOOSING_TICKET, my_ticket); + for (they = 0U; they < BAKERY_LOCK_MAX_CPUS; they++) { + their_ticket = bakery_ticket_number(bakery->lock_data[they]); + if (their_ticket > my_ticket) + my_ticket = their_ticket; + } + + /* + * Compute ticket; then signal to other contenders waiting for us to + * finish calculating our ticket value that we're done + */ + ++my_ticket; + bakery->lock_data[me] = make_bakery_data(CHOSEN_TICKET, my_ticket); + + return my_ticket; +} + + +/* + * Acquire bakery lock + * + * Contending CPUs need first obtain a non-zero ticket and then calculate + * priority value. A contending CPU iterate over all other CPUs in the platform, + * which may be contending for the same lock, in the order of their ordinal + * position (CPU0, CPU1 and so on). A non-contending CPU will have its ticket + * (and priority) value as 0. The contending CPU compares its priority with that + * of others'. The CPU with the highest priority (lowest numerical value) + * acquires the lock + */ +void bakery_lock_get(bakery_lock_t *bakery) +{ + unsigned int they, me; + unsigned int my_ticket, my_prio, their_ticket; + unsigned int their_bakery_data; + + me = plat_my_core_pos(); + + assert_bakery_entry_valid(me, bakery); + + /* Get a ticket */ + my_ticket = bakery_get_ticket(bakery, me); + + /* + * Now that we got our ticket, compute our priority value, then compare + * with that of others, and proceed to acquire the lock + */ + my_prio = bakery_get_priority(my_ticket, me); + for (they = 0U; they < BAKERY_LOCK_MAX_CPUS; they++) { + if (me == they) + continue; + + /* Wait for the contender to get their ticket */ + do { + their_bakery_data = bakery->lock_data[they]; + } while (bakery_is_choosing(their_bakery_data)); + + /* + * If the other party is a contender, they'll have non-zero + * (valid) ticket value. If they do, compare priorities + */ + their_ticket = bakery_ticket_number(their_bakery_data); + if ((their_ticket != 0U) && + (bakery_get_priority(their_ticket, they) < my_prio)) { + /* + * They have higher priority (lower value). Wait for + * their ticket value to change (either release the lock + * to have it dropped to 0; or drop and probably content + * again for the same lock to have an even higher value) + */ + do { + wfe(); + } while (their_ticket == + bakery_ticket_number(bakery->lock_data[they])); + } + } + + /* + * Lock acquired. Ensure that any reads and writes from a shared + * resource in the critical section read/write values after the lock is + * acquired. + */ + dmbish(); +} + + +/* Release the lock and signal contenders */ +void bakery_lock_release(bakery_lock_t *bakery) +{ + unsigned int me = plat_my_core_pos(); + + assert_bakery_entry_valid(me, bakery); + assert(bakery_ticket_number(bakery->lock_data[me]) != 0U); + + /* + * Ensure that other observers see any stores in the critical section + * before releasing the lock. Also ensure all loads in the critical + * section are complete before releasing the lock. Release the lock by + * resetting ticket. Then signal other waiting contenders. + */ + dmbish(); + bakery->lock_data[me] = 0U; + + /* Required to ensure ordering of the following sev */ + dsb(); + sev(); +} diff --git a/lib/locks/bakery/bakery_lock_normal.c b/lib/locks/bakery/bakery_lock_normal.c new file mode 100644 index 0000000..9f27322 --- /dev/null +++ b/lib/locks/bakery/bakery_lock_normal.c @@ -0,0 +1,250 @@ +/* + * Copyright (c) 2015-2020, Arm Limited and Contributors. All rights reserved. + * Copyright (c) 2020, NVIDIA Corporation. All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include <assert.h> +#include <string.h> + +#include <arch_helpers.h> +#include <lib/bakery_lock.h> +#include <lib/el3_runtime/cpu_data.h> +#include <lib/utils_def.h> +#include <plat/common/platform.h> + +/* + * Functions in this file implement Bakery Algorithm for mutual exclusion with the + * bakery lock data structures in cacheable and Normal memory. + * + * ARM architecture offers a family of exclusive access instructions to + * efficiently implement mutual exclusion with hardware support. However, as + * well as depending on external hardware, these instructions have defined + * behavior only on certain memory types (cacheable and Normal memory in + * particular; see ARMv8 Architecture Reference Manual section B2.10). Use cases + * in trusted firmware are such that mutual exclusion implementation cannot + * expect that accesses to the lock have the specific type required by the + * architecture for these primitives to function (for example, not all + * contenders may have address translation enabled). + * + * This implementation does not use mutual exclusion primitives. It expects + * memory regions where the locks reside to be cacheable and Normal. + * + * Note that the ARM architecture guarantees single-copy atomicity for aligned + * accesses regardless of status of address translation. + */ + +#ifdef PLAT_PERCPU_BAKERY_LOCK_SIZE +/* + * Verify that the platform defined value for the per-cpu space for bakery locks is + * a multiple of the cache line size, to prevent multiple CPUs writing to the same + * bakery lock cache line + * + * Using this value, if provided, rather than the linker generated value results in + * more efficient code + */ +CASSERT((PLAT_PERCPU_BAKERY_LOCK_SIZE & (CACHE_WRITEBACK_GRANULE - 1)) == 0, + PLAT_PERCPU_BAKERY_LOCK_SIZE_not_cacheline_multiple); +#define PERCPU_BAKERY_LOCK_SIZE (PLAT_PERCPU_BAKERY_LOCK_SIZE) +#else +/* + * Use the linker defined symbol which has evaluated the size reqiurement. + * This is not as efficient as using a platform defined constant + */ +IMPORT_SYM(uintptr_t, __PERCPU_BAKERY_LOCK_START__, BAKERY_LOCK_START); +IMPORT_SYM(uintptr_t, __PERCPU_BAKERY_LOCK_END__, BAKERY_LOCK_END); +#define PERCPU_BAKERY_LOCK_SIZE (BAKERY_LOCK_END - BAKERY_LOCK_START) +#endif + +static inline bakery_lock_t *get_bakery_info(unsigned int cpu_ix, + bakery_lock_t *lock) +{ + return (bakery_info_t *)((uintptr_t)lock + + cpu_ix * PERCPU_BAKERY_LOCK_SIZE); +} + +static inline void write_cache_op(uintptr_t addr, bool cached) +{ + if (cached) + dccvac(addr); + else + dcivac(addr); + + dsbish(); +} + +static inline void read_cache_op(uintptr_t addr, bool cached) +{ + if (cached) + dccivac(addr); + + dmbish(); +} + +/* Helper function to check if the lock is acquired */ +static inline __unused bool is_lock_acquired(const bakery_info_t *my_bakery_info, + bool is_cached) +{ + /* + * Even though lock data is updated only by the owning cpu and + * appropriate cache maintenance operations are performed, + * if the previous update was done when the cpu was not participating + * in coherency, then there is a chance that cache maintenance + * operations were not propagated to all the caches in the system. + * Hence do a `read_cache_op()` prior to read. + */ + read_cache_op((uintptr_t)my_bakery_info, is_cached); + return bakery_ticket_number(my_bakery_info->lock_data) != 0U; +} + +static unsigned int bakery_get_ticket(bakery_lock_t *lock, + unsigned int me, bool is_cached) +{ + unsigned int my_ticket, their_ticket; + unsigned int they; + bakery_info_t *my_bakery_info, *their_bakery_info; + + /* + * Obtain a reference to the bakery information for this cpu and ensure + * it is not NULL. + */ + my_bakery_info = get_bakery_info(me, lock); + assert(my_bakery_info != NULL); + + /* Prevent recursive acquisition.*/ + assert(!is_lock_acquired(my_bakery_info, is_cached)); + + /* + * Tell other contenders that we are through the bakery doorway i.e. + * going to allocate a ticket for this cpu. + */ + my_ticket = 0U; + my_bakery_info->lock_data = make_bakery_data(CHOOSING_TICKET, my_ticket); + + write_cache_op((uintptr_t)my_bakery_info, is_cached); + + /* + * Iterate through the bakery information of each contender to allocate + * the highest ticket number for this cpu. + */ + for (they = 0U; they < BAKERY_LOCK_MAX_CPUS; they++) { + if (me == they) + continue; + + /* + * Get a reference to the other contender's bakery info and + * ensure that a stale copy is not read. + */ + their_bakery_info = get_bakery_info(they, lock); + assert(their_bakery_info != NULL); + + read_cache_op((uintptr_t)their_bakery_info, is_cached); + + /* + * Update this cpu's ticket number if a higher ticket number is + * seen + */ + their_ticket = bakery_ticket_number(their_bakery_info->lock_data); + if (their_ticket > my_ticket) + my_ticket = their_ticket; + } + + /* + * Compute ticket; then signal to other contenders waiting for us to + * finish calculating our ticket value that we're done + */ + ++my_ticket; + my_bakery_info->lock_data = make_bakery_data(CHOSEN_TICKET, my_ticket); + + write_cache_op((uintptr_t)my_bakery_info, is_cached); + + return my_ticket; +} + +void bakery_lock_get(bakery_lock_t *lock) +{ + unsigned int they, me; + unsigned int my_ticket, my_prio, their_ticket; + bakery_info_t *their_bakery_info; + unsigned int their_bakery_data; + bool is_cached; + + me = plat_my_core_pos(); + is_cached = is_dcache_enabled(); + + /* Get a ticket */ + my_ticket = bakery_get_ticket(lock, me, is_cached); + + /* + * Now that we got our ticket, compute our priority value, then compare + * with that of others, and proceed to acquire the lock + */ + my_prio = bakery_get_priority(my_ticket, me); + for (they = 0U; they < BAKERY_LOCK_MAX_CPUS; they++) { + if (me == they) + continue; + + /* + * Get a reference to the other contender's bakery info and + * ensure that a stale copy is not read. + */ + their_bakery_info = get_bakery_info(they, lock); + assert(their_bakery_info != NULL); + + /* Wait for the contender to get their ticket */ + do { + read_cache_op((uintptr_t)their_bakery_info, is_cached); + their_bakery_data = their_bakery_info->lock_data; + } while (bakery_is_choosing(their_bakery_data)); + + /* + * If the other party is a contender, they'll have non-zero + * (valid) ticket value. If they do, compare priorities + */ + their_ticket = bakery_ticket_number(their_bakery_data); + if (their_ticket && (bakery_get_priority(their_ticket, they) < my_prio)) { + /* + * They have higher priority (lower value). Wait for + * their ticket value to change (either release the lock + * to have it dropped to 0; or drop and probably content + * again for the same lock to have an even higher value) + */ + do { + wfe(); + read_cache_op((uintptr_t)their_bakery_info, is_cached); + } while (their_ticket + == bakery_ticket_number(their_bakery_info->lock_data)); + } + } + + /* + * Lock acquired. Ensure that any reads and writes from a shared + * resource in the critical section read/write values after the lock is + * acquired. + */ + dmbish(); +} + +void bakery_lock_release(bakery_lock_t *lock) +{ + bakery_info_t *my_bakery_info; + bool is_cached = is_dcache_enabled(); + + my_bakery_info = get_bakery_info(plat_my_core_pos(), lock); + + assert(is_lock_acquired(my_bakery_info, is_cached)); + + /* + * Ensure that other observers see any stores in the critical section + * before releasing the lock. Also ensure all loads in the critical + * section are complete before releasing the lock. Release the lock by + * resetting ticket. Then signal other waiting contenders. + */ + dmbish(); + my_bakery_info->lock_data = 0U; + write_cache_op((uintptr_t)my_bakery_info, is_cached); + + /* This sev is ordered by the dsbish in write_cahce_op */ + sev(); +} diff --git a/lib/locks/exclusive/aarch32/spinlock.S b/lib/locks/exclusive/aarch32/spinlock.S new file mode 100644 index 0000000..853d096 --- /dev/null +++ b/lib/locks/exclusive/aarch32/spinlock.S @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2016, Arm Limited and Contributors. All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include <asm_macros.S> + + .globl spin_lock + .globl spin_unlock + +#if ARM_ARCH_AT_LEAST(8, 0) +/* + * According to the ARMv8-A Architecture Reference Manual, "when the global + * monitor for a PE changes from Exclusive Access state to Open Access state, + * an event is generated.". This applies to both AArch32 and AArch64 modes of + * ARMv8-A. As a result, no explicit SEV with unlock is required. + */ +#define COND_SEV() +#else +#define COND_SEV() sev +#endif + +func spin_lock + mov r2, #1 +1: + ldrex r1, [r0] + cmp r1, #0 + wfene + strexeq r1, r2, [r0] + cmpeq r1, #0 + bne 1b + dmb + bx lr +endfunc spin_lock + + +func spin_unlock + mov r1, #0 + stl r1, [r0] + COND_SEV() + bx lr +endfunc spin_unlock diff --git a/lib/locks/exclusive/aarch64/spinlock.S b/lib/locks/exclusive/aarch64/spinlock.S new file mode 100644 index 0000000..5144bf7 --- /dev/null +++ b/lib/locks/exclusive/aarch64/spinlock.S @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2013-2019, Arm Limited and Contributors. All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include <asm_macros.S> + + .globl spin_lock + .globl spin_unlock + +#if USE_SPINLOCK_CAS +#if !ARM_ARCH_AT_LEAST(8, 1) +#error USE_SPINLOCK_CAS option requires at least an ARMv8.1 platform +#endif + +/* + * When compiled for ARMv8.1 or later, choose spin locks based on Compare and + * Swap instruction. + */ + +/* + * Acquire lock using Compare and Swap instruction. + * + * Compare for 0 with acquire semantics, and swap 1. If failed to acquire, use + * load exclusive semantics to monitor the address and enter WFE. + * + * void spin_lock(spinlock_t *lock); + */ +func spin_lock + mov w2, #1 +1: mov w1, wzr +2: casa w1, w2, [x0] + cbz w1, 3f + ldxr w1, [x0] + cbz w1, 2b + wfe + b 1b +3: + ret +endfunc spin_lock + +#else /* !USE_SPINLOCK_CAS */ + +/* + * Acquire lock using load-/store-exclusive instruction pair. + * + * void spin_lock(spinlock_t *lock); + */ +func spin_lock + mov w2, #1 + sevl +l1: wfe +l2: ldaxr w1, [x0] + cbnz w1, l1 + stxr w1, w2, [x0] + cbnz w1, l2 + ret +endfunc spin_lock + +#endif /* USE_SPINLOCK_CAS */ + +/* + * Release lock previously acquired by spin_lock. + * + * Use store-release to unconditionally clear the spinlock variable. + * Store operation generates an event to all cores waiting in WFE + * when address is monitored by the global monitor. + * + * void spin_unlock(spinlock_t *lock); + */ +func spin_unlock + stlr wzr, [x0] + ret +endfunc spin_unlock |