From 102b0d2daa97dae68d3eed54d8fe37a9cc38a892 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 28 Apr 2024 11:13:47 +0200 Subject: Adding upstream version 2.8.0+dfsg. Signed-off-by: Daniel Baumann --- lib/aarch32/arm32_aeabi_divmod.c | 156 +++++++++++++++++ lib/aarch32/arm32_aeabi_divmod_a32.S | 30 ++++ lib/aarch32/armclang_printf.S | 19 +++ lib/aarch32/cache_helpers.S | 233 ++++++++++++++++++++++++++ lib/aarch32/misc_helpers.S | 315 +++++++++++++++++++++++++++++++++++ 5 files changed, 753 insertions(+) create mode 100644 lib/aarch32/arm32_aeabi_divmod.c create mode 100644 lib/aarch32/arm32_aeabi_divmod_a32.S create mode 100644 lib/aarch32/armclang_printf.S create mode 100644 lib/aarch32/cache_helpers.S create mode 100644 lib/aarch32/misc_helpers.S (limited to 'lib/aarch32') diff --git a/lib/aarch32/arm32_aeabi_divmod.c b/lib/aarch32/arm32_aeabi_divmod.c new file mode 100644 index 0000000..ea8e2bb --- /dev/null +++ b/lib/aarch32/arm32_aeabi_divmod.c @@ -0,0 +1,156 @@ +/* + * Copyright (c) 2017-2019, ARM Limited and Contributors. All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +/* + * Form ABI specifications: + * int __aeabi_idiv(int numerator, int denominator); + * unsigned __aeabi_uidiv(unsigned numerator, unsigned denominator); + * + * typedef struct { int quot; int rem; } idiv_return; + * typedef struct { unsigned quot; unsigned rem; } uidiv_return; + * + * __value_in_regs idiv_return __aeabi_idivmod(int numerator, + * int *denominator); + * __value_in_regs uidiv_return __aeabi_uidivmod(unsigned *numerator, + * unsigned denominator); + */ + +/* struct qr - stores quotient/remainder to handle divmod EABI interfaces. */ +struct qr { + unsigned int q; /* computed quotient */ + unsigned int r; /* computed remainder */ + unsigned int q_n; /* specifies if quotient shall be negative */ + unsigned int r_n; /* specifies if remainder shall be negative */ +}; + +static void uint_div_qr(unsigned int numerator, unsigned int denominator, + struct qr *qr); + +/* returns in R0 and R1 by tail calling an asm function */ +unsigned int __aeabi_uidivmod(unsigned int numerator, unsigned int denominator); + +unsigned int __aeabi_uidiv(unsigned int numerator, unsigned int denominator); + +/* returns in R0 and R1 by tail calling an asm function */ +signed int __aeabi_idivmod(signed int numerator, signed int denominator); + +signed int __aeabi_idiv(signed int numerator, signed int denominator); + +/* + * __ste_idivmod_ret_t __aeabi_idivmod(signed numerator, signed denominator) + * Numerator and Denominator are received in R0 and R1. + * Where __ste_idivmod_ret_t is returned in R0 and R1. + * + * __ste_uidivmod_ret_t __aeabi_uidivmod(unsigned numerator, + * unsigned denominator) + * Numerator and Denominator are received in R0 and R1. + * Where __ste_uidivmod_ret_t is returned in R0 and R1. + */ +#ifdef __GNUC__ +signed int ret_idivmod_values(signed int quotient, signed int remainder); +unsigned int ret_uidivmod_values(unsigned int quotient, unsigned int remainder); +#else +#error "Compiler not supported" +#endif + +static void division_qr(unsigned int n, unsigned int p, struct qr *qr) +{ + unsigned int i = 1, q = 0; + + if (p == 0) { + qr->r = 0xFFFFFFFF; /* division by 0 */ + return; + } + + while ((p >> 31) == 0) { + i = i << 1; /* count the max division steps */ + p = p << 1; /* increase p until it has maximum size*/ + } + + while (i > 0) { + q = q << 1; /* write bit in q at index (size-1) */ + if (n >= p) { + n -= p; + q++; + } + p = p >> 1; /* decrease p */ + i = i >> 1; /* decrease remaining size in q */ + } + qr->r = n; + qr->q = q; +} + +static void uint_div_qr(unsigned int numerator, unsigned int denominator, + struct qr *qr) +{ + division_qr(numerator, denominator, qr); + + /* negate quotient and/or remainder according to requester */ + if (qr->q_n) + qr->q = -qr->q; + if (qr->r_n) + qr->r = -qr->r; +} + +unsigned int __aeabi_uidiv(unsigned int numerator, unsigned int denominator) +{ + struct qr qr = { .q_n = 0, .r_n = 0 }; + + uint_div_qr(numerator, denominator, &qr); + + return qr.q; +} + +unsigned int __aeabi_uidivmod(unsigned int numerator, unsigned int denominator) +{ + struct qr qr = { .q_n = 0, .r_n = 0 }; + + uint_div_qr(numerator, denominator, &qr); + + return ret_uidivmod_values(qr.q, qr.r); +} + +signed int __aeabi_idiv(signed int numerator, signed int denominator) +{ + struct qr qr = { .q_n = 0, .r_n = 0 }; + + if (((numerator < 0) && (denominator > 0)) || + ((numerator > 0) && (denominator < 0))) + qr.q_n = 1; /* quotient shall be negate */ + + if (numerator < 0) { + numerator = -numerator; + qr.r_n = 1; /* remainder shall be negate */ + } + + if (denominator < 0) + denominator = -denominator; + + uint_div_qr(numerator, denominator, &qr); + + return qr.q; +} + +signed int __aeabi_idivmod(signed int numerator, signed int denominator) +{ + struct qr qr = { .q_n = 0, .r_n = 0 }; + + if (((numerator < 0) && (denominator > 0)) || + ((numerator > 0) && (denominator < 0))) + qr.q_n = 1; /* quotient shall be negate */ + + if (numerator < 0) { + numerator = -numerator; + qr.r_n = 1; /* remainder shall be negate */ + } + + if (denominator < 0) + denominator = -denominator; + + uint_div_qr(numerator, denominator, &qr); + + return ret_idivmod_values(qr.q, qr.r); +} diff --git a/lib/aarch32/arm32_aeabi_divmod_a32.S b/lib/aarch32/arm32_aeabi_divmod_a32.S new file mode 100644 index 0000000..6915dcd --- /dev/null +++ b/lib/aarch32/arm32_aeabi_divmod_a32.S @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2017, ARM Limited and Contributors. All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include + +/* + * EABI wrappers from the udivmod and idivmod functions + */ + + .globl ret_uidivmod_values + .globl ret_idivmod_values + +/* + * signed ret_idivmod_values(signed quot, signed rem); + * return quotient and remaining the EABI way (regs r0,r1) + */ +func ret_idivmod_values + bx lr +endfunc ret_idivmod_values + +/* + * unsigned ret_uidivmod_values(unsigned quot, unsigned rem); + * return quotient and remaining the EABI way (regs r0,r1) + */ +func ret_uidivmod_values + bx lr +endfunc ret_uidivmod_values diff --git a/lib/aarch32/armclang_printf.S b/lib/aarch32/armclang_printf.S new file mode 100644 index 0000000..2b87bf7 --- /dev/null +++ b/lib/aarch32/armclang_printf.S @@ -0,0 +1,19 @@ +/* + * Copyright (c) 2018, ARM Limited and Contributors. All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include + +/* Symbols needed by armclang */ + + .globl __0printf + .globl __1printf + .globl __2printf + +func __0printf +__1printf: +__2printf: + b printf +endfunc __0printf diff --git a/lib/aarch32/cache_helpers.S b/lib/aarch32/cache_helpers.S new file mode 100644 index 0000000..13d1872 --- /dev/null +++ b/lib/aarch32/cache_helpers.S @@ -0,0 +1,233 @@ +/* + * Copyright (c) 2016-2021, Arm Limited and Contributors. All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include +#include + + .globl flush_dcache_range + .globl clean_dcache_range + .globl inv_dcache_range + .globl dcsw_op_louis + .globl dcsw_op_all + .globl dcsw_op_level1 + .globl dcsw_op_level2 + .globl dcsw_op_level3 + +/* + * This macro can be used for implementing various data cache operations `op` + */ +.macro do_dcache_maintenance_by_mva op, coproc, opc1, CRn, CRm, opc2 + /* Exit early if size is zero */ + cmp r1, #0 + beq exit_loop_\op + dcache_line_size r2, r3 + add r1, r0, r1 + sub r3, r2, #1 + bic r0, r0, r3 +loop_\op: + stcopr r0, \coproc, \opc1, \CRn, \CRm, \opc2 + add r0, r0, r2 + cmp r0, r1 + blo loop_\op + dsb sy +exit_loop_\op: + bx lr +.endm + + /* ------------------------------------------ + * Clean+Invalidate from base address till + * size. 'r0' = addr, 'r1' = size + * ------------------------------------------ + */ +func flush_dcache_range + do_dcache_maintenance_by_mva cimvac, DCCIMVAC +endfunc flush_dcache_range + + /* ------------------------------------------ + * Clean from base address till size. + * 'r0' = addr, 'r1' = size + * ------------------------------------------ + */ +func clean_dcache_range + do_dcache_maintenance_by_mva cmvac, DCCMVAC +endfunc clean_dcache_range + + /* ------------------------------------------ + * Invalidate from base address till + * size. 'r0' = addr, 'r1' = size + * ------------------------------------------ + */ +func inv_dcache_range + do_dcache_maintenance_by_mva imvac, DCIMVAC +endfunc inv_dcache_range + + /* ---------------------------------------------------------------- + * Data cache operations by set/way to the level specified + * + * The main function, do_dcsw_op requires: + * r0: The operation type (DC_OP_ISW, DC_OP_CISW, DC_OP_CSW), + * as defined in arch.h + * r1: The cache level to begin operation from + * r2: clidr_el1 + * r3: The last cache level to operate on + * and will carry out the operation on each data cache from level 0 + * to the level in r3 in sequence + * + * The dcsw_op macro sets up the r2 and r3 parameters based on + * clidr_el1 cache information before invoking the main function + * ---------------------------------------------------------------- + */ + + .macro dcsw_op shift, fw, ls + ldcopr r2, CLIDR + ubfx r3, r2, \shift, \fw + lsl r3, r3, \ls + mov r1, #0 + b do_dcsw_op + .endm + +func do_dcsw_op + push {r4-r12, lr} + ldcopr r8, ID_MMFR4 // stash FEAT_CCIDX identifier in r8 + ubfx r8, r8, #ID_MMFR4_CCIDX_SHIFT, #ID_MMFR4_CCIDX_LENGTH + adr r11, dcsw_loop_table // compute cache op based on the operation type + add r6, r11, r0, lsl #3 // cache op is 2x32-bit instructions +loop1: + add r10, r1, r1, LSR #1 // Work out 3x current cache level + mov r12, r2, LSR r10 // extract cache type bits from clidr + and r12, r12, #7 // mask the bits for current cache only + cmp r12, #2 // see what cache we have at this level + blo level_done // no cache or only instruction cache at this level + + stcopr r1, CSSELR // select current cache level in csselr + isb // isb to sych the new cssr&csidr + ldcopr r12, CCSIDR // read the new ccsidr + and r10, r12, #7 // extract the length of the cache lines + add r10, r10, #4 // add 4 (r10 = line length offset) + + cmp r8, #0 // check for FEAT_CCIDX for Associativity + beq 1f + ubfx r4, r12, #3, #21 // r4 = associativity CCSIDR[23:3] + b 2f +1: + ubfx r4, r12, #3, #10 // r4 = associativity CCSIDR[12:3] +2: + clz r5, r4 // r5 = the bit position of the way size increment + mov r9, r4 // r9 working copy of the aligned max way number + +loop2: + cmp r8, #0 // check for FEAT_CCIDX for NumSets + beq 3f + ldcopr r12, CCSIDR2 // FEAT_CCIDX numsets is in CCSIDR2 + ubfx r7, r12, #0, #24 // r7 = numsets CCSIDR2[23:0] + b loop3 +3: + ubfx r7, r12, #13, #15 // r7 = numsets CCSIDR[27:13] +loop3: + orr r0, r1, r9, LSL r5 // factor in the way number and cache level into r0 + orr r0, r0, r7, LSL r10 // factor in the set number + + blx r6 + subs r7, r7, #1 // decrement the set number + bhs loop3 + subs r9, r9, #1 // decrement the way number + bhs loop2 +level_done: + add r1, r1, #2 // increment the cache number + cmp r3, r1 + // Ensure completion of previous cache maintenance instruction. Note + // this also mitigates erratum 814220 on Cortex-A7 + dsb sy + bhi loop1 + + mov r6, #0 + stcopr r6, CSSELR //select cache level 0 in csselr + dsb sy + isb + pop {r4-r12, pc} + +dcsw_loop_table: + stcopr r0, DCISW + bx lr + stcopr r0, DCCISW + bx lr + stcopr r0, DCCSW + bx lr + +endfunc do_dcsw_op + + /* --------------------------------------------------------------- + * Data cache operations by set/way till PoU. + * + * The function requires : + * r0: The operation type (DC_OP_ISW, DC_OP_CISW, DC_OP_CSW), + * as defined in arch.h + * --------------------------------------------------------------- + */ +func dcsw_op_louis + dcsw_op #LOUIS_SHIFT, #CLIDR_FIELD_WIDTH, #LEVEL_SHIFT +endfunc dcsw_op_louis + + /* --------------------------------------------------------------- + * Data cache operations by set/way till PoC. + * + * The function requires : + * r0: The operation type (DC_OP_ISW, DC_OP_CISW, DC_OP_CSW), + * as defined in arch.h + * --------------------------------------------------------------- + */ +func dcsw_op_all + dcsw_op #LOC_SHIFT, #CLIDR_FIELD_WIDTH, #LEVEL_SHIFT +endfunc dcsw_op_all + + + /* --------------------------------------------------------------- + * Helper macro for data cache operations by set/way for the + * level specified + * --------------------------------------------------------------- + */ + .macro dcsw_op_level level + ldcopr r2, CLIDR + mov r3, \level + sub r1, r3, #2 + b do_dcsw_op + .endm + + /* --------------------------------------------------------------- + * Data cache operations by set/way for level 1 cache + * + * The main function, do_dcsw_op requires: + * r0: The operation type (DC_OP_ISW, DC_OP_CISW, DC_OP_CSW), + * as defined in arch.h + * --------------------------------------------------------------- + */ +func dcsw_op_level1 + dcsw_op_level #(1 << LEVEL_SHIFT) +endfunc dcsw_op_level1 + + /* --------------------------------------------------------------- + * Data cache operations by set/way for level 2 cache + * + * The main function, do_dcsw_op requires: + * r0: The operation type (DC_OP_ISW, DC_OP_CISW, DC_OP_CSW), + * as defined in arch.h + * --------------------------------------------------------------- + */ +func dcsw_op_level2 + dcsw_op_level #(2 << LEVEL_SHIFT) +endfunc dcsw_op_level2 + + /* --------------------------------------------------------------- + * Data cache operations by set/way for level 3 cache + * + * The main function, do_dcsw_op requires: + * r0: The operation type (DC_OP_ISW, DC_OP_CISW, DC_OP_CSW), + * as defined in arch.h + * --------------------------------------------------------------- + */ +func dcsw_op_level3 + dcsw_op_level #(3 << LEVEL_SHIFT) +endfunc dcsw_op_level3 diff --git a/lib/aarch32/misc_helpers.S b/lib/aarch32/misc_helpers.S new file mode 100644 index 0000000..59e15bd --- /dev/null +++ b/lib/aarch32/misc_helpers.S @@ -0,0 +1,315 @@ +/* + * Copyright (c) 2016-2021, ARM Limited and Contributors. All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include +#include +#include +#include +#include + + .globl smc + .globl zeromem + .globl zero_normalmem + .globl memcpy4 + .globl disable_mmu_icache_secure + .globl disable_mmu_secure + .globl fixup_gdt_reloc + +#define PAGE_START_MASK ~(PAGE_SIZE_MASK) + +func smc + /* + * For AArch32 only r0-r3 will be in the registers; + * rest r4-r6 will be pushed on to the stack. So here, we'll + * have to load them from the stack to registers r4-r6 explicitly. + * Clobbers: r4-r6 + */ + ldm sp, {r4, r5, r6} + smc #0 +endfunc smc + +/* ----------------------------------------------------------------------- + * void zeromem(void *mem, unsigned int length) + * + * Initialise a region in normal memory to 0. This functions complies with the + * AAPCS and can be called from C code. + * + * ----------------------------------------------------------------------- + */ +func zeromem + /* + * Readable names for registers + * + * Registers r0, r1 and r2 are also set by zeromem which + * branches into the fallback path directly, so cursor, length and + * stop_address should not be retargeted to other registers. + */ + cursor .req r0 /* Start address and then current address */ + length .req r1 /* Length in bytes of the region to zero out */ + /* + * Reusing the r1 register as length is only used at the beginning of + * the function. + */ + stop_address .req r1 /* Address past the last zeroed byte */ + zeroreg1 .req r2 /* Source register filled with 0 */ + zeroreg2 .req r3 /* Source register filled with 0 */ + tmp .req r12 /* Temporary scratch register */ + + mov zeroreg1, #0 + + /* stop_address is the address past the last to zero */ + add stop_address, cursor, length + + /* + * Length cannot be used anymore as it shares the same register with + * stop_address. + */ + .unreq length + + /* + * If the start address is already aligned to 8 bytes, skip this loop. + */ + tst cursor, #(8-1) + beq .Lzeromem_8bytes_aligned + + /* Calculate the next address aligned to 8 bytes */ + orr tmp, cursor, #(8-1) + adds tmp, tmp, #1 + /* If it overflows, fallback to byte per byte zeroing */ + beq .Lzeromem_1byte_aligned + /* If the next aligned address is after the stop address, fall back */ + cmp tmp, stop_address + bhs .Lzeromem_1byte_aligned + + /* zero byte per byte */ +1: + strb zeroreg1, [cursor], #1 + cmp cursor, tmp + bne 1b + + /* zero 8 bytes at a time */ +.Lzeromem_8bytes_aligned: + + /* Calculate the last 8 bytes aligned address. */ + bic tmp, stop_address, #(8-1) + + cmp cursor, tmp + bhs 2f + + mov zeroreg2, #0 +1: + stmia cursor!, {zeroreg1, zeroreg2} + cmp cursor, tmp + blo 1b +2: + + /* zero byte per byte */ +.Lzeromem_1byte_aligned: + cmp cursor, stop_address + beq 2f +1: + strb zeroreg1, [cursor], #1 + cmp cursor, stop_address + bne 1b +2: + bx lr + + .unreq cursor + /* + * length is already unreq'ed to reuse the register for another + * variable. + */ + .unreq stop_address + .unreq zeroreg1 + .unreq zeroreg2 + .unreq tmp +endfunc zeromem + +/* + * AArch32 does not have special ways of zeroing normal memory as AArch64 does + * using the DC ZVA instruction, so we just alias zero_normalmem to zeromem. + */ +.equ zero_normalmem, zeromem + +/* -------------------------------------------------------------------------- + * void memcpy4(void *dest, const void *src, unsigned int length) + * + * Copy length bytes from memory area src to memory area dest. + * The memory areas should not overlap. + * Destination and source addresses must be 4-byte aligned. + * -------------------------------------------------------------------------- + */ +func memcpy4 +#if ENABLE_ASSERTIONS + orr r3, r0, r1 + tst r3, #0x3 + ASM_ASSERT(eq) +#endif +/* copy 4 bytes at a time */ +m_loop4: + cmp r2, #4 + blo m_loop1 + ldr r3, [r1], #4 + str r3, [r0], #4 + subs r2, r2, #4 + bne m_loop4 + bx lr + +/* copy byte per byte */ +m_loop1: + ldrb r3, [r1], #1 + strb r3, [r0], #1 + subs r2, r2, #1 + bne m_loop1 + bx lr +endfunc memcpy4 + +/* --------------------------------------------------------------------------- + * Disable the MMU in Secure State + * --------------------------------------------------------------------------- + */ + +func disable_mmu_secure + mov r1, #(SCTLR_M_BIT | SCTLR_C_BIT) +do_disable_mmu: +#if ERRATA_A9_794073 + stcopr r0, BPIALL + dsb +#endif + ldcopr r0, SCTLR + bic r0, r0, r1 + stcopr r0, SCTLR + isb // ensure MMU is off + dsb sy + bx lr +endfunc disable_mmu_secure + + +func disable_mmu_icache_secure + ldr r1, =(SCTLR_M_BIT | SCTLR_C_BIT | SCTLR_I_BIT) + b do_disable_mmu +endfunc disable_mmu_icache_secure + +/* --------------------------------------------------------------------------- + * Helper to fixup Global Descriptor table (GDT) and dynamic relocations + * (.rel.dyn) at runtime. + * + * This function is meant to be used when the firmware is compiled with -fpie + * and linked with -pie options. We rely on the linker script exporting + * appropriate markers for start and end of the section. For GOT, we + * expect __GOT_START__ and __GOT_END__. Similarly for .rela.dyn, we expect + * __RELA_START__ and __RELA_END__. + * + * The function takes the limits of the memory to apply fixups to as + * arguments (which is usually the limits of the relocable BL image). + * r0 - the start of the fixup region + * r1 - the limit of the fixup region + * These addresses have to be 4KB page aligned. + * --------------------------------------------------------------------------- + */ + +/* Relocation codes */ +#define R_ARM_RELATIVE 23 + +func fixup_gdt_reloc + mov r6, r0 + mov r7, r1 + +#if ENABLE_ASSERTIONS + /* Test if the limits are 4K aligned */ + orr r0, r0, r1 + mov r1, #(PAGE_SIZE_MASK) + tst r0, r1 + ASM_ASSERT(eq) +#endif + /* + * Calculate the offset based on return address in lr. + * Assume that this function is called within a page at the start of + * fixup region. + */ + ldr r1, =PAGE_START_MASK + and r2, lr, r1 + subs r0, r2, r6 /* Diff(S) = Current Address - Compiled Address */ + beq 3f /* Diff(S) = 0. No relocation needed */ + + ldr r1, =__GOT_START__ + add r1, r1, r0 + ldr r2, =__GOT_END__ + add r2, r2, r0 + + /* + * GOT is an array of 32_bit addresses which must be fixed up as + * new_addr = old_addr + Diff(S). + * The new_addr is the address currently the binary is executing from + * and old_addr is the address at compile time. + */ +1: ldr r3, [r1] + + /* Skip adding offset if address is < lower limit */ + cmp r3, r6 + blo 2f + + /* Skip adding offset if address is > upper limit */ + cmp r3, r7 + bhi 2f + add r3, r3, r0 + str r3, [r1] + +2: add r1, r1, #4 + cmp r1, r2 + blo 1b + + /* Starting dynamic relocations. Use ldr to get RELA_START and END */ +3: ldr r1, =__RELA_START__ + add r1, r1, r0 + ldr r2, =__RELA_END__ + add r2, r2, r0 + + /* + * According to ELF-32 specification, the RELA data structure is as + * follows: + * typedef struct { + * Elf32_Addr r_offset; + * Elf32_Xword r_info; + * } Elf32_Rela; + * + * r_offset is address of reference + * r_info is symbol index and type of relocation (in this case + * code 23 which corresponds to R_ARM_RELATIVE). + * + * Size of Elf32_Rela structure is 8 bytes. + */ + + /* Skip R_ARM_NONE entry with code 0 */ +1: ldr r3, [r1, #4] + ands r3, r3, #0xff + beq 2f + +#if ENABLE_ASSERTIONS + /* Assert that the relocation type is R_ARM_RELATIVE */ + cmp r3, #R_ARM_RELATIVE + ASM_ASSERT(eq) +#endif + ldr r3, [r1] /* r_offset */ + add r3, r0, r3 /* Diff(S) + r_offset */ + ldr r4, [r3] + + /* Skip adding offset if address is < lower limit */ + cmp r4, r6 + blo 2f + + /* Skip adding offset if address is > upper limit */ + cmp r4, r7 + bhi 2f + + add r4, r0, r4 + str r4, [r3] + +2: add r1, r1, #8 + cmp r1, r2 + blo 1b + bx lr +endfunc fixup_gdt_reloc -- cgit v1.2.3