diff options
Diffstat (limited to 'arch/arm/crypto/aes-cipher-core.S')
-rw-r--r-- | arch/arm/crypto/aes-cipher-core.S | 223 |
1 files changed, 223 insertions, 0 deletions
diff --git a/arch/arm/crypto/aes-cipher-core.S b/arch/arm/crypto/aes-cipher-core.S new file mode 100644 index 000000000..472e56d09 --- /dev/null +++ b/arch/arm/crypto/aes-cipher-core.S @@ -0,0 +1,223 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Scalar AES core transform + * + * Copyright (C) 2017 Linaro Ltd. + * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org> + */ + +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/cache.h> + + .text + .align 5 + + rk .req r0 + rounds .req r1 + in .req r2 + out .req r3 + ttab .req ip + + t0 .req lr + t1 .req r2 + t2 .req r3 + + .macro __select, out, in, idx + .if __LINUX_ARM_ARCH__ < 7 + and \out, \in, #0xff << (8 * \idx) + .else + ubfx \out, \in, #(8 * \idx), #8 + .endif + .endm + + .macro __load, out, in, idx, sz, op + .if __LINUX_ARM_ARCH__ < 7 && \idx > 0 + ldr\op \out, [ttab, \in, lsr #(8 * \idx) - \sz] + .else + ldr\op \out, [ttab, \in, lsl #\sz] + .endif + .endm + + .macro __hround, out0, out1, in0, in1, in2, in3, t3, t4, enc, sz, op, oldcpsr + __select \out0, \in0, 0 + __select t0, \in1, 1 + __load \out0, \out0, 0, \sz, \op + __load t0, t0, 1, \sz, \op + + .if \enc + __select \out1, \in1, 0 + __select t1, \in2, 1 + .else + __select \out1, \in3, 0 + __select t1, \in0, 1 + .endif + __load \out1, \out1, 0, \sz, \op + __select t2, \in2, 2 + __load t1, t1, 1, \sz, \op + __load t2, t2, 2, \sz, \op + + eor \out0, \out0, t0, ror #24 + + __select t0, \in3, 3 + .if \enc + __select \t3, \in3, 2 + __select \t4, \in0, 3 + .else + __select \t3, \in1, 2 + __select \t4, \in2, 3 + .endif + __load \t3, \t3, 2, \sz, \op + __load t0, t0, 3, \sz, \op + __load \t4, \t4, 3, \sz, \op + + .ifnb \oldcpsr + /* + * This is the final round and we're done with all data-dependent table + * lookups, so we can safely re-enable interrupts. + */ + restore_irqs \oldcpsr + .endif + + eor \out1, \out1, t1, ror #24 + eor \out0, \out0, t2, ror #16 + ldm rk!, {t1, t2} + eor \out1, \out1, \t3, ror #16 + eor \out0, \out0, t0, ror #8 + eor \out1, \out1, \t4, ror #8 + eor \out0, \out0, t1 + eor \out1, \out1, t2 + .endm + + .macro fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr + __hround \out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op + __hround \out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op, \oldcpsr + .endm + + .macro iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr + __hround \out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op + __hround \out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op, \oldcpsr + .endm + + .macro __rev, out, in + .if __LINUX_ARM_ARCH__ < 6 + lsl t0, \in, #24 + and t1, \in, #0xff00 + and t2, \in, #0xff0000 + orr \out, t0, \in, lsr #24 + orr \out, \out, t1, lsl #8 + orr \out, \out, t2, lsr #8 + .else + rev \out, \in + .endif + .endm + + .macro __adrl, out, sym, c + .if __LINUX_ARM_ARCH__ < 7 + ldr\c \out, =\sym + .else + movw\c \out, #:lower16:\sym + movt\c \out, #:upper16:\sym + .endif + .endm + + .macro do_crypt, round, ttab, ltab, bsz + push {r3-r11, lr} + + // Load keys first, to reduce latency in case they're not cached yet. + ldm rk!, {r8-r11} + + ldr r4, [in] + ldr r5, [in, #4] + ldr r6, [in, #8] + ldr r7, [in, #12] + +#ifdef CONFIG_CPU_BIG_ENDIAN + __rev r4, r4 + __rev r5, r5 + __rev r6, r6 + __rev r7, r7 +#endif + + eor r4, r4, r8 + eor r5, r5, r9 + eor r6, r6, r10 + eor r7, r7, r11 + + __adrl ttab, \ttab + /* + * Disable interrupts and prefetch the 1024-byte 'ft' or 'it' table into + * L1 cache, assuming cacheline size >= 32. This is a hardening measure + * intended to make cache-timing attacks more difficult. They may not + * be fully prevented, however; see the paper + * https://cr.yp.to/antiforgery/cachetiming-20050414.pdf + * ("Cache-timing attacks on AES") for a discussion of the many + * difficulties involved in writing truly constant-time AES software. + */ + save_and_disable_irqs t0 + .set i, 0 + .rept 1024 / 128 + ldr r8, [ttab, #i + 0] + ldr r9, [ttab, #i + 32] + ldr r10, [ttab, #i + 64] + ldr r11, [ttab, #i + 96] + .set i, i + 128 + .endr + push {t0} // oldcpsr + + tst rounds, #2 + bne 1f + +0: \round r8, r9, r10, r11, r4, r5, r6, r7 + \round r4, r5, r6, r7, r8, r9, r10, r11 + +1: subs rounds, rounds, #4 + \round r8, r9, r10, r11, r4, r5, r6, r7 + bls 2f + \round r4, r5, r6, r7, r8, r9, r10, r11 + b 0b + +2: .ifb \ltab + add ttab, ttab, #1 + .else + __adrl ttab, \ltab + // Prefetch inverse S-box for final round; see explanation above + .set i, 0 + .rept 256 / 64 + ldr t0, [ttab, #i + 0] + ldr t1, [ttab, #i + 32] + .set i, i + 64 + .endr + .endif + + pop {rounds} // oldcpsr + \round r4, r5, r6, r7, r8, r9, r10, r11, \bsz, b, rounds + +#ifdef CONFIG_CPU_BIG_ENDIAN + __rev r4, r4 + __rev r5, r5 + __rev r6, r6 + __rev r7, r7 +#endif + + ldr out, [sp] + + str r4, [out] + str r5, [out, #4] + str r6, [out, #8] + str r7, [out, #12] + + pop {r3-r11, pc} + + .align 3 + .ltorg + .endm + +ENTRY(__aes_arm_encrypt) + do_crypt fround, crypto_ft_tab,, 2 +ENDPROC(__aes_arm_encrypt) + + .align 5 +ENTRY(__aes_arm_decrypt) + do_crypt iround, crypto_it_tab, crypto_aes_inv_sbox, 0 +ENDPROC(__aes_arm_decrypt) |