summaryrefslogtreecommitdiffstats
path: root/arch/arm/crypto/aes-cipher-core.S
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm/crypto/aes-cipher-core.S')
-rw-r--r--arch/arm/crypto/aes-cipher-core.S223
1 files changed, 223 insertions, 0 deletions
diff --git a/arch/arm/crypto/aes-cipher-core.S b/arch/arm/crypto/aes-cipher-core.S
new file mode 100644
index 000000000..472e56d09
--- /dev/null
+++ b/arch/arm/crypto/aes-cipher-core.S
@@ -0,0 +1,223 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Scalar AES core transform
+ *
+ * Copyright (C) 2017 Linaro Ltd.
+ * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/cache.h>
+
+ .text
+ .align 5
+
+ rk .req r0
+ rounds .req r1
+ in .req r2
+ out .req r3
+ ttab .req ip
+
+ t0 .req lr
+ t1 .req r2
+ t2 .req r3
+
+ .macro __select, out, in, idx
+ .if __LINUX_ARM_ARCH__ < 7
+ and \out, \in, #0xff << (8 * \idx)
+ .else
+ ubfx \out, \in, #(8 * \idx), #8
+ .endif
+ .endm
+
+ .macro __load, out, in, idx, sz, op
+ .if __LINUX_ARM_ARCH__ < 7 && \idx > 0
+ ldr\op \out, [ttab, \in, lsr #(8 * \idx) - \sz]
+ .else
+ ldr\op \out, [ttab, \in, lsl #\sz]
+ .endif
+ .endm
+
+ .macro __hround, out0, out1, in0, in1, in2, in3, t3, t4, enc, sz, op, oldcpsr
+ __select \out0, \in0, 0
+ __select t0, \in1, 1
+ __load \out0, \out0, 0, \sz, \op
+ __load t0, t0, 1, \sz, \op
+
+ .if \enc
+ __select \out1, \in1, 0
+ __select t1, \in2, 1
+ .else
+ __select \out1, \in3, 0
+ __select t1, \in0, 1
+ .endif
+ __load \out1, \out1, 0, \sz, \op
+ __select t2, \in2, 2
+ __load t1, t1, 1, \sz, \op
+ __load t2, t2, 2, \sz, \op
+
+ eor \out0, \out0, t0, ror #24
+
+ __select t0, \in3, 3
+ .if \enc
+ __select \t3, \in3, 2
+ __select \t4, \in0, 3
+ .else
+ __select \t3, \in1, 2
+ __select \t4, \in2, 3
+ .endif
+ __load \t3, \t3, 2, \sz, \op
+ __load t0, t0, 3, \sz, \op
+ __load \t4, \t4, 3, \sz, \op
+
+ .ifnb \oldcpsr
+ /*
+ * This is the final round and we're done with all data-dependent table
+ * lookups, so we can safely re-enable interrupts.
+ */
+ restore_irqs \oldcpsr
+ .endif
+
+ eor \out1, \out1, t1, ror #24
+ eor \out0, \out0, t2, ror #16
+ ldm rk!, {t1, t2}
+ eor \out1, \out1, \t3, ror #16
+ eor \out0, \out0, t0, ror #8
+ eor \out1, \out1, \t4, ror #8
+ eor \out0, \out0, t1
+ eor \out1, \out1, t2
+ .endm
+
+ .macro fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
+ __hround \out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op
+ __hround \out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op, \oldcpsr
+ .endm
+
+ .macro iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
+ __hround \out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op
+ __hround \out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op, \oldcpsr
+ .endm
+
+ .macro __rev, out, in
+ .if __LINUX_ARM_ARCH__ < 6
+ lsl t0, \in, #24
+ and t1, \in, #0xff00
+ and t2, \in, #0xff0000
+ orr \out, t0, \in, lsr #24
+ orr \out, \out, t1, lsl #8
+ orr \out, \out, t2, lsr #8
+ .else
+ rev \out, \in
+ .endif
+ .endm
+
+ .macro __adrl, out, sym, c
+ .if __LINUX_ARM_ARCH__ < 7
+ ldr\c \out, =\sym
+ .else
+ movw\c \out, #:lower16:\sym
+ movt\c \out, #:upper16:\sym
+ .endif
+ .endm
+
+ .macro do_crypt, round, ttab, ltab, bsz
+ push {r3-r11, lr}
+
+ // Load keys first, to reduce latency in case they're not cached yet.
+ ldm rk!, {r8-r11}
+
+ ldr r4, [in]
+ ldr r5, [in, #4]
+ ldr r6, [in, #8]
+ ldr r7, [in, #12]
+
+#ifdef CONFIG_CPU_BIG_ENDIAN
+ __rev r4, r4
+ __rev r5, r5
+ __rev r6, r6
+ __rev r7, r7
+#endif
+
+ eor r4, r4, r8
+ eor r5, r5, r9
+ eor r6, r6, r10
+ eor r7, r7, r11
+
+ __adrl ttab, \ttab
+ /*
+ * Disable interrupts and prefetch the 1024-byte 'ft' or 'it' table into
+ * L1 cache, assuming cacheline size >= 32. This is a hardening measure
+ * intended to make cache-timing attacks more difficult. They may not
+ * be fully prevented, however; see the paper
+ * https://cr.yp.to/antiforgery/cachetiming-20050414.pdf
+ * ("Cache-timing attacks on AES") for a discussion of the many
+ * difficulties involved in writing truly constant-time AES software.
+ */
+ save_and_disable_irqs t0
+ .set i, 0
+ .rept 1024 / 128
+ ldr r8, [ttab, #i + 0]
+ ldr r9, [ttab, #i + 32]
+ ldr r10, [ttab, #i + 64]
+ ldr r11, [ttab, #i + 96]
+ .set i, i + 128
+ .endr
+ push {t0} // oldcpsr
+
+ tst rounds, #2
+ bne 1f
+
+0: \round r8, r9, r10, r11, r4, r5, r6, r7
+ \round r4, r5, r6, r7, r8, r9, r10, r11
+
+1: subs rounds, rounds, #4
+ \round r8, r9, r10, r11, r4, r5, r6, r7
+ bls 2f
+ \round r4, r5, r6, r7, r8, r9, r10, r11
+ b 0b
+
+2: .ifb \ltab
+ add ttab, ttab, #1
+ .else
+ __adrl ttab, \ltab
+ // Prefetch inverse S-box for final round; see explanation above
+ .set i, 0
+ .rept 256 / 64
+ ldr t0, [ttab, #i + 0]
+ ldr t1, [ttab, #i + 32]
+ .set i, i + 64
+ .endr
+ .endif
+
+ pop {rounds} // oldcpsr
+ \round r4, r5, r6, r7, r8, r9, r10, r11, \bsz, b, rounds
+
+#ifdef CONFIG_CPU_BIG_ENDIAN
+ __rev r4, r4
+ __rev r5, r5
+ __rev r6, r6
+ __rev r7, r7
+#endif
+
+ ldr out, [sp]
+
+ str r4, [out]
+ str r5, [out, #4]
+ str r6, [out, #8]
+ str r7, [out, #12]
+
+ pop {r3-r11, pc}
+
+ .align 3
+ .ltorg
+ .endm
+
+ENTRY(__aes_arm_encrypt)
+ do_crypt fround, crypto_ft_tab,, 2
+ENDPROC(__aes_arm_encrypt)
+
+ .align 5
+ENTRY(__aes_arm_decrypt)
+ do_crypt iround, crypto_it_tab, crypto_aes_inv_sbox, 0
+ENDPROC(__aes_arm_decrypt)