summaryrefslogtreecommitdiffstats
path: root/arch/arm64/crypto/aes-modes.S
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--arch/arm64/crypto/aes-modes.S520
1 files changed, 520 insertions, 0 deletions
diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S
new file mode 100644
index 000000000..496c243de
--- /dev/null
+++ b/arch/arm64/crypto/aes-modes.S
@@ -0,0 +1,520 @@
+/*
+ * linux/arch/arm64/crypto/aes-modes.S - chaining mode wrappers for AES
+ *
+ * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* included by aes-ce.S and aes-neon.S */
+
+ .text
+ .align 4
+
+aes_encrypt_block4x: /* local helper, reached via bl: encrypts the four blocks held in v0-v3 in place */
+ encrypt_block4x v0, v1, v2, v3, w22, x21, x8, w7 /* macro defined elsewhere; callers keep rounds in w22 and rk in x21; x8/w7 presumably scratch - confirm */
+ ret
+ENDPROC(aes_encrypt_block4x)
+
+aes_decrypt_block4x: /* local helper, reached via bl: decrypts the four blocks held in v0-v3 in place */
+ decrypt_block4x v0, v1, v2, v3, w22, x21, x8, w7 /* macro defined elsewhere; callers keep rounds in w22 and rk in x21; x8/w7 presumably scratch - confirm */
+ ret
+ENDPROC(aes_decrypt_block4x)
+
+ /*
+ * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+ * int blocks)
+ * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+ * int blocks)
+ */
+
+AES_ENTRY(aes_ecb_encrypt) /* in: x0 = out, x1 = in, x2 = rk, w3 = rounds, w4 = blocks */
+ frame_push 5 /* macro defined elsewhere; presumably saves x19-x23 along with the frame - confirm */
+ /* keep the arguments in callee-saved registers for the whole function */
+ mov x19, x0 /* x19 = out */
+ mov x20, x1 /* x20 = in */
+ mov x21, x2 /* x21 = rk */
+ mov x22, x3 /* w22 = rounds */
+ mov x23, x4 /* w23 = blocks remaining */
+
+.Lecbencrestart: /* also the resume point passed to cond_yield_neon */
+ enc_prepare w22, x21, x5 /* macro defined elsewhere; sets up for encryption with rk, x5 as temp */
+
+.LecbencloopNx: /* main loop: four blocks per iteration */
+ subs w23, w23, #4
+ bmi .Lecbenc1x /* fewer than 4 blocks left */
+ ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 pt blocks */
+ bl aes_encrypt_block4x
+ st1 {v0.16b-v3.16b}, [x19], #64 /* store 4 ct blocks */
+ cond_yield_neon .Lecbencrestart /* macro defined elsewhere; presumably may yield NEON and then resume at the label - confirm */
+ b .LecbencloopNx
+.Lecbenc1x:
+ adds w23, w23, #4 /* undo the subtraction above */
+ beq .Lecbencout /* no blocks left */
+.Lecbencloop: /* tail loop: one block per iteration */
+ ld1 {v0.16b}, [x20], #16 /* get next pt block */
+ encrypt_block v0, w22, x21, x5, w6
+ st1 {v0.16b}, [x19], #16
+ subs w23, w23, #1
+ bne .Lecbencloop
+.Lecbencout:
+ frame_pop
+ ret
+AES_ENDPROC(aes_ecb_encrypt)
+
+
+AES_ENTRY(aes_ecb_decrypt) /* in: x0 = out, x1 = in, x2 = rk, w3 = rounds, w4 = blocks */
+ frame_push 5 /* macro defined elsewhere; presumably saves x19-x23 along with the frame - confirm */
+ /* keep the arguments in callee-saved registers for the whole function */
+ mov x19, x0 /* x19 = out */
+ mov x20, x1 /* x20 = in */
+ mov x21, x2 /* x21 = rk */
+ mov x22, x3 /* w22 = rounds */
+ mov x23, x4 /* w23 = blocks remaining */
+
+.Lecbdecrestart: /* also the resume point passed to cond_yield_neon */
+ dec_prepare w22, x21, x5 /* macro defined elsewhere; sets up for decryption with rk, x5 as temp */
+
+.LecbdecloopNx: /* main loop: four blocks per iteration */
+ subs w23, w23, #4
+ bmi .Lecbdec1x /* fewer than 4 blocks left */
+ ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 ct blocks */
+ bl aes_decrypt_block4x
+ st1 {v0.16b-v3.16b}, [x19], #64 /* store 4 pt blocks */
+ cond_yield_neon .Lecbdecrestart /* macro defined elsewhere; presumably may yield NEON and then resume at the label - confirm */
+ b .LecbdecloopNx
+.Lecbdec1x:
+ adds w23, w23, #4 /* undo the subtraction above */
+ beq .Lecbdecout /* no blocks left */
+.Lecbdecloop: /* tail loop: one block per iteration */
+ ld1 {v0.16b}, [x20], #16 /* get next ct block */
+ decrypt_block v0, w22, x21, x5, w6
+ st1 {v0.16b}, [x19], #16
+ subs w23, w23, #1
+ bne .Lecbdecloop
+.Lecbdecout:
+ frame_pop
+ ret
+AES_ENDPROC(aes_ecb_decrypt)
+
+
+ /*
+ * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+ * int blocks, u8 iv[])
+ * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+ * int blocks, u8 iv[])
+ */
+
+AES_ENTRY(aes_cbc_encrypt) /* in: x0 = out, x1 = in, x2 = rk, w3 = rounds, w4 = blocks, x5 = iv */
+ frame_push 6 /* macro defined elsewhere; presumably saves x19-x24 along with the frame - confirm */
+ /* keep the arguments in callee-saved registers for the whole function */
+ mov x19, x0 /* x19 = out */
+ mov x20, x1 /* x20 = in */
+ mov x21, x2 /* x21 = rk */
+ mov x22, x3 /* w22 = rounds */
+ mov x23, x4 /* w23 = blocks remaining */
+ mov x24, x5 /* x24 = iv buffer (read on entry/restart, written back below) */
+
+.Lcbcencrestart: /* also the resume point passed to cond_yield_neon */
+ ld1 {v4.16b}, [x24] /* get iv */
+ enc_prepare w22, x21, x6
+
+.Lcbcencloop4x: /* four blocks per iteration, encrypted one after another because each depends on the previous ct */
+ subs w23, w23, #4
+ bmi .Lcbcenc1x /* fewer than 4 blocks left */
+ ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 pt blocks */
+ eor v0.16b, v0.16b, v4.16b /* ..and xor with iv */
+ encrypt_block v0, w22, x21, x6, w7
+ eor v1.16b, v1.16b, v0.16b /* chain: pt1 ^ ct0 */
+ encrypt_block v1, w22, x21, x6, w7
+ eor v2.16b, v2.16b, v1.16b /* chain: pt2 ^ ct1 */
+ encrypt_block v2, w22, x21, x6, w7
+ eor v3.16b, v3.16b, v2.16b /* chain: pt3 ^ ct2 */
+ encrypt_block v3, w22, x21, x6, w7
+ st1 {v0.16b-v3.16b}, [x19], #64
+ mov v4.16b, v3.16b /* last ct block is the next chaining value */
+ st1 {v4.16b}, [x24] /* return iv */
+ cond_yield_neon .Lcbcencrestart /* restart path reloads v4 from [x24], stored just above */
+ b .Lcbcencloop4x
+.Lcbcenc1x:
+ adds w23, w23, #4 /* undo the subtraction above */
+ beq .Lcbcencout /* no blocks left */
+.Lcbcencloop: /* tail loop: v4 carries the chaining value and receives the ct */
+ ld1 {v0.16b}, [x20], #16 /* get next pt block */
+ eor v4.16b, v4.16b, v0.16b /* ..and xor with iv */
+ encrypt_block v4, w22, x21, x6, w7
+ st1 {v4.16b}, [x19], #16
+ subs w23, w23, #1
+ bne .Lcbcencloop
+.Lcbcencout:
+ st1 {v4.16b}, [x24] /* return iv */
+ frame_pop
+ ret
+AES_ENDPROC(aes_cbc_encrypt)
+
+
+AES_ENTRY(aes_cbc_decrypt) /* in: x0 = out, x1 = in, x2 = rk, w3 = rounds, w4 = blocks, x5 = iv */
+ frame_push 6 /* macro defined elsewhere; presumably saves x19-x24 along with the frame - confirm */
+ /* keep the arguments in callee-saved registers for the whole function */
+ mov x19, x0 /* x19 = out */
+ mov x20, x1 /* x20 = in */
+ mov x21, x2 /* x21 = rk */
+ mov x22, x3 /* w22 = rounds */
+ mov x23, x4 /* w23 = blocks remaining */
+ mov x24, x5 /* x24 = iv buffer (read on entry/restart, written back below) */
+
+.Lcbcdecrestart: /* also the resume point passed to cond_yield_neon */
+ ld1 {v7.16b}, [x24] /* get iv */
+ dec_prepare w22, x21, x6
+
+.LcbcdecloopNx: /* main loop: four blocks per iteration */
+ subs w23, w23, #4
+ bmi .Lcbcdec1x /* fewer than 4 blocks left */
+ ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 ct blocks */
+ mov v4.16b, v0.16b /* keep ct0..ct2: they are xored into pt1..pt3 below */
+ mov v5.16b, v1.16b
+ mov v6.16b, v2.16b
+ bl aes_decrypt_block4x
+ sub x20, x20, #16 /* step back to the 4th ct block, which was not copied */
+ eor v0.16b, v0.16b, v7.16b /* pt0 = D(ct0) ^ iv */
+ eor v1.16b, v1.16b, v4.16b /* pt1 = D(ct1) ^ ct0 */
+ ld1 {v7.16b}, [x20], #16 /* reload 1 ct block */
+ eor v2.16b, v2.16b, v5.16b /* pt2 = D(ct2) ^ ct1 */
+ eor v3.16b, v3.16b, v6.16b /* pt3 = D(ct3) ^ ct2 */
+ st1 {v0.16b-v3.16b}, [x19], #64
+ st1 {v7.16b}, [x24] /* return iv */
+ cond_yield_neon .Lcbcdecrestart /* restart path reloads v7 from [x24], stored just above */
+ b .LcbcdecloopNx
+.Lcbcdec1x:
+ adds w23, w23, #4 /* undo the subtraction above */
+ beq .Lcbcdecout /* no blocks left */
+.Lcbcdecloop: /* tail loop: one block per iteration */
+ ld1 {v1.16b}, [x20], #16 /* get next ct block */
+ mov v0.16b, v1.16b /* ...and copy to v0 */
+ decrypt_block v0, w22, x21, x6, w7
+ eor v0.16b, v0.16b, v7.16b /* xor with iv => pt */
+ mov v7.16b, v1.16b /* ct is next iv */
+ st1 {v0.16b}, [x19], #16
+ subs w23, w23, #1
+ bne .Lcbcdecloop
+.Lcbcdecout:
+ st1 {v7.16b}, [x24] /* return iv */
+ frame_pop
+ ret
+AES_ENDPROC(aes_cbc_decrypt)
+
+
+ /*
+ * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+ * int blocks, u8 ctr[])
+ */
+
+AES_ENTRY(aes_ctr_encrypt) /* in: x0 = out, x1 = in, x2 = rk, w3 = rounds, w4 = blocks, x5 = ctr */
+ frame_push 6 /* macro defined elsewhere; presumably saves x19-x24 along with the frame - confirm */
+ /* keep the arguments in callee-saved registers for the whole function */
+ mov x19, x0 /* x19 = out */
+ mov x20, x1 /* x20 = in */
+ mov x21, x2 /* x21 = rk */
+ mov x22, x3 /* w22 = rounds */
+ mov x23, x4 /* w23 = blocks remaining */
+ mov x24, x5 /* x24 = counter block buffer */
+
+.Lctrrestart: /* also the resume point passed to cond_yield_neon */
+ enc_prepare w22, x21, x6
+ ld1 {v4.16b}, [x24] /* v4 = current counter block */
+ /* x6 = byte-reversed copy of bytes 8..15 of the counter block, so plain adds can increment it */
+ umov x6, v4.d[1] /* keep swabbed ctr in reg */
+ rev x6, x6
+.LctrloopNx: /* main loop: four keystream blocks per iteration */
+ subs w23, w23, #4
+ bmi .Lctr1x /* fewer than 4 blocks left */
+ cmn w6, #4 /* 32 bit overflow? (carry set if w6 + 4 wraps) */
+ bcs .Lctr1x /* yes: the single-block path below handles carries */
+ add w7, w6, #1 /* w7..w9 = low 32 counter bits for blocks 1..3 */
+ mov v0.16b, v4.16b /* block 0 uses the counter block as is */
+ add w8, w6, #2
+ mov v1.16b, v4.16b
+ add w9, w6, #3
+ mov v2.16b, v4.16b
+ rev w7, w7 /* back to the in-memory byte order */
+ mov v3.16b, v4.16b
+ rev w8, w8
+ mov v1.s[3], w7 /* patch only the last 32-bit word of each copy */
+ rev w9, w9
+ mov v2.s[3], w8
+ mov v3.s[3], w9
+ ld1 {v5.16b-v7.16b}, [x20], #48 /* get 3 input blocks */
+ bl aes_encrypt_block4x /* v0-v3 = keystream */
+ eor v0.16b, v5.16b, v0.16b
+ ld1 {v5.16b}, [x20], #16 /* get 1 input block */
+ eor v1.16b, v6.16b, v1.16b
+ eor v2.16b, v7.16b, v2.16b
+ eor v3.16b, v5.16b, v3.16b
+ st1 {v0.16b-v3.16b}, [x19], #64
+ add x6, x6, #4 /* advance the counter past the 4 blocks just used */
+ rev x7, x6
+ ins v4.d[1], x7 /* ...and fold it back into the counter block */
+ cbz w23, .Lctrout /* done: .Lctrout stores the counter */
+ st1 {v4.16b}, [x24] /* return next CTR value */
+ cond_yield_neon .Lctrrestart /* restart path reloads v4 from [x24], stored just above */
+ b .LctrloopNx
+.Lctr1x:
+ adds w23, w23, #4 /* undo the subtraction above */
+ beq .Lctrout /* no blocks left */
+.Lctrloop: /* single-block loop */
+ mov v0.16b, v4.16b
+ encrypt_block v0, w22, x21, x8, w7 /* v0 = keystream for the current counter */
+
+ adds x6, x6, #1 /* increment BE ctr */
+ rev x7, x6 /* rev/ins leave the flags from adds intact */
+ ins v4.d[1], x7
+ bcs .Lctrcarry /* overflow? */
+
+.Lctrcarrydone:
+ subs w23, w23, #1
+ bmi .Lctrtailblock /* blocks <0 means tail block */
+ ld1 {v3.16b}, [x20], #16
+ eor v3.16b, v0.16b, v3.16b
+ st1 {v3.16b}, [x19], #16
+ bne .Lctrloop /* flags still come from the subs above */
+
+.Lctrout:
+ st1 {v4.16b}, [x24] /* return next CTR value */
+.Lctrret:
+ frame_pop
+ ret
+
+.Lctrtailblock: /* tail: hand back the raw keystream block; no input is read and the counter is not stored */
+ st1 {v0.16b}, [x19]
+ b .Lctrret
+
+.Lctrcarry: /* low 64 counter bits wrapped: add 1 to bytes 0..7 of the counter block */
+ umov x7, v4.d[0] /* load upper word of ctr */
+ rev x7, x7 /* ... to handle the carry */
+ add x7, x7, #1
+ rev x7, x7
+ ins v4.d[0], x7
+ b .Lctrcarrydone
+AES_ENDPROC(aes_ctr_encrypt)
+ .ltorg
+
+
+ /*
+ * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
+ * int blocks, u8 const rk2[], u8 iv[], int first)
+ * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
+ * int blocks, u8 const rk2[], u8 iv[], int first)
+ */
+
+ .macro next_tweak, out, in, const, tmp /* out = in doubled as a 128-bit value, with the bit shifted out of each half fed back via const; clobbers tmp */
+ sshr \tmp\().2d, \in\().2d, #63 /* each 64-bit lane: all ones if its top bit is set, else zero */
+ and \tmp\().16b, \tmp\().16b, \const\().16b /* keep that lane's constant only where the top bit was set */
+ add \out\().2d, \in\().2d, \in\().2d /* shift each lane left by one */
+ ext \tmp\().16b, \tmp\().16b, \tmp\().16b, #8 /* swap the two lanes so each lane's carry lands in the other */
+ eor \out\().16b, \out\().16b, \tmp\().16b /* apply the carries */
+ .endm
+
+.Lxts_mul_x: /* 128-bit constant loaded into v7 and passed to next_tweak as its const operand */
+CPU_LE( .quad 1, 0x87 ) /* CPU_LE/CPU_BE are defined elsewhere; presumably only the line matching the build's endianness is emitted - confirm */
+CPU_BE( .quad 0x87, 1 )
+
+AES_ENTRY(aes_xts_encrypt) /* in: x0 = out, x1 = in, x2 = rk1, w3 = rounds, w4 = blocks, x5 = rk2, x6 = iv, w7 = first */
+ frame_push 6 /* macro defined elsewhere; presumably saves x19-x24 along with the frame - confirm */
+ /* keep the arguments in callee-saved registers; x5 (rk2) and w7 (first) are only used before the main loop */
+ mov x19, x0 /* x19 = out */
+ mov x20, x1 /* x20 = in */
+ mov x21, x2 /* x21 = rk1 */
+ mov x22, x3 /* w22 = rounds */
+ mov x23, x4 /* w23 = blocks remaining */
+ mov x24, x6 /* x24 = iv/tweak buffer */
+
+ ld1 {v4.16b}, [x24] /* v4 = iv (first call) or the tweak stored by an earlier call */
+ cbz w7, .Lxtsencnotfirst
+ /* first call: turn the iv into the initial tweak using rk2, then continue with rk1 */
+ enc_prepare w3, x5, x8
+ encrypt_block v4, w3, x5, x8, w7 /* first tweak */
+ enc_switch_key w3, x2, x8 /* macro defined elsewhere; called here with rk1 */
+ ldr q7, .Lxts_mul_x
+ b .LxtsencNx /* v4 is already the tweak for block 0: skip the next_tweak below */
+
+.Lxtsencrestart: /* resume point passed to cond_yield_neon: reload the tweak stored before the yield */
+ ld1 {v4.16b}, [x24]
+.Lxtsencnotfirst:
+ enc_prepare w22, x21, x8
+.LxtsencloopNx:
+ ldr q7, .Lxts_mul_x /* reloaded every pass: the 4x body below overwrites v7 */
+ next_tweak v4, v4, v7, v8 /* v4 held the last tweak used; advance to the next one */
+.LxtsencNx:
+ subs w23, w23, #4
+ bmi .Lxtsenc1x /* fewer than 4 blocks left */
+ ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 pt blocks */
+ next_tweak v5, v4, v7, v8 /* v4..v7 become the tweaks for blocks 0..3 */
+ eor v0.16b, v0.16b, v4.16b
+ next_tweak v6, v5, v7, v8
+ eor v1.16b, v1.16b, v5.16b
+ eor v2.16b, v2.16b, v6.16b
+ next_tweak v7, v6, v7, v8 /* last use of the constant in v7; it now holds tweak 3 */
+ eor v3.16b, v3.16b, v7.16b
+ bl aes_encrypt_block4x
+ eor v3.16b, v3.16b, v7.16b /* xor each tweak in again after the cipher */
+ eor v0.16b, v0.16b, v4.16b
+ eor v1.16b, v1.16b, v5.16b
+ eor v2.16b, v2.16b, v6.16b
+ st1 {v0.16b-v3.16b}, [x19], #64
+ mov v4.16b, v7.16b /* v4 = last tweak used */
+ cbz w23, .Lxtsencout /* done: .Lxtsencout stores the tweak */
+ st1 {v4.16b}, [x24] /* save the tweak for the restart path */
+ cond_yield_neon .Lxtsencrestart
+ b .LxtsencloopNx
+.Lxtsenc1x:
+ adds w23, w23, #4 /* undo the subtraction above */
+ beq .Lxtsencout /* no blocks left */
+.Lxtsencloop: /* tail loop: one block per iteration, v7 still holds the constant here */
+ ld1 {v1.16b}, [x20], #16
+ eor v0.16b, v1.16b, v4.16b
+ encrypt_block v0, w22, x21, x8, w7
+ eor v0.16b, v0.16b, v4.16b
+ st1 {v0.16b}, [x19], #16
+ subs w23, w23, #1
+ beq .Lxtsencout /* leave v4 at the last tweak used */
+ next_tweak v4, v4, v7, v8
+ b .Lxtsencloop
+.Lxtsencout:
+ st1 {v4.16b}, [x24] /* write the last tweak used back to the iv buffer */
+ frame_pop
+ ret
+AES_ENDPROC(aes_xts_encrypt)
+
+
+AES_ENTRY(aes_xts_decrypt) /* in: x0 = out, x1 = in, x2 = rk1, w3 = rounds, w4 = blocks, x5 = rk2, x6 = iv, w7 = first */
+ frame_push 6 /* macro defined elsewhere; presumably saves x19-x24 along with the frame - confirm */
+ /* keep the arguments in callee-saved registers; x5 (rk2) and w7 (first) are only used before the main loop */
+ mov x19, x0 /* x19 = out */
+ mov x20, x1 /* x20 = in */
+ mov x21, x2 /* x21 = rk1 */
+ mov x22, x3 /* w22 = rounds */
+ mov x23, x4 /* w23 = blocks remaining */
+ mov x24, x6 /* x24 = iv/tweak buffer */
+
+ ld1 {v4.16b}, [x24] /* v4 = iv (first call) or the tweak stored by an earlier call */
+ cbz w7, .Lxtsdecnotfirst
+ /* first call: the initial tweak is made by ENcrypting the iv with rk2, then rk1 is set up for decryption */
+ enc_prepare w3, x5, x8
+ encrypt_block v4, w3, x5, x8, w7 /* first tweak */
+ dec_prepare w3, x2, x8
+ ldr q7, .Lxts_mul_x
+ b .LxtsdecNx /* v4 is already the tweak for block 0: skip the next_tweak below */
+
+.Lxtsdecrestart: /* resume point passed to cond_yield_neon: reload the tweak stored before the yield */
+ ld1 {v4.16b}, [x24]
+.Lxtsdecnotfirst:
+ dec_prepare w22, x21, x8
+.LxtsdecloopNx:
+ ldr q7, .Lxts_mul_x /* reloaded every pass: the 4x body below overwrites v7 */
+ next_tweak v4, v4, v7, v8 /* v4 held the last tweak used; advance to the next one */
+.LxtsdecNx:
+ subs w23, w23, #4
+ bmi .Lxtsdec1x /* fewer than 4 blocks left */
+ ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 ct blocks */
+ next_tweak v5, v4, v7, v8 /* v4..v7 become the tweaks for blocks 0..3 */
+ eor v0.16b, v0.16b, v4.16b
+ next_tweak v6, v5, v7, v8
+ eor v1.16b, v1.16b, v5.16b
+ eor v2.16b, v2.16b, v6.16b
+ next_tweak v7, v6, v7, v8 /* last use of the constant in v7; it now holds tweak 3 */
+ eor v3.16b, v3.16b, v7.16b
+ bl aes_decrypt_block4x
+ eor v3.16b, v3.16b, v7.16b /* xor each tweak in again after the cipher */
+ eor v0.16b, v0.16b, v4.16b
+ eor v1.16b, v1.16b, v5.16b
+ eor v2.16b, v2.16b, v6.16b
+ st1 {v0.16b-v3.16b}, [x19], #64
+ mov v4.16b, v7.16b /* v4 = last tweak used */
+ cbz w23, .Lxtsdecout /* done: .Lxtsdecout stores the tweak */
+ st1 {v4.16b}, [x24] /* save the tweak for the restart path */
+ cond_yield_neon .Lxtsdecrestart
+ b .LxtsdecloopNx
+.Lxtsdec1x:
+ adds w23, w23, #4 /* undo the subtraction above */
+ beq .Lxtsdecout /* no blocks left */
+.Lxtsdecloop: /* tail loop: one block per iteration, v7 still holds the constant here */
+ ld1 {v1.16b}, [x20], #16
+ eor v0.16b, v1.16b, v4.16b
+ decrypt_block v0, w22, x21, x8, w7
+ eor v0.16b, v0.16b, v4.16b
+ st1 {v0.16b}, [x19], #16
+ subs w23, w23, #1
+ beq .Lxtsdecout /* leave v4 at the last tweak used */
+ next_tweak v4, v4, v7, v8
+ b .Lxtsdecloop
+.Lxtsdecout:
+ st1 {v4.16b}, [x24] /* write the last tweak used back to the iv buffer */
+ frame_pop
+ ret
+AES_ENDPROC(aes_xts_decrypt)
+
+ /*
+ * aes_mac_update(u8 const in[], u32 const rk[], int rounds,
+ * int blocks, u8 dg[], int enc_before, int enc_after)
+ */
+AES_ENTRY(aes_mac_update) /* in: x0 = in, x1 = rk, w2 = rounds, w3 = blocks, x4 = dg, w5 = enc_before, w6 = enc_after */
+ frame_push 6 /* macro defined elsewhere; presumably saves x19-x24 along with the frame - confirm */
+ /* keep the arguments in callee-saved registers; w5 (enc_before) is only tested once, below */
+ mov x19, x0 /* x19 = in */
+ mov x20, x1 /* x20 = rk */
+ mov x21, x2 /* w21 = rounds */
+ mov x22, x3 /* w22 = blocks remaining */
+ mov x23, x4 /* x23 = dg (digest buffer) */
+ mov x24, x6 /* w24 = enc_after */
+
+ ld1 {v0.16b}, [x23] /* get dg */
+ enc_prepare w2, x1, x7
+ cbz w5, .Lmacloop4x /* enc_before == 0: skip the initial encryption */
+
+ encrypt_block v0, w2, x1, x7, w8 /* enc_before != 0: encrypt dg before absorbing any input */
+
+.Lmacloop4x: /* four blocks per iteration: xor a block into dg, then encrypt dg */
+ subs w22, w22, #4
+ bmi .Lmac1x /* fewer than 4 blocks left */
+ ld1 {v1.16b-v4.16b}, [x19], #64 /* get next pt block */
+ eor v0.16b, v0.16b, v1.16b /* ..and xor with dg */
+ encrypt_block v0, w21, x20, x7, w8
+ eor v0.16b, v0.16b, v2.16b
+ encrypt_block v0, w21, x20, x7, w8
+ eor v0.16b, v0.16b, v3.16b
+ encrypt_block v0, w21, x20, x7, w8
+ eor v0.16b, v0.16b, v4.16b
+ cmp w22, wzr /* was that the last block? */
+ csinv x5, x24, xzr, eq /* x5 = enc_after if it was the last block, otherwise all ones */
+ cbz w5, .Lmacout /* last block and enc_after == 0: return dg without the final encryption */
+ encrypt_block v0, w21, x20, x7, w8
+ st1 {v0.16b}, [x23] /* return dg */
+ cond_yield_neon .Lmacrestart /* restart path reloads dg from [x23], stored just above */
+ b .Lmacloop4x
+.Lmac1x:
+ add w22, w22, #4 /* undo the subtraction above */
+.Lmacloop: /* tail loop: one block per iteration */
+ cbz w22, .Lmacout
+ ld1 {v1.16b}, [x19], #16 /* get next pt block */
+ eor v0.16b, v0.16b, v1.16b /* ..and xor with dg */
+
+ subs w22, w22, #1
+ csinv x5, x24, xzr, eq /* same last-block test as in the 4x loop */
+ cbz w5, .Lmacout
+
+.Lmacenc: /* label is not referenced anywhere in the code shown */
+ encrypt_block v0, w21, x20, x7, w8
+ b .Lmacloop
+
+.Lmacout:
+ st1 {v0.16b}, [x23] /* return dg */
+ frame_pop
+ ret
+
+.Lmacrestart: /* resume point passed to cond_yield_neon */
+ ld1 {v0.16b}, [x23] /* get dg */
+ enc_prepare w21, x20, x0 /* x0 is free to use as the temp: in[] now lives in x19 */
+ b .Lmacloop4x
+AES_ENDPROC(aes_mac_update)