summaryrefslogtreecommitdiffstats
path: root/arch/arm64/crypto/aes-ce-ccm-core.S
blob: f2624238fd9543d9894eea87f4e0c86df31be941 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * aes-ce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd.
 * Copyright (C) 2024 Google LLC
 *
 * Author: Ard Biesheuvel <ardb@kernel.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	.text
	.arch	armv8-a+crypto

	/*
	 * load_round_keys - preload the AES round keys into vector registers
	 *
	 * rk:  64-bit register holding the address of the round key array
	 *      (16 bytes per round key)
	 * nr:  32-bit register holding the number of rounds; the bit tests in
	 *      aes_encrypt only make sense for 10, 12 and 14
	 * tmp: 64-bit scratch register, clobbered. Its name is also pasted
	 *      after a 'w' to obtain the 32-bit view, which relies on wxN
	 *      register aliases that are not defined in this file (presumably
	 *      provided by <asm/assembler.h> - confirm).
	 *
	 * Register layout on exit:
	 *   v10-v13: the first four round keys, rk[0..3]
	 *   v14-v21: round keys rk[nr - 10 .. nr - 3]
	 *   v3-v5:   the last three round keys, rk[nr - 2 .. nr]
	 */
	.macro	load_round_keys, rk, nr, tmp
	sub	w\tmp, \nr, #10
	/* tmp = rk + 16 * (nr - 10): address of the last eleven round keys */
	add	\tmp, \rk, w\tmp, sxtw #4
	ld1	{v10.4s-v13.4s}, [\rk]
	/* each four-register load advances tmp by 64 bytes */
	ld1	{v14.4s-v17.4s}, [\tmp], #64
	ld1	{v18.4s-v21.4s}, [\tmp], #64
	ld1	{v3.4s-v5.4s}, [\tmp]
	.endm

	/*
	 * dround - apply one AESE + AESMC step to two states with one key
	 *
	 * va, vb: names of the two vector registers holding the states; each
	 *         is updated in place
	 * vk:     name of the vector register holding the round key
	 *
	 * Each state's AESE is immediately followed by its own AESMC.
	 */
	.macro	dround, va, vb, vk
	aese	\va\().16b, \vk\().16b
	aesmc	\va\().16b, \va\().16b
	aese	\vb\().16b, \vk\().16b
	aesmc	\vb\().16b, \vb\().16b
	.endm

	/*
	 * aes_encrypt - run two AES states through every round except the
	 *               final round key addition
	 *
	 * va, vb: names of the two vector registers to process, updated in
	 *         place
	 * nr:     32-bit register holding the number of rounds
	 *
	 * Expects the key registers as laid out by load_round_keys. Bits 2 and
	 * 1 of nr select how many of the leading keys in v10-v13 are applied:
	 *   nr = 10 (0b1010): none
	 *   nr = 12 (0b1100): v10 and v11
	 *   nr = 14 (0b1110): v10-v13
	 * The nine rounds using v14-v21 and v3 always run, followed by a last
	 * AESE with v4 that has no AESMC after it. The key in v5 is NOT applied
	 * here; the code using this macro XORs v5 in separately (or lets it
	 * cancel out, as ce_aes_ccm_final does).
	 *
	 * Both TBZ instructions branch to the single .L\@ label defined below.
	 */
	.macro	aes_encrypt, va, vb, nr
	tbz	\nr, #2, .L\@
	dround	\va, \vb, v10
	dround	\va, \vb, v11
	tbz	\nr, #1, .L\@
	dround	\va, \vb, v12
	dround	\va, \vb, v13
.L\@:	.irp	v, v14, v15, v16, v17, v18, v19, v20, v21, v3
	dround	\va, \vb, \v
	.endr
	/* last round: no AESMC, and the XOR with v5 is left to the caller */
	aese	\va\().16b, v4.16b
	aese	\vb\().16b, v4.16b
	.endm

	/*
	 * aes_ccm_do_crypt - function body shared by ce_aes_ccm_encrypt and
	 *                    ce_aes_ccm_decrypt
	 *
	 * enc: 1 emits the encryption variant, any other value the decryption
	 *      variant
	 *
	 * Register use (arguments in the order of the C prototypes):
	 *   x0: out      - output pointer, advanced by 16 for every block
	 *   x1: in       - input pointer, advanced by 16 for every block
	 *   w2: cbytes   - number of bytes left to process
	 *   x3: rk       - round key array
	 *   w4: rounds   - number of AES rounds
	 *   x5: mac      - 16 byte MAC state, loaded on entry
	 *   x6: ctr      - 16 byte counter block; bytes 8..15 are written back
	 *                  after the last full block
	 *   x7: final_iv - if non-NULL, the MAC is finalised via
	 *                  ce_aes_ccm_final; otherwise the raw MAC state is
	 *                  stored to mac[]
	 *   x8: low 64 bits of the counter, x9/x10: scratch
	 *   v0: MAC state, v1: counter block / keystream, v2, v6: data blocks
	 *   v3-v5, v10-v21: round keys (see load_round_keys)
	 *
	 * A trailing partial block is handed to ce_aes_ccm_crypt_tail, which
	 * returns to the caller directly.
	 */
	.macro	aes_ccm_do_crypt,enc
	load_round_keys	x3, w4, x10

	ld1	{v0.16b}, [x5]			/* load mac */
	/*
	 * cbytes is a u32, so only w2 is meaningful: the procedure call
	 * standard leaves the upper half of x2 unspecified. Test the same
	 * 32-bit view that the loop below counts down, so that stale upper
	 * bits cannot send a zero-length request into the block loop.
	 */
	cbz	w2, ce_aes_ccm_final
	ldr	x8, [x6, #8]			/* load lower ctr */
CPU_LE(	rev	x8, x8			)	/* keep swabbed ctr in reg */
0:	/* outer loop */
	ld1	{v1.8b}, [x6]			/* load upper ctr */
	prfm	pldl1strm, [x1]
	add	x8, x8, #1
	rev	x9, x8
	ins	v1.d[1], x9			/* no carry in lower ctr */

	/* v0 = MAC state, v1 = counter block; both still lack the v5 XOR */
	aes_encrypt	v0, v1, w4

	/* fewer than 16 bytes left: w2 is now negative, take the tail path */
	subs	w2, w2, #16
	bmi	ce_aes_ccm_crypt_tail
	ld1	{v2.16b}, [x1], #16		/* load next input block */
	/*
	 * Either way v2 ends up as plaintext ^ v5 and v6 as the output block,
	 * so the MAC update below also completes the MAC's final round.
	 */
	.if	\enc == 1
	eor	v2.16b, v2.16b, v5.16b		/* final round enc+mac */
	eor	v6.16b, v1.16b, v2.16b		/* xor with crypted ctr */
	.else
	eor	v2.16b, v2.16b, v1.16b		/* xor with crypted ctr */
	eor	v6.16b, v2.16b, v5.16b		/* final round enc */
	.endif
	eor	v0.16b, v0.16b, v2.16b		/* xor mac with pt ^ rk[last] */
	st1	{v6.16b}, [x0], #16		/* write output block */
	/* flags still come from the SUBS above: loop while bytes remain */
	bne	0b
CPU_LE(	rev	x8, x8			)
	str	x8, [x6, #8]			/* store lsb end of ctr (BE) */
	cbnz	x7, ce_aes_ccm_final
	st1	{v0.16b}, [x5]			/* store mac */
	ret
	.endm

/*
 * ce_aes_ccm_crypt_tail - process a trailing partial block (1 to 15 bytes)
 *
 * Reached by a conditional branch (not a call) from the aes_ccm_do_crypt
 * loop once subtracting 16 from w2 went negative, so w2 holds
 * (bytes left - 16) and the closing RET returns to the caller of
 * ce_aes_ccm_encrypt/ce_aes_ccm_decrypt.
 *
 * State on entry:
 *   x0/x1: output/input pointers at the start of the partial block
 *   v0/v1: MAC state and keystream from aes_encrypt, still lacking the v5 XOR
 *   v5:    last round key
 *   v6:    output block stored by the previous loop iteration, if any
 *   v22:   all ones when entered from ce_aes_ccm_encrypt, zero when entered
 *          from ce_aes_ccm_decrypt
 *   x5/x7: mac and final_iv pointers
 *
 * The 16 byte load and store below end exactly at the end of the data, so
 * they also touch the bytes in front of the partial block.
 * NOTE(review): when no full block preceded the tail, those bytes lie in
 * front of in[]/out[]; presumably the caller supplies a suitable buffer for
 * that case - confirm against the C glue code.
 * NOTE(review): unlike the full-block exit path, this path does not write
 * the incremented counter back to ctr[]; presumably the tail is only taken
 * on the last call of a request - confirm.
 *
 * ce_aes_ccm_final is also entered directly from aes_ccm_do_crypt, with the
 * MAC state in v0: it encrypts v0 and the block at final_iv and stores
 * their XOR to mac[].
 */
SYM_FUNC_START_LOCAL(ce_aes_ccm_crypt_tail)
	eor	v0.16b, v0.16b, v5.16b		/* final round mac */
	eor	v1.16b, v1.16b, v5.16b		/* final round enc */

	add	x1, x1, w2, sxtw		/* rewind the input pointer (w2 < 0) */
	add	x0, x0, w2, sxtw		/* rewind the output pointer */

	/*
	 * v7/v8 = 32 bytes starting at .Lpermute + w2 (before the label),
	 * v9 = 16 bytes starting at .Lpermute - w2 (after the label).
	 */
	adr_l	x8, .Lpermute			/* load permute vectors */
	add	x9, x8, w2, sxtw
	sub	x8, x8, w2, sxtw
	ld1	{v7.16b-v8.16b}, [x9]
	ld1	{v9.16b}, [x8]

	ld1	{v2.16b}, [x1]			/* load a full block of input */
	tbl	v1.16b, {v1.16b}, v7.16b	/* move keystream to end of register */
	eor	v7.16b, v2.16b, v1.16b		/* encrypt partial input block */
	/* v22 all ones: v2 keeps the input; v22 zero: v2 takes the result in v7 */
	bif	v2.16b, v7.16b, v22.16b		/* select plaintext */
	tbx	v7.16b, {v6.16b}, v8.16b	/* insert output from previous iteration */
	tbl	v2.16b, {v2.16b}, v9.16b	/* copy plaintext to start of v2 */
	eor	v0.16b, v0.16b, v2.16b		/* fold plaintext into mac */

	st1	{v7.16b}, [x0]			/* store output block */
	/* no final_iv: skip the finalisation and store the MAC state as is */
	cbz	x7, 0f

SYM_INNER_LABEL(ce_aes_ccm_final, SYM_L_LOCAL)
	ld1	{v1.16b}, [x7]			/* load 1st ctriv */

	aes_encrypt	v0, v1, w4

	/* final round key cancels out */
	eor	v0.16b, v0.16b, v1.16b		/* en-/decrypt the mac */
0:	st1	{v0.16b}, [x5]			/* store result */
	ret
SYM_FUNC_END(ce_aes_ccm_crypt_tail)

	/*
	 * void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes,
	 * 			   u8 const rk[], u32 rounds, u8 mac[],
	 * 			   u8 ctr[], u8 const final_iv[]);
	 * void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes,
	 * 			   u8 const rk[], u32 rounds, u8 mac[],
	 * 			   u8 ctr[], u8 const final_iv[]);
	 *
	 * Both entry points expand aes_ccm_do_crypt. They differ in the macro
	 * argument and in the mask placed in v22, which
	 * ce_aes_ccm_crypt_tail uses to pick the bytes folded into the MAC
	 * for a trailing partial block.
	 */
SYM_FUNC_START(ce_aes_ccm_encrypt)
	/* all ones: the tail folds the input block, as loaded, into the MAC */
	movi	v22.16b, #255
	aes_ccm_do_crypt	1
SYM_FUNC_END(ce_aes_ccm_encrypt)

/*
 * Decryption entry point: expands aes_ccm_do_crypt with enc == 0, using the
 * same argument registers as ce_aes_ccm_encrypt.
 */
SYM_FUNC_START(ce_aes_ccm_decrypt)
	/* zero: the tail folds the XOR of input and keystream into the MAC */
	movi	v22.16b, #0
	aes_ccm_do_crypt	0
SYM_FUNC_END(ce_aes_ccm_decrypt)

	/*
	 * Index table for the TBL/TBX instructions in ce_aes_ccm_crypt_tail:
	 * the identity permutation 0..15 with fifteen 0xff bytes on either
	 * side. The tail code loads 16 and 32 byte windows that start up to
	 * 15 bytes before or after .Lpermute, which shifts the identity
	 * indexes by the size of the partial block. 0xff is out of range
	 * for a single-register table, so TBL produces zero for those lanes
	 * and TBX leaves them unchanged.
	 */
	.section ".rodata", "a"
	.align	6
	.space	15, 0xff
.Lpermute:
	.byte	 0,  1,  2,  3
	.byte	 4,  5,  6,  7
	.byte	 8,  9, 10, 11
	.byte	12, 13, 14, 15
	.space	15, 0xff