summaryrefslogtreecommitdiffstats
path: root/tools/testing/selftests/arm64/fp/fp-ptrace-asm.S
blob: 7ad59d92d02b28e4a6b328fde96039329ea8862a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
// SPDX-License-Identifier: GPL-2.0-only
// Copyright (C) 2021-3 ARM Limited.
//
// Assembly portion of the FP ptrace test

//
// Load values from memory into registers, break on a breakpoint, save the
// registers back to memory, then break on a further breakpoint
//

#include "fp-ptrace.h"
#include "sme-inst.h"

.arch_extension sve

// Load and save register values with pauses for ptrace
//
// Inputs:
//   x0 - SVE in use
//   x1 - SME in use
//   x2 - SME2 in use
//   x3 - FA64 supported
//
// Sequence:
//   1. Load the V registers from v_in and, depending on the flags above
//      and the value in svcr_in, SVCR, ZA, ZT0, Z0-Z31, FFR and P0-P15
//      from za_in, zt_in, z_in, ffr_in and p_in.
//   2. brk #0 (first stop for the parent).
//   3. Store the same state to v_out, sme_vl_out, svcr_out, za_out,
//      zt_out, sve_vl_out, z_out, p_out and ffr_out.
//   4. brk #0 (second stop for the parent).
//   5. Clear SVCR if SME is in use, then return.
//
// Registers written: x4 (nonzero when FFR should be accessed), x6, x7,
// x11 and x12.  x11/x12 are pushed on entry and popped on exit.

.globl load_and_save
load_and_save:
	// Push x11/x12; pre-index writeback moves sp down by 16 bytes
	stp	x11, x12, [sp, #-0x10]!

	// This should be redundant in the SVE case
	ldr	x7, =v_in
	ldp	q0, q1, [x7]
	ldp	q2, q3, [x7, #16 * 2]
	ldp	q4, q5, [x7, #16 * 4]
	ldp	q6, q7, [x7, #16 * 6]
	ldp	q8, q9, [x7, #16 * 8]
	ldp	q10, q11, [x7, #16 * 10]
	ldp	q12, q13, [x7, #16 * 12]
	ldp	q14, q15, [x7, #16 * 14]
	ldp	q16, q17, [x7, #16 * 16]
	ldp	q18, q19, [x7, #16 * 18]
	ldp	q20, q21, [x7, #16 * 20]
	ldp	q22, q23, [x7, #16 * 22]
	ldp	q24, q25, [x7, #16 * 24]
	ldp	q26, q27, [x7, #16 * 26]
	ldp	q28, q29, [x7, #16 * 28]
	ldp	q30, q31, [x7, #16 * 30]

	// SME?
	cbz	x1, check_sve_in

	// x7 = requested SVCR value; it stays live until check_sm_in
	adrp	x7, svcr_in
	ldr	x7, [x7, :lo12:svcr_in]
	// SVCR is 0 by default, avoid triggering SME if not in use
	cbz	x7, check_sve_in
	msr	S3_3_C4_C2_2, x7

	// ZA?
	tbz	x7, #SVCR_ZA_SHIFT, check_sm_in
	rdsvl	11, 1			// x11 = streaming vector length
	mov	w12, #0			// w12 = ZA vector index
	ldr	x6, =za_in		// x6 = source pointer
	// One _ldr_za per index 0..x11-1, advancing x6 by x11 each time
1:	_ldr_za 12, 6
	add	x6, x6, x11
	add	x12, x12, #1
	cmp	x11, x12
	bne	1b

	// ZT?  Only attempted when ZA is enabled (we got here via the ZA path)
	cbz	x2, check_sm_in
	adrp	x6, zt_in
	add	x6, x6, :lo12:zt_in
	_ldr_zt 6

	// In streaming mode?
check_sm_in:
	tbz	x7, #SVCR_SM_SHIFT, check_sve_in
	mov	x4, x3		// Load FFR if we have FA64
	b	load_sve

	// SVE?
check_sve_in:
	cbz	x0, wait_for_writes
	mov	x4, #1		// Non-streaming SVE: always load FFR

load_sve:
	ldr	x7, =z_in
	ldr	z0, [x7, #0, MUL VL]
	ldr	z1, [x7, #1, MUL VL]
	ldr	z2, [x7, #2, MUL VL]
	ldr	z3, [x7, #3, MUL VL]
	ldr	z4, [x7, #4, MUL VL]
	ldr	z5, [x7, #5, MUL VL]
	ldr	z6, [x7, #6, MUL VL]
	ldr	z7, [x7, #7, MUL VL]
	ldr	z8, [x7, #8, MUL VL]
	ldr	z9, [x7, #9, MUL VL]
	ldr	z10, [x7, #10, MUL VL]
	ldr	z11, [x7, #11, MUL VL]
	ldr	z12, [x7, #12, MUL VL]
	ldr	z13, [x7, #13, MUL VL]
	ldr	z14, [x7, #14, MUL VL]
	ldr	z15, [x7, #15, MUL VL]
	ldr	z16, [x7, #16, MUL VL]
	ldr	z17, [x7, #17, MUL VL]
	ldr	z18, [x7, #18, MUL VL]
	ldr	z19, [x7, #19, MUL VL]
	ldr	z20, [x7, #20, MUL VL]
	ldr	z21, [x7, #21, MUL VL]
	ldr	z22, [x7, #22, MUL VL]
	ldr	z23, [x7, #23, MUL VL]
	ldr	z24, [x7, #24, MUL VL]
	ldr	z25, [x7, #25, MUL VL]
	ldr	z26, [x7, #26, MUL VL]
	ldr	z27, [x7, #27, MUL VL]
	ldr	z28, [x7, #28, MUL VL]
	ldr	z29, [x7, #29, MUL VL]
	ldr	z30, [x7, #30, MUL VL]
	ldr	z31, [x7, #31, MUL VL]

	// FFR is not present in base SME
	cbz	x4, 1f
	ldr	x7, =ffr_in
	ldr	p0, [x7]		// Stage the FFR value in p0
	// Skip the write when the first 64 bits of ffr_in are zero
	ldr	x7, [x7, #0]
	cbz	x7, 1f
	wrffr	p0.b
1:

	// p0 was used as scratch above, so the predicates are loaded last
	ldr	x7, =p_in
	ldr	p0, [x7, #0, MUL VL]
	ldr	p1, [x7, #1, MUL VL]
	ldr	p2, [x7, #2, MUL VL]
	ldr	p3, [x7, #3, MUL VL]
	ldr	p4, [x7, #4, MUL VL]
	ldr	p5, [x7, #5, MUL VL]
	ldr	p6, [x7, #6, MUL VL]
	ldr	p7, [x7, #7, MUL VL]
	ldr	p8, [x7, #8, MUL VL]
	ldr	p9, [x7, #9, MUL VL]
	ldr	p10, [x7, #10, MUL VL]
	ldr	p11, [x7, #11, MUL VL]
	ldr	p12, [x7, #12, MUL VL]
	ldr	p13, [x7, #13, MUL VL]
	ldr	p14, [x7, #14, MUL VL]
	ldr	p15, [x7, #15, MUL VL]

wait_for_writes:
	// Wait for the parent
	brk #0

	// Save values
	ldr	x7, =v_out
	stp	q0, q1, [x7]
	stp	q2, q3, [x7, #16 * 2]
	stp	q4, q5, [x7, #16 * 4]
	stp	q6, q7, [x7, #16 * 6]
	stp	q8, q9, [x7, #16 * 8]
	stp	q10, q11, [x7, #16 * 10]
	stp	q12, q13, [x7, #16 * 12]
	stp	q14, q15, [x7, #16 * 14]
	stp	q16, q17, [x7, #16 * 16]
	stp	q18, q19, [x7, #16 * 18]
	stp	q20, q21, [x7, #16 * 20]
	stp	q22, q23, [x7, #16 * 22]
	stp	q24, q25, [x7, #16 * 24]
	stp	q26, q27, [x7, #16 * 26]
	stp	q28, q29, [x7, #16 * 28]
	stp	q30, q31, [x7, #16 * 30]

	// SME?
	cbz	x1, check_sve_out

	// Record the streaming vector length; x11 is reused by the ZA loop
	rdsvl	11, 1
	adrp	x6, sme_vl_out
	str	x11, [x6, :lo12:sme_vl_out]

	// Record the current SVCR; x7 stays live until check_sm_out
	mrs	x7, S3_3_C4_C2_2
	adrp	x6, svcr_out
	str	x7, [x6, :lo12:svcr_out]

	// ZA?
	tbz	x7, #SVCR_ZA_SHIFT, check_sm_out
	mov	w12, #0			// w12 = ZA vector index
	ldr	x6, =za_out		// x6 = destination pointer
	// One _str_za per index 0..x11-1, advancing x6 by x11 each time
1:	_str_za 12, 6
	add	x6, x6, x11
	add	x12, x12, #1
	cmp	x11, x12
	bne	1b

	// ZT?  Only attempted when ZA is enabled (we got here via the ZA path)
	cbz	x2, check_sm_out
	adrp	x6, zt_out
	add	x6, x6, :lo12:zt_out
	_str_zt 6

	// In streaming mode?
check_sm_out:
	tbz	x7, #SVCR_SM_SHIFT, check_sve_out
	mov	x4, x3				// FFR?
	b	read_sve

	// SVE?
check_sve_out:
	cbz	x0, wait_for_reads
	mov	x4, #1		// Non-streaming SVE: always save FFR

	// The SVE vector length is only recorded on this non-streaming path
	rdvl	x7, #1
	adrp	x6, sve_vl_out
	str	x7, [x6, :lo12:sve_vl_out]

read_sve:
	ldr	x7, =z_out
	str	z0, [x7, #0, MUL VL]
	str	z1, [x7, #1, MUL VL]
	str	z2, [x7, #2, MUL VL]
	str	z3, [x7, #3, MUL VL]
	str	z4, [x7, #4, MUL VL]
	str	z5, [x7, #5, MUL VL]
	str	z6, [x7, #6, MUL VL]
	str	z7, [x7, #7, MUL VL]
	str	z8, [x7, #8, MUL VL]
	str	z9, [x7, #9, MUL VL]
	str	z10, [x7, #10, MUL VL]
	str	z11, [x7, #11, MUL VL]
	str	z12, [x7, #12, MUL VL]
	str	z13, [x7, #13, MUL VL]
	str	z14, [x7, #14, MUL VL]
	str	z15, [x7, #15, MUL VL]
	str	z16, [x7, #16, MUL VL]
	str	z17, [x7, #17, MUL VL]
	str	z18, [x7, #18, MUL VL]
	str	z19, [x7, #19, MUL VL]
	str	z20, [x7, #20, MUL VL]
	str	z21, [x7, #21, MUL VL]
	str	z22, [x7, #22, MUL VL]
	str	z23, [x7, #23, MUL VL]
	str	z24, [x7, #24, MUL VL]
	str	z25, [x7, #25, MUL VL]
	str	z26, [x7, #26, MUL VL]
	str	z27, [x7, #27, MUL VL]
	str	z28, [x7, #28, MUL VL]
	str	z29, [x7, #29, MUL VL]
	str	z30, [x7, #30, MUL VL]
	str	z31, [x7, #31, MUL VL]

	// Predicates are stored before FFR because reading FFR overwrites p0
	ldr	x7, =p_out
	str	p0, [x7, #0, MUL VL]
	str	p1, [x7, #1, MUL VL]
	str	p2, [x7, #2, MUL VL]
	str	p3, [x7, #3, MUL VL]
	str	p4, [x7, #4, MUL VL]
	str	p5, [x7, #5, MUL VL]
	str	p6, [x7, #6, MUL VL]
	str	p7, [x7, #7, MUL VL]
	str	p8, [x7, #8, MUL VL]
	str	p9, [x7, #9, MUL VL]
	str	p10, [x7, #10, MUL VL]
	str	p11, [x7, #11, MUL VL]
	str	p12, [x7, #12, MUL VL]
	str	p13, [x7, #13, MUL VL]
	str	p14, [x7, #14, MUL VL]
	str	p15, [x7, #15, MUL VL]

	// Only save FFR if it exists
	cbz	x4, wait_for_reads
	ldr	x7, =ffr_out
	rdffr	p0.b
	str	p0, [x7]

wait_for_reads:
	// Wait for the parent
	brk #0

	// Ensure we don't leave ourselves in streaming mode
	cbz	x1, out
	msr	S3_3_C4_C2_2, xzr

out:
	// Pop the x11/x12 pair pushed on entry.  Post-index addressing
	// reloads from [sp] and then adds 16, so sp is back to its value on
	// entry before we return.
	ldp	x11, x12, [sp], #0x10
	ret