// SPDX-License-Identifier: GPL-2.0-only
// Copyright (C) 2021-3 ARM Limited.
//
// Assembly portion of the FP ptrace test

//
// Load values from memory into registers, break on a breakpoint, then
// break on a further breakpoint
//

#include "fp-ptrace.h"
#include "sme-inst.h"

.arch_extension sve

// Load and save register values with pauses for ptrace
//
// Inputs:
// x0 - SVE in use
// x1 - SME in use
// x2 - SME2 in use
// x3 - FA64 supported
//
// Flow: load the *_in buffers into the registers, stop on brk #0 so the
// parent can inspect/modify state, store the registers to the *_out
// buffers, then stop on a second brk #0 so the parent can read them back.
//
// Internal register roles:
// x4  - non-zero when FFR should be loaded/saved
// x6  - scratch pointer (ZA/ZT buffers, *_out scalars)
// x7  - scratch pointer / SVCR value
// x11 - value returned by rdsvl, used as ZA row stride and row count
// x12 - ZA row index
//
// Clobbers: x4, x6, x7, flags and all FP/SVE/SME state touched below.
// x11 and x12 are saved on entry and restored on exit.

.globl load_and_save
load_and_save:
	// Push x11/x12; pre-index moves sp down by 16 bytes
	stp	x11, x12, [sp, #-0x10]!

	// This should be redundant in the SVE case
	ldr	x7, =v_in
	ldp	q0, q1, [x7]
	ldp	q2, q3, [x7, #16 * 2]
	ldp	q4, q5, [x7, #16 * 4]
	ldp	q6, q7, [x7, #16 * 6]
	ldp	q8, q9, [x7, #16 * 8]
	ldp	q10, q11, [x7, #16 * 10]
	ldp	q12, q13, [x7, #16 * 12]
	ldp	q14, q15, [x7, #16 * 14]
	ldp	q16, q17, [x7, #16 * 16]
	ldp	q18, q19, [x7, #16 * 18]
	ldp	q20, q21, [x7, #16 * 20]
	ldp	q22, q23, [x7, #16 * 22]
	ldp	q24, q25, [x7, #16 * 24]
	ldp	q26, q27, [x7, #16 * 26]
	ldp	q28, q29, [x7, #16 * 28]
	ldp	q30, q31, [x7, #16 * 30]

	// SME?
	cbz	x1, check_sve_in

	adrp	x7, svcr_in
	ldr	x7, [x7, :lo12:svcr_in]
	// SVCR is 0 by default, avoid triggering SME if not in use
	cbz	x7, check_sve_in
	msr	S3_3_C4_C2_2, x7

	// ZA?
	tbz	x7, #SVCR_ZA_SHIFT, check_sm_in
	rdsvl	11, 1
	mov	w12, #0
	ldr	x6, =za_in
	// Load one ZA row per iteration; x6 advances by x11 bytes each time
1:	_ldr_za 12, 6
	add	x6, x6, x11
	add	x12, x12, #1
	cmp	x11, x12
	bne	1b

	// ZT?
	cbz	x2, check_sm_in
	adrp	x6, zt_in
	add	x6, x6, :lo12:zt_in
	_ldr_zt 6

	// In streaming mode?
check_sm_in:
	tbz	x7, #SVCR_SM_SHIFT, check_sve_in
	mov	x4, x3		// Load FFR if we have FA64
	b	load_sve

	// SVE?
check_sve_in:
	cbz	x0, wait_for_writes
	mov	x4, #1

load_sve:
	ldr	x7, =z_in
	ldr	z0, [x7, #0, MUL VL]
	ldr	z1, [x7, #1, MUL VL]
	ldr	z2, [x7, #2, MUL VL]
	ldr	z3, [x7, #3, MUL VL]
	ldr	z4, [x7, #4, MUL VL]
	ldr	z5, [x7, #5, MUL VL]
	ldr	z6, [x7, #6, MUL VL]
	ldr	z7, [x7, #7, MUL VL]
	ldr	z8, [x7, #8, MUL VL]
	ldr	z9, [x7, #9, MUL VL]
	ldr	z10, [x7, #10, MUL VL]
	ldr	z11, [x7, #11, MUL VL]
	ldr	z12, [x7, #12, MUL VL]
	ldr	z13, [x7, #13, MUL VL]
	ldr	z14, [x7, #14, MUL VL]
	ldr	z15, [x7, #15, MUL VL]
	ldr	z16, [x7, #16, MUL VL]
	ldr	z17, [x7, #17, MUL VL]
	ldr	z18, [x7, #18, MUL VL]
	ldr	z19, [x7, #19, MUL VL]
	ldr	z20, [x7, #20, MUL VL]
	ldr	z21, [x7, #21, MUL VL]
	ldr	z22, [x7, #22, MUL VL]
	ldr	z23, [x7, #23, MUL VL]
	ldr	z24, [x7, #24, MUL VL]
	ldr	z25, [x7, #25, MUL VL]
	ldr	z26, [x7, #26, MUL VL]
	ldr	z27, [x7, #27, MUL VL]
	ldr	z28, [x7, #28, MUL VL]
	ldr	z29, [x7, #29, MUL VL]
	ldr	z30, [x7, #30, MUL VL]
	ldr	z31, [x7, #31, MUL VL]

	// FFR is not present in base SME
	cbz	x4, 1f
	ldr	x7, =ffr_in
	ldr	p0, [x7]
	// Skip the FFR write when the first 64 bits of ffr_in are zero
	ldr	x7, [x7, #0]
	cbz	x7, 1f
	wrffr	p0.b

1:
	// p0 was used as a temporary for FFR above; it is reloaded here
	ldr	x7, =p_in
	ldr	p0, [x7, #0, MUL VL]
	ldr	p1, [x7, #1, MUL VL]
	ldr	p2, [x7, #2, MUL VL]
	ldr	p3, [x7, #3, MUL VL]
	ldr	p4, [x7, #4, MUL VL]
	ldr	p5, [x7, #5, MUL VL]
	ldr	p6, [x7, #6, MUL VL]
	ldr	p7, [x7, #7, MUL VL]
	ldr	p8, [x7, #8, MUL VL]
	ldr	p9, [x7, #9, MUL VL]
	ldr	p10, [x7, #10, MUL VL]
	ldr	p11, [x7, #11, MUL VL]
	ldr	p12, [x7, #12, MUL VL]
	ldr	p13, [x7, #13, MUL VL]
	ldr	p14, [x7, #14, MUL VL]
	ldr	p15, [x7, #15, MUL VL]

wait_for_writes:
	// Wait for the parent
	brk	#0

	// Save values
	ldr	x7, =v_out
	stp	q0, q1, [x7]
	stp	q2, q3, [x7, #16 * 2]
	stp	q4, q5, [x7, #16 * 4]
	stp	q6, q7, [x7, #16 * 6]
	stp	q8, q9, [x7, #16 * 8]
	stp	q10, q11, [x7, #16 * 10]
	stp	q12, q13, [x7, #16 * 12]
	stp	q14, q15, [x7, #16 * 14]
	stp	q16, q17, [x7, #16 * 16]
	stp	q18, q19, [x7, #16 * 18]
	stp	q20, q21, [x7, #16 * 20]
	stp	q22, q23, [x7, #16 * 22]
	stp	q24, q25, [x7, #16 * 24]
	stp	q26, q27, [x7, #16 * 26]
	stp	q28, q29, [x7, #16 * 28]
	stp	q30, q31, [x7, #16 * 30]

	// SME?
	cbz	x1, check_sve_out

	// Re-read the vector length and SVCR rather than reusing the
	// values from before the breakpoint
	rdsvl	11, 1
	adrp	x6, sme_vl_out
	str	x11, [x6, :lo12:sme_vl_out]

	mrs	x7, S3_3_C4_C2_2
	adrp	x6, svcr_out
	str	x7, [x6, :lo12:svcr_out]

	// ZA?
	tbz	x7, #SVCR_ZA_SHIFT, check_sm_out
	mov	w12, #0
	ldr	x6, =za_out
	// Store one ZA row per iteration; x6 advances by x11 bytes each time
1:	_str_za 12, 6
	add	x6, x6, x11
	add	x12, x12, #1
	cmp	x11, x12
	bne	1b

	// ZT?
	cbz	x2, check_sm_out
	adrp	x6, zt_out
	add	x6, x6, :lo12:zt_out
	_str_zt 6

	// In streaming mode?
check_sm_out:
	tbz	x7, #SVCR_SM_SHIFT, check_sve_out
	mov	x4, x3				// FFR?
	b	read_sve

	// SVE?
check_sve_out:
	cbz	x0, wait_for_reads
	mov	x4, #1

	rdvl	x7, #1
	adrp	x6, sve_vl_out
	str	x7, [x6, :lo12:sve_vl_out]

read_sve:
	ldr	x7, =z_out
	str	z0, [x7, #0, MUL VL]
	str	z1, [x7, #1, MUL VL]
	str	z2, [x7, #2, MUL VL]
	str	z3, [x7, #3, MUL VL]
	str	z4, [x7, #4, MUL VL]
	str	z5, [x7, #5, MUL VL]
	str	z6, [x7, #6, MUL VL]
	str	z7, [x7, #7, MUL VL]
	str	z8, [x7, #8, MUL VL]
	str	z9, [x7, #9, MUL VL]
	str	z10, [x7, #10, MUL VL]
	str	z11, [x7, #11, MUL VL]
	str	z12, [x7, #12, MUL VL]
	str	z13, [x7, #13, MUL VL]
	str	z14, [x7, #14, MUL VL]
	str	z15, [x7, #15, MUL VL]
	str	z16, [x7, #16, MUL VL]
	str	z17, [x7, #17, MUL VL]
	str	z18, [x7, #18, MUL VL]
	str	z19, [x7, #19, MUL VL]
	str	z20, [x7, #20, MUL VL]
	str	z21, [x7, #21, MUL VL]
	str	z22, [x7, #22, MUL VL]
	str	z23, [x7, #23, MUL VL]
	str	z24, [x7, #24, MUL VL]
	str	z25, [x7, #25, MUL VL]
	str	z26, [x7, #26, MUL VL]
	str	z27, [x7, #27, MUL VL]
	str	z28, [x7, #28, MUL VL]
	str	z29, [x7, #29, MUL VL]
	str	z30, [x7, #30, MUL VL]
	str	z31, [x7, #31, MUL VL]

	// Predicates are stored before FFR because p0 is reused as the
	// temporary for reading FFR below
	ldr	x7, =p_out
	str	p0, [x7, #0, MUL VL]
	str	p1, [x7, #1, MUL VL]
	str	p2, [x7, #2, MUL VL]
	str	p3, [x7, #3, MUL VL]
	str	p4, [x7, #4, MUL VL]
	str	p5, [x7, #5, MUL VL]
	str	p6, [x7, #6, MUL VL]
	str	p7, [x7, #7, MUL VL]
	str	p8, [x7, #8, MUL VL]
	str	p9, [x7, #9, MUL VL]
	str	p10, [x7, #10, MUL VL]
	str	p11, [x7, #11, MUL VL]
	str	p12, [x7, #12, MUL VL]
	str	p13, [x7, #13, MUL VL]
	str	p14, [x7, #14, MUL VL]
	str	p15, [x7, #15, MUL VL]

	// Only save FFR if it exists
	cbz	x4, wait_for_reads
	ldr	x7, =ffr_out
	rdffr	p0.b
	str	p0, [x7]

wait_for_reads:
	// Wait for the parent
	brk	#0

	// Ensure we don't leave ourselves in streaming mode
	cbz	x1, out
	msr	S3_3_C4_C2_2, xzr

out:
	// Pop x11/x12 with post-index writeback so that sp is restored to
	// its entry value, mirroring the pre-indexed push at function entry
	ldp	x11, x12, [sp], #0x10
	ret