diff options
Diffstat (limited to 'third_party/dav1d/tests/checkasm/x86/checkasm.asm')
-rw-r--r-- | third_party/dav1d/tests/checkasm/x86/checkasm.asm | 475 |
1 files changed, 475 insertions, 0 deletions
diff --git a/third_party/dav1d/tests/checkasm/x86/checkasm.asm b/third_party/dav1d/tests/checkasm/x86/checkasm.asm new file mode 100644 index 0000000000..8f19ef97f7 --- /dev/null +++ b/third_party/dav1d/tests/checkasm/x86/checkasm.asm @@ -0,0 +1,475 @@ +; Copyright © 2018, VideoLAN and dav1d authors +; Copyright © 2018, Two Orioles, LLC +; All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions are met: +; +; 1. Redistributions of source code must retain the above copyright notice, this +; list of conditions and the following disclaimer. +; +; 2. Redistributions in binary form must reproduce the above copyright notice, +; this list of conditions and the following disclaimer in the documentation +; and/or other materials provided with the distribution. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +; ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +; DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +; ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +; (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +; ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 

%include "config.asm"
%undef private_prefix
%define private_prefix checkasm
%include "ext/x86/x86inc.asm"

SECTION_RODATA 16

%if ARCH_X86_64
; just random numbers to reduce the chance of incidental match
;
; x6-x15: 16-byte patterns that checked_call loads into m6-m15 before the
;         call and xors against afterwards (WIN64 only).
; n7-n14: 8-byte patterns that checked_call loads into r7-r14 before the
;         call and compares against afterwards (n7/n8 are WIN64 only).
%if WIN64
x6:  dq 0x1a1b2550a612b48c,0x79445c159ce79064
x7:  dq 0x2eed899d5a28ddcd,0x86b2536fcd8cf636
x8:  dq 0xb0856806085e7943,0x3f2bf84fc0fcca4e
x9:  dq 0xacbd382dcf5b8de2,0xd229e1f5b281303f
x10: dq 0x71aeaff20b095fd9,0xab63e2e11fa38ed9
x11: dq 0x89b0c0765892729a,0x77d410d5c42c882d
x12: dq 0xc45ea11a955d8dd5,0x24b3c1d2a024048b
x13: dq 0x2e8ec680de14b47c,0xdd7b8919edd42786
x14: dq 0x135ce6888fa02cbf,0x11e53e2b2ac655ef
x15: dq 0x011ff554472a7a10,0x6de8f4c914c334d5
n7:  dq 0x21f86d66c8ca00ce
n8:  dq 0x75b6ba21077c48ad
%endif
n9:  dq 0xed56bb2dcb3c7736
n10: dq 0x8bda43d3fd1a7e06
n11: dq 0xb64a9c9e5d318408
n12: dq 0xdf9a54b303f1d3a3
n13: dq 0x4a75479abd64e097
n14: dq 0x249214109d5d1c88
%endif

; Messages handed to fail_func(). errmsg_register carries a %s that receives
; the list of register names assembled by checked_call.
errmsg_stack:      db "stack corruption", 0
errmsg_register:   db "failed to preserve register:%s", 0
errmsg_vzeroupper: db "missing vzeroupper", 0

SECTION .bss

; Non-zero once init_x86 has decided that the dirty-YMM-state check can be
; performed; read by checked_call after every call.
check_vzeroupper: resd 1

SECTION .text

cextern fail_func

; max number of args used by any asm function.
; (max_args % 4) must equal 3 for stack alignment
%define max_args 15

%if UNIX64
    DECLARE_REG_TMP 0
%else
    DECLARE_REG_TMP 4
%endif

;-----------------------------------------------------------------------------
; unsigned checkasm_init_x86(char *name)
;
; Fills name with the processor brand string (CPUID leaves 0x80000002 to
; 0x80000004, 48 bytes) or, when those leaves are unavailable, with the
; 12-byte manufacturer id followed by a zero dword.
;
; Returns the eax value of CPUID leaf 1 (the cpuid signature), or 0 when
; leaf 1 is not available.
;
; Side effect: stores 1 in check_vzeroupper when XGETBV with ecx=1 is
; available and bit 2 of its result is clear at this point.
;-----------------------------------------------------------------------------
cglobal init_x86, 0, 5
%if ARCH_X86_64
    push        rbx                     ; cpuid overwrites ebx (callee-saved)
%endif
    movifnidn   t0, r0mp                ; t0 = name
    mov         eax, 0x80000000
    cpuid                               ; eax = highest extended leaf
    cmp         eax, 0x80000004
    jb .no_brand                        ; processor brand string not supported

    ; brand string: three leaves, 16 bytes each
    mov         eax, 0x80000002
    cpuid
    mov         [t0+4* 0], eax
    mov         [t0+4* 1], ebx
    mov         [t0+4* 2], ecx
    mov         [t0+4* 3], edx
    mov         eax, 0x80000003
    cpuid
    mov         [t0+4* 4], eax
    mov         [t0+4* 5], ebx
    mov         [t0+4* 6], ecx
    mov         [t0+4* 7], edx
    mov         eax, 0x80000004
    cpuid
    mov         [t0+4* 8], eax
    mov         [t0+4* 9], ebx
    mov         [t0+4*10], ecx
    mov         [t0+4*11], edx
    xor         eax, eax
    cpuid                               ; eax = highest basic leaf
    jmp .check_xcr1
.no_brand:                              ; use manufacturer id as a fallback
    xor         eax, eax
    mov         [t0+4*3], eax           ; zero dword after the 12 id bytes
    cpuid                               ; eax = highest basic leaf
    mov         [t0+4*0], ebx
    mov         [t0+4*1], edx
    mov         [t0+4*2], ecx
.check_xcr1:
    test        eax, eax
    jz .end2                            ; cpuid leaf 1 not supported
    mov         t0d, eax                ; max leaf
    mov         eax, 1
    cpuid
    and         ecx, 0x18000000
    cmp         ecx, 0x18000000
    jne .end2                           ; osxsave/avx not supported
    cmp         t0d, 13                 ; cpuid leaf 13 not supported
    jb .end2
    mov         t0d, eax                ; cpuid signature
    mov         eax, 13
    mov         ecx, 1
    cpuid
    test        al, 0x04
    jz .end                             ; xcr1 not supported
    mov         ecx, 1
    xgetbv
    test        al, 0x04
    jnz .end                            ; always-dirty ymm state
    ; ecx is still 1 here, so the flag is stored as 1
%if ARCH_X86_64 == 0 && PIC
    LEA         eax, check_vzeroupper
    mov         [eax], ecx
%else
    mov         [check_vzeroupper], ecx
%endif
.end:
    mov         eax, t0d                ; return the cpuid signature
.end2:
%if ARCH_X86_64
    pop         rbx
%endif
    RET

%if ARCH_X86_64
; Per-ABI layout used by the x86-64 checked_call below:
;   stack_param            where outgoing stack arguments are written
;   num_fn_args            location of the argument count in the caller's frame
;   num_reg_args           number of arguments passed in registers
;   free_regs              lowest-numbered register that is filled with a
;                          pattern and verified after the call
;   clobber_mask_stack_bit clobber-mask bit tested for stack arguments
%if WIN64
    %define stack_param rsp+32 ; shadow space
    %define num_fn_args rsp+stack_offset+17*8
    %assign num_reg_args 4
    %assign free_regs 7
    %assign clobber_mask_stack_bit 16
    DECLARE_REG_TMP 4
%else
    %define stack_param rsp
    %define num_fn_args rsp+stack_offset+11*8
    %assign num_reg_args 6
    %assign free_regs 9
    %assign clobber_mask_stack_bit 64
    DECLARE_REG_TMP 7
%endif

; CLOBBER_UPPER reg, mask_bit
; If mask_bit is set in r9b, replaces reg with its low 32 bits OR'ed with r8
; (checked_call sets r8 to 0xdeadbeef00000000); otherwise reg is unchanged.
; Clobbers r13 and flags.
%macro CLOBBER_UPPER 2 ; reg, mask_bit
    mov         r13d, %1d               ; r13 = zero-extended low half
    or          r13, r8
    test        r9b, %2
    cmovnz      %1, r13
%endmacro

;-----------------------------------------------------------------------------
; checkasm_checked_call (x86-64)
;
; Calls the function under test and then verifies, in this order:
;   1. the 64 bytes of canaries written above the outgoing stack arguments,
;   2. the registers that were filled with n*/x* patterns before the call,
;   3. (if check_vzeroupper is set) that bit 2 of XGETBV(ecx=1) is clear.
; On any failure fail_func() is called with the matching errmsg_* string.
; The callee's rax/rdx are preserved across fail_func() and returned.
;
; The argument count is read from [num_fn_args]; the function pointer and
; the clobber mask are read from the slots that follow the arguments.
;-----------------------------------------------------------------------------
cglobal checked_call, 2, 15, 16, max_args*8+64+8
    mov         r10d, [num_fn_args]
    mov         r8, 0xdeadbeef00000000
    mov         r9d, [num_fn_args+r10*8+8] ; clobber_mask
    mov         t0, [num_fn_args+r10*8]    ; func

    ; Clobber the upper halves of 32-bit parameters
    CLOBBER_UPPER r0, 1
    CLOBBER_UPPER r1, 2
    CLOBBER_UPPER r2, 4
    CLOBBER_UPPER r3, 8
%if UNIX64
    CLOBBER_UPPER r4, 16
    CLOBBER_UPPER r5, 32
%else ; WIN64
    ; fill m6-m15 with known patterns
%assign i 6
%rep 16-6
    mova        m %+ i, [x %+ i]
    %assign i i+1
%endrep
%endif

    xor         r11d, r11d
    sub         r10d, num_reg_args
    cmovs       r10d, r11d              ; num stack args

    ; write stack canaries to the area above parameters passed on the stack
    mov         r12, [rsp+stack_offset] ; return address
    not         r12                     ; canary = ~return address
%assign i 0
%rep 8 ; 64 bytes
    mov         [stack_param+(r10+i)*8], r12
    %assign i i+1
%endrep

    test        r10d, r10d
    jz .stack_setup_done                ; no stack parameters
.copy_stack_parameter:
    ; r11 = index of the stack argument being copied
    mov         r12, [stack_param+stack_offset+8+r11*8]
    CLOBBER_UPPER r12, clobber_mask_stack_bit
    shr         r9d, 1                  ; line the next argument's bit up
    mov         [stack_param+r11*8], r12
    inc         r11d
    cmp         r11d, r10d
    jl .copy_stack_parameter
.stack_setup_done:

    ; fill r14 down to r<free_regs> with known patterns
%assign i 14
%rep 15-free_regs
    mov         r %+ i, [n %+ i]
    %assign i i-1
%endrep
    call        t0

    ; check for stack corruption
    mov         r0d, [num_fn_args]
    xor         r3d, r3d
    sub         r0d, num_reg_args
    cmovs       r0d, r3d                ; num stack args

    mov         r3, [rsp+stack_offset]
    mov         r4, [stack_param+r0*8]
    not         r3                      ; expected canary
    xor         r4, r3
%assign i 1
%rep 6
    mov         r5, [stack_param+(r0+i)*8]
    xor         r5, r3
    or          r4, r5
    %assign i i+1
%endrep
    xor         r3, [stack_param+(r0+7)*8]
    or          r4, r3                  ; r4 == 0 iff all 8 canaries match
    jz .stack_ok
    ; Save the return value located in rdx:rax first to prevent clobbering.
    mov         r10, rax
    mov         r11, rdx
    lea         r0, [errmsg_stack]
    jmp .fail
.stack_ok:

    ; check for failure to preserve registers
    ; r3 and r4 are both zero here (all canary xors were zero). Each step
    ; shifts r3 left and appends one mismatch bit, so r<free_regs> ends up
    ; in bit 0.
%assign i 14
%rep 15-free_regs
    cmp         r %+ i, [n %+ i]
    setne       r4b
    lea         r3d, [r4+r3*2]
    %assign i i-1
%endrep
%if WIN64
    lea         r0, [rsp+32]            ; account for shadow space
    mov         r5, r0                  ; r5 = start of the name buffer
    test        r3d, r3d
    jz .gpr_ok
%else
    test        r3d, r3d
    jz .gpr_xmm_ok
    mov         r0, rsp                 ; name buffer starts at rsp
%endif
    ; Write " rN" at r0 for every register; only advance r0 past it when
    ; that register's mismatch bit is set, so unset names get overwritten.
%assign i free_regs
%rep 15-free_regs
%if i < 10
    mov         dword [r0], " r0" + (i << 16)
    lea         r4, [r0+3]
%else
    mov         dword [r0], " r10" + ((i - 10) << 24)
    lea         r4, [r0+4]
%endif
    test        r3b, 1 << (i - free_regs)
    cmovnz      r0, r4
    %assign i i+1
%endrep
%if WIN64 ; xmm registers
.gpr_ok:
%assign i 6
%rep 16-6
    pxor        m %+ i, [x %+ i]        ; zero iff the register was preserved
    %assign i i+1
%endrep
    ; Saturating packs keep non-zero data non-zero; after this sequence
    ; byte i of m6 is non-zero iff xmm<i> was modified (i = 6..15).
    packsswb    m6, m7
    packsswb    m8, m9
    packsswb    m10, m11
    packsswb    m12, m13
    packsswb    m14, m15
    packsswb    m6, m6
    packsswb    m8, m10
    packsswb    m12, m14
    packsswb    m6, m6
    packsswb    m8, m12
    packsswb    m6, m8
    pxor        m7, m7
    pcmpeqb     m6, m7
    pmovmskb    r3d, m6                 ; bit i set = byte i was zero
    cmp         r3d, 0xffff
    je .xmm_ok
    mov         r7d, " xmm"
    ; Same overwrite-unless-advanced scheme as for the GPR names; here a
    ; clear bit marks a mismatch, hence cmovz.
%assign i 6
%rep 16-6
    mov         [r0+0], r7d
%if i < 10
    mov         byte [r0+4], "0" + i
    lea         r4, [r0+5]
%else
    mov         word [r0+4], "10" + ((i - 10) << 8)
    lea         r4, [r0+6]
%endif
    test        r3d, 1 << i
    cmovz       r0, r4
    %assign i i+1
%endrep
.xmm_ok:
    cmp         r0, r5
    je .gpr_xmm_ok                      ; nothing was appended
    mov         byte [r0], 0            ; terminate the name list
    mov         r11, rdx
    mov         r1, r5                  ; second argument: name list
%else
    mov         byte [r0], 0            ; terminate the name list
    mov         r11, rdx
    mov         r1, rsp                 ; second argument: name list
%endif
    mov         r10, rax
    lea         r0, [errmsg_register]
    jmp .fail
.gpr_xmm_ok:
    ; Check for dirty YMM state, i.e. missing vzeroupper
    mov         ecx, [check_vzeroupper]
    test        ecx, ecx
    jz .ok                              ; not supported, skip
    mov         r10, rax                ; xgetbv overwrites eax/edx
    mov         r11, rdx
    xgetbv
    test        al, 0x04
    jz .restore_retval                  ; clean ymm state
    lea         r0, [errmsg_vzeroupper]
    vzeroupper
.fail:
    ; Call fail_func() with a descriptive message to mark it as a failure.
    xor         eax, eax                ; al = 0: no vector register arguments
    call        fail_func
.restore_retval:
    mov         rax, r10
    mov         rdx, r11
.ok:
    RET

; trigger a warmup of vector units
%macro WARMUP 0
cglobal warmup, 0, 0
    xorps       m0, m0
    mulps       m0, m0
    RET
%endmacro

INIT_YMM avx2
WARMUP
INIT_ZMM avx512
WARMUP

%else

; just random numbers to reduce the chance of incidental match
%assign n3 0x6549315c
%assign n4 0xe02f3e23
%assign n5 0xb78d0d1d
%assign n6 0x33627ba7

;-----------------------------------------------------------------------------
; void checkasm_checked_call(void *func, ...)
;
; x86-32 variant. Pushes canaries (~return address) followed by copies of
; the caller's stack parameters, fills r3-r6 with the n3-n6 patterns, calls
; func, and then checks the registers, the canaries and (if
; check_vzeroupper is set) bit 2 of XGETBV(ecx=1). Failures are reported
; through fail_func(); eax/edx from func are returned to the caller.
;-----------------------------------------------------------------------------
cglobal checked_call, 1, 7
    mov         r3, [esp+stack_offset]      ; return address
    mov         r1, [esp+stack_offset+17*4] ; num_stack_params
    mov         r2, 27
    not         r3                          ; canary = ~return address
    sub         r2, r1                      ; r2 = number of canaries
.push_canary:
    push        r3
    dec         r2
    jg .push_canary
    ; NOTE(review): this loop pushes at least one dword, so it appears to
    ; assume num_stack_params >= 1 - confirm against the caller.
.push_parameter:
    ; esp drops by 4 per push, so the fixed offset walks down through the
    ; caller's parameters.
    push        dword [esp+32*4]
    dec         r1
    jg .push_parameter
    mov         r3, n3
    mov         r4, n4
    mov         r5, n5
    mov         r6, n6
    call        r0

    ; check for failure to preserve registers
    ; one flag byte per register is packed into r3:
    ;   bits 24-31: r3, bits 16-23: r4, bits 8-15: r5, bits 0-7: r6
    cmp         r3, n3
    setne       r3h
    cmp         r4, n4
    setne       r3b
    shl         r3d, 16
    cmp         r5, n5
    setne       r3h
    cmp         r6, n6
    setne       r3b
    test        r3, r3
    jz .gpr_ok
    lea         r1, [esp+16]                ; name buffer
    mov         [esp+4], r1                 ; second argument of fail_func
    ; Write " rN" for every register; only advance r1 past it when that
    ; register's flag is set, so unset names get overwritten.
%assign i 3
%rep 4
    mov         dword [r1], " r0" + (i << 16)
    lea         r4, [r1+3]
    test        r3, 1 << ((6 - i) * 8)
    cmovnz      r1, r4
    %assign i i+1
%endrep
    mov         byte [r1], 0                ; terminate the name list
    mov         r5, eax                     ; keep func's return value
    mov         r6, edx
    LEA         r1, errmsg_register
    jmp .fail
.gpr_ok:
    ; check for stack corruption
    mov         r3, [esp+48*4]              ; num_stack_params
    mov         r6, [esp+31*4]              ; return address
    mov         r4, [esp+r3*4]
    sub         r3, 26
    not         r6                          ; expected canary
    xor         r4, r6
.check_canary:
    ; r3 counts up towards zero; r4 accumulates any differing bits
    mov         r5, [esp+(r3+27)*4]
    xor         r5, r6
    or          r4, r5
    inc         r3
    jl .check_canary
    mov         r5, eax                     ; keep func's return value
    mov         r6, edx
    test        r4, r4
    jz .stack_ok
    LEA         r1, errmsg_stack
    jmp .fail
.stack_ok:
    ; check for dirty YMM state, i.e. missing vzeroupper
    LEA         ecx, check_vzeroupper
    mov         ecx, [ecx]
    test        ecx, ecx
    jz .ok                                  ; not supported, skip
    xgetbv
    test        al, 0x04
    jz .ok                                  ; clean ymm state
    LEA         r1, errmsg_vzeroupper
    vzeroupper
.fail:
    mov         [esp], r1                   ; first argument: message
    call        fail_func
.ok:
    add         esp, 27*4                   ; drop canaries and parameter copies
    mov         eax, r5
    mov         edx, r6
    RET

%endif ; ARCH_X86_64