summaryrefslogtreecommitdiffstats
path: root/third_party/dav1d/tests/checkasm/x86/checkasm.asm
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/dav1d/tests/checkasm/x86/checkasm.asm')
-rw-r--r--third_party/dav1d/tests/checkasm/x86/checkasm.asm475
1 files changed, 475 insertions, 0 deletions
diff --git a/third_party/dav1d/tests/checkasm/x86/checkasm.asm b/third_party/dav1d/tests/checkasm/x86/checkasm.asm
new file mode 100644
index 0000000000..8f19ef97f7
--- /dev/null
+++ b/third_party/dav1d/tests/checkasm/x86/checkasm.asm
@@ -0,0 +1,475 @@
+; Copyright © 2018, VideoLAN and dav1d authors
+; Copyright © 2018, Two Orioles, LLC
+; All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions are met:
+;
+; 1. Redistributions of source code must retain the above copyright notice, this
+; list of conditions and the following disclaimer.
+;
+; 2. Redistributions in binary form must reproduce the above copyright notice,
+; this list of conditions and the following disclaimer in the documentation
+; and/or other materials provided with the distribution.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+; ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+; DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+; ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+; (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+; ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+%include "config.asm"
+%undef private_prefix
+%define private_prefix checkasm
+%include "ext/x86/x86inc.asm"
+
+SECTION_RODATA 16
+
+%if ARCH_X86_64
+; just random numbers to reduce the chance of incidental match
+%if WIN64
+x6: dq 0x1a1b2550a612b48c,0x79445c159ce79064
+x7: dq 0x2eed899d5a28ddcd,0x86b2536fcd8cf636
+x8: dq 0xb0856806085e7943,0x3f2bf84fc0fcca4e
+x9: dq 0xacbd382dcf5b8de2,0xd229e1f5b281303f
+x10: dq 0x71aeaff20b095fd9,0xab63e2e11fa38ed9
+x11: dq 0x89b0c0765892729a,0x77d410d5c42c882d
+x12: dq 0xc45ea11a955d8dd5,0x24b3c1d2a024048b
+x13: dq 0x2e8ec680de14b47c,0xdd7b8919edd42786
+x14: dq 0x135ce6888fa02cbf,0x11e53e2b2ac655ef
+x15: dq 0x011ff554472a7a10,0x6de8f4c914c334d5
+n7: dq 0x21f86d66c8ca00ce
+n8: dq 0x75b6ba21077c48ad
+%endif
+n9: dq 0xed56bb2dcb3c7736
+n10: dq 0x8bda43d3fd1a7e06
+n11: dq 0xb64a9c9e5d318408
+n12: dq 0xdf9a54b303f1d3a3
+n13: dq 0x4a75479abd64e097
+n14: dq 0x249214109d5d1c88
+%endif
+
+errmsg_stack: db "stack corruption", 0
+errmsg_register: db "failed to preserve register:%s", 0
+errmsg_vzeroupper: db "missing vzeroupper", 0
+
+SECTION .bss
+
+check_vzeroupper: resd 1
+
+SECTION .text
+
+cextern fail_func
+
+; max number of args used by any asm function.
+; (max_args % 4) must equal 3 for stack alignment
+%define max_args 15
+
+%if UNIX64
+ DECLARE_REG_TMP 0
+%else
+ DECLARE_REG_TMP 4
+%endif
+
+;-----------------------------------------------------------------------------
+; unsigned checkasm_init_x86(char *name)
+;-----------------------------------------------------------------------------
+cglobal init_x86, 0, 5
+%if ARCH_X86_64
+ push rbx
+%endif
+ movifnidn t0, r0mp
+ mov eax, 0x80000000
+ cpuid
+ cmp eax, 0x80000004
+ jb .no_brand ; processor brand string not supported
+ mov eax, 0x80000002
+ cpuid
+ mov [t0+4* 0], eax
+ mov [t0+4* 1], ebx
+ mov [t0+4* 2], ecx
+ mov [t0+4* 3], edx
+ mov eax, 0x80000003
+ cpuid
+ mov [t0+4* 4], eax
+ mov [t0+4* 5], ebx
+ mov [t0+4* 6], ecx
+ mov [t0+4* 7], edx
+ mov eax, 0x80000004
+ cpuid
+ mov [t0+4* 8], eax
+ mov [t0+4* 9], ebx
+ mov [t0+4*10], ecx
+ mov [t0+4*11], edx
+ xor eax, eax
+ cpuid
+ jmp .check_xcr1
+.no_brand: ; use manufacturer id as a fallback
+ xor eax, eax
+ mov [t0+4*3], eax
+ cpuid
+ mov [t0+4*0], ebx
+ mov [t0+4*1], edx
+ mov [t0+4*2], ecx
+.check_xcr1:
+ test eax, eax
+ jz .end2 ; cpuid leaf 1 not supported
+ mov t0d, eax ; max leaf
+ mov eax, 1
+ cpuid
+ and ecx, 0x18000000
+ cmp ecx, 0x18000000
+ jne .end2 ; osxsave/avx not supported
+ cmp t0d, 13 ; cpuid leaf 13 not supported
+ jb .end2
+ mov t0d, eax ; cpuid signature
+ mov eax, 13
+ mov ecx, 1
+ cpuid
+ test al, 0x04
+ jz .end ; xcr1 not supported
+ mov ecx, 1
+ xgetbv
+ test al, 0x04
+ jnz .end ; always-dirty ymm state
+%if ARCH_X86_64 == 0 && PIC
+ LEA eax, check_vzeroupper
+ mov [eax], ecx
+%else
+ mov [check_vzeroupper], ecx
+%endif
+.end:
+ mov eax, t0d
+.end2:
+%if ARCH_X86_64
+ pop rbx
+%endif
+ RET
+
+%if ARCH_X86_64
+%if WIN64
+ %define stack_param rsp+32 ; shadow space
+ %define num_fn_args rsp+stack_offset+17*8
+ %assign num_reg_args 4
+ %assign free_regs 7
+ %assign clobber_mask_stack_bit 16
+ DECLARE_REG_TMP 4
+%else
+ %define stack_param rsp
+ %define num_fn_args rsp+stack_offset+11*8
+ %assign num_reg_args 6
+ %assign free_regs 9
+ %assign clobber_mask_stack_bit 64
+ DECLARE_REG_TMP 7
+%endif
+
+%macro CLOBBER_UPPER 2 ; reg, mask_bit
+ mov r13d, %1d
+ or r13, r8
+ test r9b, %2
+ cmovnz %1, r13
+%endmacro
+
+cglobal checked_call, 2, 15, 16, max_args*8+64+8
+ mov r10d, [num_fn_args]
+ mov r8, 0xdeadbeef00000000
+ mov r9d, [num_fn_args+r10*8+8] ; clobber_mask
+ mov t0, [num_fn_args+r10*8] ; func
+
+ ; Clobber the upper halves of 32-bit parameters
+ CLOBBER_UPPER r0, 1
+ CLOBBER_UPPER r1, 2
+ CLOBBER_UPPER r2, 4
+ CLOBBER_UPPER r3, 8
+%if UNIX64
+ CLOBBER_UPPER r4, 16
+ CLOBBER_UPPER r5, 32
+%else ; WIN64
+%assign i 6
+%rep 16-6
+ mova m %+ i, [x %+ i]
+ %assign i i+1
+%endrep
+%endif
+
+ xor r11d, r11d
+ sub r10d, num_reg_args
+ cmovs r10d, r11d ; num stack args
+
+ ; write stack canaries to the area above parameters passed on the stack
+ mov r12, [rsp+stack_offset] ; return address
+ not r12
+%assign i 0
+%rep 8 ; 64 bytes
+ mov [stack_param+(r10+i)*8], r12
+ %assign i i+1
+%endrep
+
+ test r10d, r10d
+ jz .stack_setup_done ; no stack parameters
+.copy_stack_parameter:
+ mov r12, [stack_param+stack_offset+8+r11*8]
+ CLOBBER_UPPER r12, clobber_mask_stack_bit
+ shr r9d, 1
+ mov [stack_param+r11*8], r12
+ inc r11d
+ cmp r11d, r10d
+ jl .copy_stack_parameter
+.stack_setup_done:
+
+%assign i 14
+%rep 15-free_regs
+ mov r %+ i, [n %+ i]
+ %assign i i-1
+%endrep
+ call t0
+
+ ; check for stack corruption
+ mov r0d, [num_fn_args]
+ xor r3d, r3d
+ sub r0d, num_reg_args
+ cmovs r0d, r3d ; num stack args
+
+ mov r3, [rsp+stack_offset]
+ mov r4, [stack_param+r0*8]
+ not r3
+ xor r4, r3
+%assign i 1
+%rep 6
+ mov r5, [stack_param+(r0+i)*8]
+ xor r5, r3
+ or r4, r5
+ %assign i i+1
+%endrep
+ xor r3, [stack_param+(r0+7)*8]
+ or r4, r3
+ jz .stack_ok
+ ; Save the return value located in rdx:rax first to prevent clobbering.
+ mov r10, rax
+ mov r11, rdx
+ lea r0, [errmsg_stack]
+ jmp .fail
+.stack_ok:
+
+ ; check for failure to preserve registers
+%assign i 14
+%rep 15-free_regs
+ cmp r %+ i, [n %+ i]
+ setne r4b
+ lea r3d, [r4+r3*2]
+ %assign i i-1
+%endrep
+%if WIN64
+ lea r0, [rsp+32] ; account for shadow space
+ mov r5, r0
+ test r3d, r3d
+ jz .gpr_ok
+%else
+ test r3d, r3d
+ jz .gpr_xmm_ok
+ mov r0, rsp
+%endif
+%assign i free_regs
+%rep 15-free_regs
+%if i < 10
+ mov dword [r0], " r0" + (i << 16)
+ lea r4, [r0+3]
+%else
+ mov dword [r0], " r10" + ((i - 10) << 24)
+ lea r4, [r0+4]
+%endif
+ test r3b, 1 << (i - free_regs)
+ cmovnz r0, r4
+ %assign i i+1
+%endrep
+%if WIN64 ; xmm registers
+.gpr_ok:
+%assign i 6
+%rep 16-6
+ pxor m %+ i, [x %+ i]
+ %assign i i+1
+%endrep
+ packsswb m6, m7
+ packsswb m8, m9
+ packsswb m10, m11
+ packsswb m12, m13
+ packsswb m14, m15
+ packsswb m6, m6
+ packsswb m8, m10
+ packsswb m12, m14
+ packsswb m6, m6
+ packsswb m8, m12
+ packsswb m6, m8
+ pxor m7, m7
+ pcmpeqb m6, m7
+ pmovmskb r3d, m6
+ cmp r3d, 0xffff
+ je .xmm_ok
+ mov r7d, " xmm"
+%assign i 6
+%rep 16-6
+ mov [r0+0], r7d
+%if i < 10
+ mov byte [r0+4], "0" + i
+ lea r4, [r0+5]
+%else
+ mov word [r0+4], "10" + ((i - 10) << 8)
+ lea r4, [r0+6]
+%endif
+ test r3d, 1 << i
+ cmovz r0, r4
+ %assign i i+1
+%endrep
+.xmm_ok:
+ cmp r0, r5
+ je .gpr_xmm_ok
+ mov byte [r0], 0
+ mov r11, rdx
+ mov r1, r5
+%else
+ mov byte [r0], 0
+ mov r11, rdx
+ mov r1, rsp
+%endif
+ mov r10, rax
+ lea r0, [errmsg_register]
+ jmp .fail
+.gpr_xmm_ok:
+ ; Check for dirty YMM state, i.e. missing vzeroupper
+ mov ecx, [check_vzeroupper]
+ test ecx, ecx
+ jz .ok ; not supported, skip
+ mov r10, rax
+ mov r11, rdx
+ xgetbv
+ test al, 0x04
+ jz .restore_retval ; clean ymm state
+ lea r0, [errmsg_vzeroupper]
+ vzeroupper
+.fail:
+ ; Call fail_func() with a descriptive message to mark it as a failure.
+ xor eax, eax
+ call fail_func
+.restore_retval:
+ mov rax, r10
+ mov rdx, r11
+.ok:
+ RET
+
+; trigger a warmup of vector units
+%macro WARMUP 0
+cglobal warmup, 0, 0
+ xorps m0, m0
+ mulps m0, m0
+ RET
+%endmacro
+
+INIT_YMM avx2
+WARMUP
+INIT_ZMM avx512
+WARMUP
+
+%else
+
+; just random numbers to reduce the chance of incidental match
+%assign n3 0x6549315c
+%assign n4 0xe02f3e23
+%assign n5 0xb78d0d1d
+%assign n6 0x33627ba7
+
+;-----------------------------------------------------------------------------
+; void checkasm_checked_call(void *func, ...)
+;-----------------------------------------------------------------------------
+cglobal checked_call, 1, 7
+ mov r3, [esp+stack_offset] ; return address
+ mov r1, [esp+stack_offset+17*4] ; num_stack_params
+ mov r2, 27
+ not r3
+ sub r2, r1
+.push_canary:
+ push r3
+ dec r2
+ jg .push_canary
+.push_parameter:
+ push dword [esp+32*4]
+ dec r1
+ jg .push_parameter
+ mov r3, n3
+ mov r4, n4
+ mov r5, n5
+ mov r6, n6
+ call r0
+
+ ; check for failure to preserve registers
+ cmp r3, n3
+ setne r3h
+ cmp r4, n4
+ setne r3b
+ shl r3d, 16
+ cmp r5, n5
+ setne r3h
+ cmp r6, n6
+ setne r3b
+ test r3, r3
+ jz .gpr_ok
+ lea r1, [esp+16]
+ mov [esp+4], r1
+%assign i 3
+%rep 4
+ mov dword [r1], " r0" + (i << 16)
+ lea r4, [r1+3]
+ test r3, 1 << ((6 - i) * 8)
+ cmovnz r1, r4
+ %assign i i+1
+%endrep
+ mov byte [r1], 0
+ mov r5, eax
+ mov r6, edx
+ LEA r1, errmsg_register
+ jmp .fail
+.gpr_ok:
+ ; check for stack corruption
+ mov r3, [esp+48*4] ; num_stack_params
+ mov r6, [esp+31*4] ; return address
+ mov r4, [esp+r3*4]
+ sub r3, 26
+ not r6
+ xor r4, r6
+.check_canary:
+ mov r5, [esp+(r3+27)*4]
+ xor r5, r6
+ or r4, r5
+ inc r3
+ jl .check_canary
+ mov r5, eax
+ mov r6, edx
+ test r4, r4
+ jz .stack_ok
+ LEA r1, errmsg_stack
+ jmp .fail
+.stack_ok:
+ ; check for dirty YMM state, i.e. missing vzeroupper
+ LEA ecx, check_vzeroupper
+ mov ecx, [ecx]
+ test ecx, ecx
+ jz .ok ; not supported, skip
+ xgetbv
+ test al, 0x04
+ jz .ok ; clean ymm state
+ LEA r1, errmsg_vzeroupper
+ vzeroupper
+.fail:
+ mov [esp], r1
+ call fail_func
+.ok:
+ add esp, 27*4
+ mov eax, r5
+ mov edx, r6
+ RET
+
+%endif ; ARCH_X86_64