summaryrefslogtreecommitdiffstats
path: root/third_party/dav1d/src/x86/mc_avx2.asm
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/dav1d/src/x86/mc_avx2.asm')
-rw-r--r--third_party/dav1d/src/x86/mc_avx2.asm33
1 files changed, 9 insertions, 24 deletions
diff --git a/third_party/dav1d/src/x86/mc_avx2.asm b/third_party/dav1d/src/x86/mc_avx2.asm
index 3b208033bd..58e3cb5af1 100644
--- a/third_party/dav1d/src/x86/mc_avx2.asm
+++ b/third_party/dav1d/src/x86/mc_avx2.asm
@@ -1259,7 +1259,6 @@ cglobal prep_bilin_8bpc, 3, 7, 0, tmp, src, stride, w, h, mxy, stride3
.hv:
; (16 * src[x] + (my * (src[x + src_stride] - src[x])) + 8) >> 4
; = src[x] + (((my * (src[x + src_stride] - src[x])) + 8) >> 4)
- %assign stack_offset stack_offset - stack_size_padded
WIN64_SPILL_XMM 7
movzx wd, word [r6+wq*2+table_offset(prep, _bilin_hv)]
shl mxyd, 11
@@ -1620,7 +1619,6 @@ cglobal put_8tap_8bpc, 4, 9, 0, dst, ds, src, ss, w, h, mx, my, ss3
jg .h_loop
RET
.v:
- %assign stack_offset stack_offset - stack_size_padded
WIN64_SPILL_XMM 16
movzx mxd, myb
shr myd, 16
@@ -1834,7 +1832,6 @@ cglobal put_8tap_8bpc, 4, 9, 0, dst, ds, src, ss, w, h, mx, my, ss3
jg .v_w16_loop0
RET
.hv:
- %assign stack_offset stack_offset - stack_size_padded
WIN64_SPILL_XMM 16
cmp wd, 4
jg .hv_w8
@@ -2247,7 +2244,6 @@ cglobal prep_8tap_8bpc, 3, 8, 0, tmp, src, stride, w, h, mx, my, stride3
jg .h_loop
RET
.v:
- %assign stack_offset stack_offset - stack_size_padded
WIN64_SPILL_XMM 16
movzx mxd, myb ; Select 4-tap/8-tap filter multipliers.
shr myd, 16 ; Note that the code is 8-tap only, having
@@ -2430,8 +2426,6 @@ cglobal prep_8tap_8bpc, 3, 8, 0, tmp, src, stride, w, h, mx, my, stride3
jg .v_w16_loop0
RET
.hv:
- %assign stack_offset stack_offset - stack_size_padded
- %assign stack_size_padded 0
WIN64_SPILL_XMM 16
cmp wd, 4
je .hv_w4
@@ -4108,10 +4102,9 @@ cglobal warp_affine_8x8t_8bpc, 0, 14, 0, tmp, ts
cglobal warp_affine_8x8_8bpc, 0, 14, 0, dst, ds, src, ss, abcd, mx, tmp2, alpha, \
beta, filter, tmp1, delta, my, gamma
%if WIN64
- sub rsp, 0xa0
%assign xmm_regs_used 16
%assign stack_size_padded 0xa0
- %assign stack_offset stack_offset+stack_size_padded
+ SUB rsp, stack_size_padded
%endif
call .main
jmp .start
@@ -4134,21 +4127,13 @@ cglobal warp_affine_8x8_8bpc, 0, 14, 0, dst, ds, src, ss, abcd, mx, tmp2, alpha,
RET
ALIGN function_align
.main:
- ; Stack args offset by one (r4m -> r5m etc.) due to call
-%if WIN64
- mov abcdq, r5m
- mov mxd, r6m
- movaps [rsp+stack_offset+0x10], xmm6
- movaps [rsp+stack_offset+0x20], xmm7
- movaps [rsp+0x28], xmm8
- movaps [rsp+0x38], xmm9
- movaps [rsp+0x48], xmm10
- movaps [rsp+0x58], xmm11
- movaps [rsp+0x68], xmm12
- movaps [rsp+0x78], xmm13
- movaps [rsp+0x88], xmm14
- movaps [rsp+0x98], xmm15
-%endif
+ ; Stack is offset due to call
+ %assign stack_offset stack_offset + gprsize
+ %assign stack_size stack_size + gprsize
+ %assign stack_size_padded stack_size_padded + gprsize
+ movifnidn abcdq, abcdmp
+ movifnidn mxd, mxm
+ WIN64_PUSH_XMM
movsx alphad, word [abcdq+2*0]
movsx betad, word [abcdq+2*1]
mova m12, [warp_8x8_shufA]
@@ -4162,7 +4147,7 @@ ALIGN function_align
lea tmp2d, [alphaq*3]
sub srcq, tmp1q ; src -= src_stride*3 + 3
sub betad, tmp2d ; beta -= alpha*3
- mov myd, r7m
+ mov myd, r6m
call .h
psrld m1, m0, 16
call .h