summaryrefslogtreecommitdiffstats
path: root/media/ffvpx/libavcodec/x86
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--media/ffvpx/libavcodec/x86/h264_intrapred.asm31
-rw-r--r--media/ffvpx/libavcodec/x86/h264_intrapred_init.c14
-rw-r--r--media/ffvpx/libavcodec/x86/moz.build2
-rw-r--r--media/ffvpx/libavcodec/x86/simple_idct.asm37
-rw-r--r--media/ffvpx/libavcodec/x86/vp56_arith.h53
-rw-r--r--media/ffvpx/libavcodec/x86/vp8dsp.asm30
-rw-r--r--media/ffvpx/libavcodec/x86/vp9itxfm.asm2
-rw-r--r--media/ffvpx/libavcodec/x86/vp9itxfm_16bpp.asm12
8 files changed, 45 insertions, 136 deletions
diff --git a/media/ffvpx/libavcodec/x86/h264_intrapred.asm b/media/ffvpx/libavcodec/x86/h264_intrapred.asm
index 8a38ba2bb5..a8a630dbe6 100644
--- a/media/ffvpx/libavcodec/x86/h264_intrapred.asm
+++ b/media/ffvpx/libavcodec/x86/h264_intrapred.asm
@@ -86,8 +86,6 @@ cglobal pred16x16_horizontal_8, 2,3
punpcklbw m1, m1
SPLATW m0, m0, 3
SPLATW m1, m1, 3
- mova [r0+r1*0+8], m0
- mova [r0+r1*1+8], m1
%endif
mova [r0+r1*0], m0
@@ -98,7 +96,7 @@ cglobal pred16x16_horizontal_8, 2,3
RET
%endmacro
-INIT_MMX mmxext
+INIT_XMM sse2
PRED16x16_H
INIT_XMM ssse3
PRED16x16_H
@@ -568,17 +566,17 @@ H264_PRED8x8_PLANE
; void ff_pred8x8_vertical_8(uint8_t *src, ptrdiff_t stride)
;-----------------------------------------------------------------------------
-INIT_MMX mmx
+INIT_XMM sse2
cglobal pred8x8_vertical_8, 2,2
sub r0, r1
- movq mm0, [r0]
+ movq m0, [r0]
%rep 3
- movq [r0+r1*1], mm0
- movq [r0+r1*2], mm0
+ movq [r0+r1*1], m0
+ movq [r0+r1*2], m0
lea r0, [r0+r1*2]
%endrep
- movq [r0+r1*1], mm0
- movq [r0+r1*2], mm0
+ movq [r0+r1*1], m0
+ movq [r0+r1*2], m0
RET
;-----------------------------------------------------------------------------
@@ -1313,10 +1311,7 @@ PRED8x8L_DOWN_RIGHT
;-----------------------------------------------------------------------------
%macro PRED8x8L_VERTICAL_RIGHT 0
-cglobal pred8x8l_vertical_right_8, 4,5,7
- ; manually spill XMM registers for Win64 because
- ; the code here is initialized with INIT_MMX
- WIN64_SPILL_XMM 7
+cglobal pred8x8l_vertical_right_8, 4,5,6
sub r0, r3
lea r4, [r0+r3*2]
movq mm0, [r0+r3*1-8]
@@ -1386,7 +1381,6 @@ cglobal pred8x8l_vertical_right_8, 4,5,7
movq2dq xmm4, mm6
pslldq xmm4, 8
por xmm0, xmm4
- movdqa xmm6, [pw_ff00]
movdqa xmm1, xmm0
lea r2, [r1+r3*2]
movdqa xmm2, xmm0
@@ -1396,15 +1390,16 @@ cglobal pred8x8l_vertical_right_8, 4,5,7
pavgb xmm2, xmm0
INIT_XMM cpuname
PRED4x4_LOWPASS xmm4, xmm3, xmm1, xmm0, xmm5
- pandn xmm6, xmm4
+ movdqa xmm0, [pw_ff00]
+ pandn xmm0, xmm4
movdqa xmm5, xmm4
psrlw xmm4, 8
- packuswb xmm6, xmm4
- movhlps xmm4, xmm6
+ packuswb xmm0, xmm4
+ movhlps xmm4, xmm0
movhps [r0+r3*2], xmm5
movhps [r0+r3*1], xmm2
psrldq xmm5, 4
- movss xmm5, xmm6
+ movss xmm5, xmm0
psrldq xmm2, 4
movss xmm2, xmm4
lea r0, [r2+r3*2]
diff --git a/media/ffvpx/libavcodec/x86/h264_intrapred_init.c b/media/ffvpx/libavcodec/x86/h264_intrapred_init.c
index ee46927a24..aa9bc721f0 100644
--- a/media/ffvpx/libavcodec/x86/h264_intrapred_init.c
+++ b/media/ffvpx/libavcodec/x86/h264_intrapred_init.c
@@ -100,7 +100,7 @@ PRED16x16(horizontal, 10, sse2)
/* 8-bit versions */
PRED16x16(vertical, 8, sse)
-PRED16x16(horizontal, 8, mmxext)
+PRED16x16(horizontal, 8, sse2)
PRED16x16(horizontal, 8, ssse3)
PRED16x16(dc, 8, sse2)
PRED16x16(dc, 8, ssse3)
@@ -116,7 +116,7 @@ PRED16x16(tm_vp8, 8, avx2)
PRED8x8(top_dc, 8, mmxext)
PRED8x8(dc_rv40, 8, mmxext)
PRED8x8(dc, 8, mmxext)
-PRED8x8(vertical, 8, mmx)
+PRED8x8(vertical, 8, sse2)
PRED8x8(horizontal, 8, mmxext)
PRED8x8(horizontal, 8, ssse3)
PRED8x8(plane, 8, sse2)
@@ -163,14 +163,7 @@ av_cold void ff_h264_pred_init_x86(H264PredContext *h, int codec_id,
int cpu_flags = av_get_cpu_flags();
if (bit_depth == 8) {
- if (EXTERNAL_MMX(cpu_flags)) {
- if (chroma_format_idc <= 1) {
- h->pred8x8 [VERT_PRED8x8 ] = ff_pred8x8_vertical_8_mmx;
- }
- }
-
if (EXTERNAL_MMXEXT(cpu_flags)) {
- h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_8_mmxext;
if (chroma_format_idc <= 1)
h->pred8x8[HOR_PRED8x8 ] = ff_pred8x8_horizontal_8_mmxext;
h->pred8x8l [TOP_DC_PRED ] = ff_pred8x8l_top_dc_8_mmxext;
@@ -210,12 +203,15 @@ av_cold void ff_h264_pred_init_x86(H264PredContext *h, int codec_id,
}
if (EXTERNAL_SSE2(cpu_flags)) {
+ h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_8_sse2;
h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_8_sse2;
h->pred8x8l [DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_8_sse2;
h->pred8x8l [DIAG_DOWN_RIGHT_PRED ] = ff_pred8x8l_down_right_8_sse2;
h->pred8x8l [VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_8_sse2;
h->pred8x8l [VERT_LEFT_PRED ] = ff_pred8x8l_vertical_left_8_sse2;
h->pred8x8l [HOR_DOWN_PRED ] = ff_pred8x8l_horizontal_down_8_sse2;
+ if (chroma_format_idc <= 1)
+ h->pred8x8 [VERT_PRED8x8 ] = ff_pred8x8_vertical_8_sse2;
if (codec_id == AV_CODEC_ID_VP7 || codec_id == AV_CODEC_ID_VP8) {
h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_tm_vp8_8_sse2;
h->pred8x8 [PLANE_PRED8x8 ] = ff_pred8x8_tm_vp8_8_sse2;
diff --git a/media/ffvpx/libavcodec/x86/moz.build b/media/ffvpx/libavcodec/x86/moz.build
index bd721affad..c358b5fdb5 100644
--- a/media/ffvpx/libavcodec/x86/moz.build
+++ b/media/ffvpx/libavcodec/x86/moz.build
@@ -37,6 +37,8 @@ SOURCES += [
'vp9mc_16bpp.asm',
]
+LOCAL_INCLUDES += [ "../" ]
+
if CONFIG['TARGET_CPU'] == 'x86':
SOURCES += [ 'simple_idct.asm' ]
diff --git a/media/ffvpx/libavcodec/x86/simple_idct.asm b/media/ffvpx/libavcodec/x86/simple_idct.asm
index 982b2f0bbb..c79519372a 100644
--- a/media/ffvpx/libavcodec/x86/simple_idct.asm
+++ b/media/ffvpx/libavcodec/x86/simple_idct.asm
@@ -783,68 +783,33 @@ SECTION .text
%macro PUT_PIXELS_CLAMPED_HALF 1
mova m0, [blockq+mmsize*0+%1]
mova m1, [blockq+mmsize*2+%1]
-%if mmsize == 8
- mova m2, [blockq+mmsize*4+%1]
- mova m3, [blockq+mmsize*6+%1]
-%endif
packuswb m0, [blockq+mmsize*1+%1]
packuswb m1, [blockq+mmsize*3+%1]
-%if mmsize == 8
- packuswb m2, [blockq+mmsize*5+%1]
- packuswb m3, [blockq+mmsize*7+%1]
- movq [pixelsq], m0
- movq [lsizeq+pixelsq], m1
- movq [2*lsizeq+pixelsq], m2
- movq [lsize3q+pixelsq], m3
-%else
movq [pixelsq], m0
movhps [lsizeq+pixelsq], m0
movq [2*lsizeq+pixelsq], m1
movhps [lsize3q+pixelsq], m1
-%endif
%endmacro
%macro ADD_PIXELS_CLAMPED 1
mova m0, [blockq+mmsize*0+%1]
mova m1, [blockq+mmsize*1+%1]
-%if mmsize == 8
- mova m5, [blockq+mmsize*2+%1]
- mova m6, [blockq+mmsize*3+%1]
-%endif
movq m2, [pixelsq]
movq m3, [pixelsq+lsizeq]
-%if mmsize == 8
- mova m7, m2
- punpcklbw m2, m4
- punpckhbw m7, m4
- paddsw m0, m2
- paddsw m1, m7
- mova m7, m3
- punpcklbw m3, m4
- punpckhbw m7, m4
- paddsw m5, m3
- paddsw m6, m7
-%else
punpcklbw m2, m4
punpcklbw m3, m4
paddsw m0, m2
paddsw m1, m3
-%endif
packuswb m0, m1
-%if mmsize == 8
- packuswb m5, m6
- movq [pixelsq], m0
- movq [pixelsq+lsizeq], m5
-%else
movq [pixelsq], m0
movhps [pixelsq+lsizeq], m0
-%endif
%endmacro
INIT_MMX mmx
cglobal simple_idct, 1, 2, 8, 128, block, t0
IDCT
+ emms
RET
INIT_XMM sse2
diff --git a/media/ffvpx/libavcodec/x86/vp56_arith.h b/media/ffvpx/libavcodec/x86/vp56_arith.h
deleted file mode 100644
index 9f7639980c..0000000000
--- a/media/ffvpx/libavcodec/x86/vp56_arith.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/**
- * VP5 and VP6 compatible video decoder (arith decoder)
- *
- * Copyright (C) 2006 Aurelien Jacobs <aurel@gnuage.org>
- * Copyright (C) 2010 Eli Friedman
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#ifndef AVCODEC_X86_VP56_ARITH_H
-#define AVCODEC_X86_VP56_ARITH_H
-
-#if HAVE_INLINE_ASM && HAVE_FAST_CMOV && HAVE_6REGS
-#include "libavutil/attributes.h"
-
-#define vp56_rac_get_prob vp56_rac_get_prob
-static av_always_inline int vp56_rac_get_prob(VP56RangeCoder *c, uint8_t prob)
-{
- unsigned int code_word = vp56_rac_renorm(c);
- unsigned int low = 1 + (((c->high - 1) * prob) >> 8);
- unsigned int low_shift = low << 16;
- int bit = 0;
- c->code_word = code_word;
-
- __asm__(
- "subl %4, %1 \n\t"
- "subl %3, %2 \n\t"
- "setae %b0 \n\t"
- "cmovb %4, %1 \n\t"
- "cmovb %5, %2 \n\t"
- : "+q"(bit), "+&r"(c->high), "+&r"(c->code_word)
- : "r"(low_shift), "r"(low), "r"(code_word)
- );
-
- return bit;
-}
-#endif
-
-#endif /* AVCODEC_X86_VP56_ARITH_H */
diff --git a/media/ffvpx/libavcodec/x86/vp8dsp.asm b/media/ffvpx/libavcodec/x86/vp8dsp.asm
index 6ac5a7721b..231c21ea0d 100644
--- a/media/ffvpx/libavcodec/x86/vp8dsp.asm
+++ b/media/ffvpx/libavcodec/x86/vp8dsp.asm
@@ -114,7 +114,7 @@ bilinear_filter_vb_m: times 8 db 7, 1
times 8 db 2, 6
times 8 db 1, 7
-%ifdef PIC
+%if PIC
%define fourtap_filter_hw picregq
%define sixtap_filter_hw picregq
%define fourtap_filter_hb picregq
@@ -166,7 +166,7 @@ cglobal put_vp8_epel%1_h6, 6, 6 + npicregs, 8, dst, dststride, src, srcstride, h
lea mxd, [mxq*3]
mova m3, [filter_h6_shuf2]
mova m4, [filter_h6_shuf3]
-%ifdef PIC
+%if PIC
lea picregq, [sixtap_filter_hb_m]
%endif
mova m5, [sixtap_filter_hb+mxq*8-48] ; set up 6tap filter in bytes
@@ -207,7 +207,7 @@ cglobal put_vp8_epel%1_h4, 6, 6 + npicregs, 7, dst, dststride, src, srcstride, h
mova m2, [pw_256]
mova m3, [filter_h2_shuf]
mova m4, [filter_h4_shuf]
-%ifdef PIC
+%if PIC
lea picregq, [fourtap_filter_hb_m]
%endif
mova m5, [fourtap_filter_hb+mxq-16] ; set up 4tap filter in bytes
@@ -234,7 +234,7 @@ cglobal put_vp8_epel%1_h4, 6, 6 + npicregs, 7, dst, dststride, src, srcstride, h
cglobal put_vp8_epel%1_v4, 7, 7, 8, dst, dststride, src, srcstride, height, picreg, my
shl myd, 4
-%ifdef PIC
+%if PIC
lea picregq, [fourtap_filter_hb_m]
%endif
mova m5, [fourtap_filter_hb+myq-16]
@@ -272,7 +272,7 @@ cglobal put_vp8_epel%1_v4, 7, 7, 8, dst, dststride, src, srcstride, height, picr
cglobal put_vp8_epel%1_v6, 7, 7, 8, dst, dststride, src, srcstride, height, picreg, my
lea myd, [myq*3]
-%ifdef PIC
+%if PIC
lea picregq, [sixtap_filter_hb_m]
%endif
lea myq, [sixtap_filter_hb+myq*8]
@@ -326,7 +326,7 @@ FILTER_SSSE3 8
INIT_MMX mmxext
cglobal put_vp8_epel4_h4, 6, 6 + npicregs, 0, dst, dststride, src, srcstride, height, mx, picreg
shl mxd, 4
-%ifdef PIC
+%if PIC
lea picregq, [fourtap_filter_hw_m]
%endif
movq mm4, [fourtap_filter_hw+mxq-16] ; set up 4tap filter in words
@@ -374,7 +374,7 @@ cglobal put_vp8_epel4_h4, 6, 6 + npicregs, 0, dst, dststride, src, srcstride, he
INIT_MMX mmxext
cglobal put_vp8_epel4_h6, 6, 6 + npicregs, 0, dst, dststride, src, srcstride, height, mx, picreg
lea mxd, [mxq*3]
-%ifdef PIC
+%if PIC
lea picregq, [sixtap_filter_hw_m]
%endif
movq mm4, [sixtap_filter_hw+mxq*8-48] ; set up 4tap filter in words
@@ -431,7 +431,7 @@ cglobal put_vp8_epel4_h6, 6, 6 + npicregs, 0, dst, dststride, src, srcstride, he
INIT_XMM sse2
cglobal put_vp8_epel8_h4, 6, 6 + npicregs, 10, dst, dststride, src, srcstride, height, mx, picreg
shl mxd, 5
-%ifdef PIC
+%if PIC
lea picregq, [fourtap_filter_v_m]
%endif
lea mxq, [fourtap_filter_v+mxq-32]
@@ -480,7 +480,7 @@ INIT_XMM sse2
cglobal put_vp8_epel8_h6, 6, 6 + npicregs, 14, dst, dststride, src, srcstride, height, mx, picreg
lea mxd, [mxq*3]
shl mxd, 4
-%ifdef PIC
+%if PIC
lea picregq, [sixtap_filter_v_m]
%endif
lea mxq, [sixtap_filter_v+mxq-96]
@@ -543,7 +543,7 @@ cglobal put_vp8_epel8_h6, 6, 6 + npicregs, 14, dst, dststride, src, srcstride, h
; 4x4 block, V-only 4-tap filter
cglobal put_vp8_epel%1_v4, 7, 7, 8, dst, dststride, src, srcstride, height, picreg, my
shl myd, 5
-%ifdef PIC
+%if PIC
lea picregq, [fourtap_filter_v_m]
%endif
lea myq, [fourtap_filter_v+myq-32]
@@ -597,7 +597,7 @@ cglobal put_vp8_epel%1_v4, 7, 7, 8, dst, dststride, src, srcstride, height, picr
cglobal put_vp8_epel%1_v6, 7, 7, 8, dst, dststride, src, srcstride, height, picreg, my
shl myd, 4
lea myq, [myq*3]
-%ifdef PIC
+%if PIC
lea picregq, [sixtap_filter_v_m]
%endif
lea myq, [sixtap_filter_v+myq-96]
@@ -667,7 +667,7 @@ FILTER_V 8
%if cpuflag(ssse3)
cglobal put_vp8_bilinear%1_v, 7, 7, 5, dst, dststride, src, srcstride, height, picreg, my
shl myd, 4
-%ifdef PIC
+%if PIC
lea picregq, [bilinear_filter_vb_m]
%endif
pxor m4, m4
@@ -697,7 +697,7 @@ cglobal put_vp8_bilinear%1_v, 7, 7, 5, dst, dststride, src, srcstride, height, p
%else ; cpuflag(ssse3)
cglobal put_vp8_bilinear%1_v, 7, 7, 7, dst, dststride, src, srcstride, height, picreg, my
shl myd, 4
-%ifdef PIC
+%if PIC
lea picregq, [bilinear_filter_vw_m]
%endif
pxor m6, m6
@@ -743,7 +743,7 @@ cglobal put_vp8_bilinear%1_v, 7, 7, 7, dst, dststride, src, srcstride, height, p
%if cpuflag(ssse3)
cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 5, dst, dststride, src, srcstride, height, mx, picreg
shl mxd, 4
-%ifdef PIC
+%if PIC
lea picregq, [bilinear_filter_vb_m]
%endif
pxor m4, m4
@@ -773,7 +773,7 @@ cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 5, dst, dststride, src, srcstride
%else ; cpuflag(ssse3)
cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 7, dst, dststride, src, srcstride, height, mx, picreg
shl mxd, 4
-%ifdef PIC
+%if PIC
lea picregq, [bilinear_filter_vw_m]
%endif
pxor m6, m6
diff --git a/media/ffvpx/libavcodec/x86/vp9itxfm.asm b/media/ffvpx/libavcodec/x86/vp9itxfm.asm
index 2c63fe514a..2f290f2f88 100644
--- a/media/ffvpx/libavcodec/x86/vp9itxfm.asm
+++ b/media/ffvpx/libavcodec/x86/vp9itxfm.asm
@@ -330,7 +330,9 @@ IDCT_4x4_FN ssse3
INIT_MMX %5
cglobal vp9_%1_%3_4x4_add, 3, 3, 0, dst, stride, block, eob
%if WIN64 && notcpuflag(ssse3)
+INIT_XMM cpuname
WIN64_SPILL_XMM 8
+INIT_MMX cpuname
%endif
movdqa xmm5, [pd_8192]
mova m0, [blockq+ 0]
diff --git a/media/ffvpx/libavcodec/x86/vp9itxfm_16bpp.asm b/media/ffvpx/libavcodec/x86/vp9itxfm_16bpp.asm
index 902685edf6..ebe6222285 100644
--- a/media/ffvpx/libavcodec/x86/vp9itxfm_16bpp.asm
+++ b/media/ffvpx/libavcodec/x86/vp9itxfm_16bpp.asm
@@ -303,7 +303,9 @@ IDCT4_10_FN
%macro IADST4_FN 4
cglobal vp9_%1_%3_4x4_add_10, 3, 3, 0, dst, stride, block, eob
%if WIN64 && notcpuflag(ssse3)
+INIT_XMM cpuname
WIN64_SPILL_XMM 8
+INIT_MMX cpuname
%endif
movdqa xmm5, [pd_8192]
mova m0, [blockq+0*16+0]
@@ -672,7 +674,7 @@ cglobal vp9_idct_idct_8x8_add_10, 4, 6 + ARCH_X86_64, 14, \
mov dstbakq, dstq
movsxd cntq, cntd
%endif
-%ifdef PIC
+%if PIC
lea ptrq, [default_8x8]
movzx cntd, byte [ptrq+cntq-1]
%else
@@ -921,7 +923,7 @@ cglobal vp9_%1_%3_8x8_add_10, 4, 6 + ARCH_X86_64, 16, \
mov dstbakq, dstq
movsxd cntq, cntd
%endif
-%ifdef PIC
+%if PIC
lea ptrq, [%5_8x8]
movzx cntd, byte [ptrq+cntq-1]
%else
@@ -1128,7 +1130,7 @@ cglobal vp9_idct_idct_16x16_add_10, 4, 6 + ARCH_X86_64, 16, \
mov dstbakq, dstq
movsxd cntq, cntd
%endif
-%ifdef PIC
+%if PIC
lea ptrq, [default_16x16]
movzx cntd, byte [ptrq+cntq-1]
%else
@@ -1445,7 +1447,7 @@ cglobal vp9_%1_%4_16x16_add_10, 4, 6 + ARCH_X86_64, 16, \
mov dstbakq, dstq
movsxd cntq, cntd
%endif
-%ifdef PIC
+%if PIC
lea ptrq, [%7_16x16]
movzx cntd, byte [ptrq+cntq-1]
%else
@@ -1958,7 +1960,7 @@ cglobal vp9_idct_idct_32x32_add_10, 4, 6 + ARCH_X86_64, 16, \
mov dstbakq, dstq
movsxd cntq, cntd
%endif
-%ifdef PIC
+%if PIC
lea ptrq, [default_32x32]
movzx cntd, byte [ptrq+cntq-1]
%else