diff options
Diffstat (limited to '')
-rw-r--r-- | media/ffvpx/libavcodec/x86/h264_intrapred.asm | 31 | ||||
-rw-r--r-- | media/ffvpx/libavcodec/x86/h264_intrapred_init.c | 14 | ||||
-rw-r--r-- | media/ffvpx/libavcodec/x86/moz.build | 2 | ||||
-rw-r--r-- | media/ffvpx/libavcodec/x86/simple_idct.asm | 37 | ||||
-rw-r--r-- | media/ffvpx/libavcodec/x86/vp56_arith.h | 53 | ||||
-rw-r--r-- | media/ffvpx/libavcodec/x86/vp8dsp.asm | 30 | ||||
-rw-r--r-- | media/ffvpx/libavcodec/x86/vp9itxfm.asm | 2 | ||||
-rw-r--r-- | media/ffvpx/libavcodec/x86/vp9itxfm_16bpp.asm | 12 |
8 files changed, 45 insertions, 136 deletions
diff --git a/media/ffvpx/libavcodec/x86/h264_intrapred.asm b/media/ffvpx/libavcodec/x86/h264_intrapred.asm index 8a38ba2bb5..a8a630dbe6 100644 --- a/media/ffvpx/libavcodec/x86/h264_intrapred.asm +++ b/media/ffvpx/libavcodec/x86/h264_intrapred.asm @@ -86,8 +86,6 @@ cglobal pred16x16_horizontal_8, 2,3 punpcklbw m1, m1 SPLATW m0, m0, 3 SPLATW m1, m1, 3 - mova [r0+r1*0+8], m0 - mova [r0+r1*1+8], m1 %endif mova [r0+r1*0], m0 @@ -98,7 +96,7 @@ cglobal pred16x16_horizontal_8, 2,3 RET %endmacro -INIT_MMX mmxext +INIT_XMM sse2 PRED16x16_H INIT_XMM ssse3 PRED16x16_H @@ -568,17 +566,17 @@ H264_PRED8x8_PLANE ; void ff_pred8x8_vertical_8(uint8_t *src, ptrdiff_t stride) ;----------------------------------------------------------------------------- -INIT_MMX mmx +INIT_XMM sse2 cglobal pred8x8_vertical_8, 2,2 sub r0, r1 - movq mm0, [r0] + movq m0, [r0] %rep 3 - movq [r0+r1*1], mm0 - movq [r0+r1*2], mm0 + movq [r0+r1*1], m0 + movq [r0+r1*2], m0 lea r0, [r0+r1*2] %endrep - movq [r0+r1*1], mm0 - movq [r0+r1*2], mm0 + movq [r0+r1*1], m0 + movq [r0+r1*2], m0 RET ;----------------------------------------------------------------------------- @@ -1313,10 +1311,7 @@ PRED8x8L_DOWN_RIGHT ;----------------------------------------------------------------------------- %macro PRED8x8L_VERTICAL_RIGHT 0 -cglobal pred8x8l_vertical_right_8, 4,5,7 - ; manually spill XMM registers for Win64 because - ; the code here is initialized with INIT_MMX - WIN64_SPILL_XMM 7 +cglobal pred8x8l_vertical_right_8, 4,5,6 sub r0, r3 lea r4, [r0+r3*2] movq mm0, [r0+r3*1-8] @@ -1386,7 +1381,6 @@ cglobal pred8x8l_vertical_right_8, 4,5,7 movq2dq xmm4, mm6 pslldq xmm4, 8 por xmm0, xmm4 - movdqa xmm6, [pw_ff00] movdqa xmm1, xmm0 lea r2, [r1+r3*2] movdqa xmm2, xmm0 @@ -1396,15 +1390,16 @@ cglobal pred8x8l_vertical_right_8, 4,5,7 pavgb xmm2, xmm0 INIT_XMM cpuname PRED4x4_LOWPASS xmm4, xmm3, xmm1, xmm0, xmm5 - pandn xmm6, xmm4 + movdqa xmm0, [pw_ff00] + pandn xmm0, xmm4 movdqa xmm5, xmm4 psrlw xmm4, 8 - packuswb xmm6, xmm4 - movhlps xmm4, xmm6 + packuswb xmm0, xmm4 + movhlps xmm4, xmm0 movhps [r0+r3*2], xmm5 movhps [r0+r3*1], xmm2 psrldq xmm5, 4 - movss xmm5, xmm6 + movss xmm5, xmm0 psrldq xmm2, 4 movss xmm2, xmm4 lea r0, [r2+r3*2] diff --git a/media/ffvpx/libavcodec/x86/h264_intrapred_init.c b/media/ffvpx/libavcodec/x86/h264_intrapred_init.c index ee46927a24..aa9bc721f0 100644 --- a/media/ffvpx/libavcodec/x86/h264_intrapred_init.c +++ b/media/ffvpx/libavcodec/x86/h264_intrapred_init.c @@ -100,7 +100,7 @@ PRED16x16(horizontal, 10, sse2) /* 8-bit versions */ PRED16x16(vertical, 8, sse) -PRED16x16(horizontal, 8, mmxext) +PRED16x16(horizontal, 8, sse2) PRED16x16(horizontal, 8, ssse3) PRED16x16(dc, 8, sse2) PRED16x16(dc, 8, ssse3) @@ -116,7 +116,7 @@ PRED16x16(tm_vp8, 8, avx2) PRED8x8(top_dc, 8, mmxext) PRED8x8(dc_rv40, 8, mmxext) PRED8x8(dc, 8, mmxext) -PRED8x8(vertical, 8, mmx) +PRED8x8(vertical, 8, sse2) PRED8x8(horizontal, 8, mmxext) PRED8x8(horizontal, 8, ssse3) PRED8x8(plane, 8, sse2) @@ -163,14 +163,7 @@ av_cold void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, int cpu_flags = av_get_cpu_flags(); if (bit_depth == 8) { - if (EXTERNAL_MMX(cpu_flags)) { - if (chroma_format_idc <= 1) { - h->pred8x8 [VERT_PRED8x8 ] = ff_pred8x8_vertical_8_mmx; - } - } - if (EXTERNAL_MMXEXT(cpu_flags)) { - h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_8_mmxext; if (chroma_format_idc <= 1) h->pred8x8[HOR_PRED8x8 ] = ff_pred8x8_horizontal_8_mmxext; h->pred8x8l [TOP_DC_PRED ] = ff_pred8x8l_top_dc_8_mmxext; @@ -210,12 +203,15 @@ av_cold void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, } if (EXTERNAL_SSE2(cpu_flags)) { + h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_8_sse2; h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_8_sse2; h->pred8x8l [DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_8_sse2; h->pred8x8l [DIAG_DOWN_RIGHT_PRED ] = ff_pred8x8l_down_right_8_sse2; h->pred8x8l [VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_8_sse2; h->pred8x8l [VERT_LEFT_PRED ] = ff_pred8x8l_vertical_left_8_sse2; h->pred8x8l [HOR_DOWN_PRED ] = ff_pred8x8l_horizontal_down_8_sse2; + if (chroma_format_idc <= 1) + h->pred8x8 [VERT_PRED8x8 ] = ff_pred8x8_vertical_8_sse2; if (codec_id == AV_CODEC_ID_VP7 || codec_id == AV_CODEC_ID_VP8) { h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_tm_vp8_8_sse2; h->pred8x8 [PLANE_PRED8x8 ] = ff_pred8x8_tm_vp8_8_sse2; diff --git a/media/ffvpx/libavcodec/x86/moz.build b/media/ffvpx/libavcodec/x86/moz.build index bd721affad..c358b5fdb5 100644 --- a/media/ffvpx/libavcodec/x86/moz.build +++ b/media/ffvpx/libavcodec/x86/moz.build @@ -37,6 +37,8 @@ SOURCES += [ 'vp9mc_16bpp.asm', ] +LOCAL_INCLUDES += [ "../" ] + if CONFIG['TARGET_CPU'] == 'x86': SOURCES += [ 'simple_idct.asm' ] diff --git a/media/ffvpx/libavcodec/x86/simple_idct.asm b/media/ffvpx/libavcodec/x86/simple_idct.asm index 982b2f0bbb..c79519372a 100644 --- a/media/ffvpx/libavcodec/x86/simple_idct.asm +++ b/media/ffvpx/libavcodec/x86/simple_idct.asm @@ -783,68 +783,33 @@ SECTION .text %macro PUT_PIXELS_CLAMPED_HALF 1 mova m0, [blockq+mmsize*0+%1] mova m1, [blockq+mmsize*2+%1] -%if mmsize == 8 - mova m2, [blockq+mmsize*4+%1] - mova m3, [blockq+mmsize*6+%1] -%endif packuswb m0, [blockq+mmsize*1+%1] packuswb m1, [blockq+mmsize*3+%1] -%if mmsize == 8 - packuswb m2, [blockq+mmsize*5+%1] - packuswb m3, [blockq+mmsize*7+%1] - movq [pixelsq], m0 - movq [lsizeq+pixelsq], m1 - movq [2*lsizeq+pixelsq], m2 - movq [lsize3q+pixelsq], m3 -%else movq [pixelsq], m0 movhps [lsizeq+pixelsq], m0 movq [2*lsizeq+pixelsq], m1 movhps [lsize3q+pixelsq], m1 -%endif %endmacro %macro ADD_PIXELS_CLAMPED 1 mova m0, [blockq+mmsize*0+%1] mova m1, [blockq+mmsize*1+%1] -%if mmsize == 8 - mova m5, [blockq+mmsize*2+%1] - mova m6, [blockq+mmsize*3+%1] -%endif movq m2, [pixelsq] movq m3, [pixelsq+lsizeq] -%if mmsize == 8 - mova m7, m2 - punpcklbw m2, m4 - punpckhbw m7, m4 - paddsw m0, m2 - paddsw m1, m7 - mova m7, m3 - punpcklbw m3, m4 - punpckhbw m7, m4 - paddsw m5, m3 - paddsw m6, m7 -%else punpcklbw m2, m4 punpcklbw m3, m4 paddsw m0, m2 paddsw m1, m3 -%endif packuswb m0, m1 -%if mmsize == 8 - packuswb m5, m6 - movq [pixelsq], m0 - movq [pixelsq+lsizeq], m5 -%else movq [pixelsq], m0 movhps [pixelsq+lsizeq], m0 -%endif %endmacro INIT_MMX mmx cglobal simple_idct, 1, 2, 8, 128, block, t0 IDCT + emms RET INIT_XMM sse2 diff --git a/media/ffvpx/libavcodec/x86/vp56_arith.h b/media/ffvpx/libavcodec/x86/vp56_arith.h deleted file mode 100644 index 9f7639980c..0000000000 --- a/media/ffvpx/libavcodec/x86/vp56_arith.h +++ /dev/null @@ -1,53 +0,0 @@ -/** - * VP5 and VP6 compatible video decoder (arith decoder) - * - * Copyright (C) 2006 Aurelien Jacobs <aurel@gnuage.org> - * Copyright (C) 2010 Eli Friedman - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#ifndef AVCODEC_X86_VP56_ARITH_H -#define AVCODEC_X86_VP56_ARITH_H - -#if HAVE_INLINE_ASM && HAVE_FAST_CMOV && HAVE_6REGS -#include "libavutil/attributes.h" - -#define vp56_rac_get_prob vp56_rac_get_prob -static av_always_inline int vp56_rac_get_prob(VP56RangeCoder *c, uint8_t prob) -{ - unsigned int code_word = vp56_rac_renorm(c); - unsigned int low = 1 + (((c->high - 1) * prob) >> 8); - unsigned int low_shift = low << 16; - int bit = 0; - c->code_word = code_word; - - __asm__( - "subl %4, %1 \n\t" - "subl %3, %2 \n\t" - "setae %b0 \n\t" - "cmovb %4, %1 \n\t" - "cmovb %5, %2 \n\t" - : "+q"(bit), "+&r"(c->high), "+&r"(c->code_word) - : "r"(low_shift), "r"(low), "r"(code_word) - ); - - return bit; -} -#endif - -#endif /* AVCODEC_X86_VP56_ARITH_H */ diff --git a/media/ffvpx/libavcodec/x86/vp8dsp.asm b/media/ffvpx/libavcodec/x86/vp8dsp.asm index 6ac5a7721b..231c21ea0d 100644 --- a/media/ffvpx/libavcodec/x86/vp8dsp.asm +++ b/media/ffvpx/libavcodec/x86/vp8dsp.asm @@ -114,7 +114,7 @@ bilinear_filter_vb_m: times 8 db 7, 1 times 8 db 2, 6 times 8 db 1, 7 -%ifdef PIC +%if PIC %define fourtap_filter_hw picregq %define sixtap_filter_hw picregq %define fourtap_filter_hb picregq @@ -166,7 +166,7 @@ cglobal put_vp8_epel%1_h6, 6, 6 + npicregs, 8, dst, dststride, src, srcstride, h lea mxd, [mxq*3] mova m3, [filter_h6_shuf2] mova m4, [filter_h6_shuf3] -%ifdef PIC +%if PIC lea picregq, [sixtap_filter_hb_m] %endif mova m5, [sixtap_filter_hb+mxq*8-48] ; set up 6tap filter in bytes @@ -207,7 +207,7 @@ cglobal put_vp8_epel%1_h4, 6, 6 + npicregs, 7, dst, dststride, src, srcstride, h mova m2, [pw_256] mova m3, [filter_h2_shuf] mova m4, [filter_h4_shuf] -%ifdef PIC +%if PIC lea picregq, [fourtap_filter_hb_m] %endif mova m5, [fourtap_filter_hb+mxq-16] ; set up 4tap filter in bytes @@ -234,7 +234,7 @@ cglobal put_vp8_epel%1_h4, 6, 6 + npicregs, 7, dst, dststride, src, srcstride, h cglobal put_vp8_epel%1_v4, 7, 7, 8, dst, dststride, src, srcstride, height, picreg, my shl myd, 4 -%ifdef PIC +%if PIC lea picregq, [fourtap_filter_hb_m] %endif mova m5, [fourtap_filter_hb+myq-16] @@ -272,7 +272,7 @@ cglobal put_vp8_epel%1_v4, 7, 7, 8, dst, dststride, src, srcstride, height, picr cglobal put_vp8_epel%1_v6, 7, 7, 8, dst, dststride, src, srcstride, height, picreg, my lea myd, [myq*3] -%ifdef PIC +%if PIC lea picregq, [sixtap_filter_hb_m] %endif lea myq, [sixtap_filter_hb+myq*8] @@ -326,7 +326,7 @@ FILTER_SSSE3 8 INIT_MMX mmxext cglobal put_vp8_epel4_h4, 6, 6 + npicregs, 0, dst, dststride, src, srcstride, height, mx, picreg shl mxd, 4 -%ifdef PIC +%if PIC lea picregq, [fourtap_filter_hw_m] %endif movq mm4, [fourtap_filter_hw+mxq-16] ; set up 4tap filter in words @@ -374,7 +374,7 @@ cglobal put_vp8_epel4_h4, 6, 6 + npicregs, 0, dst, dststride, src, srcstride, he INIT_MMX mmxext cglobal put_vp8_epel4_h6, 6, 6 + npicregs, 0, dst, dststride, src, srcstride, height, mx, picreg lea mxd, [mxq*3] -%ifdef PIC +%if PIC lea picregq, [sixtap_filter_hw_m] %endif movq mm4, [sixtap_filter_hw+mxq*8-48] ; set up 4tap filter in words @@ -431,7 +431,7 @@ cglobal put_vp8_epel4_h6, 6, 6 + npicregs, 0, dst, dststride, src, srcstride, he INIT_XMM sse2 cglobal put_vp8_epel8_h4, 6, 6 + npicregs, 10, dst, dststride, src, srcstride, height, mx, picreg shl mxd, 5 -%ifdef PIC +%if PIC lea picregq, [fourtap_filter_v_m] %endif lea mxq, [fourtap_filter_v+mxq-32] @@ -480,7 +480,7 @@ INIT_XMM sse2 cglobal put_vp8_epel8_h6, 6, 6 + npicregs, 14, dst, dststride, src, srcstride, height, mx, picreg lea mxd, [mxq*3] shl mxd, 4 -%ifdef PIC +%if PIC lea picregq, [sixtap_filter_v_m] %endif lea mxq, [sixtap_filter_v+mxq-96] @@ -543,7 +543,7 @@ cglobal put_vp8_epel8_h6, 6, 6 + npicregs, 14, dst, dststride, src, srcstride, h ; 4x4 block, V-only 4-tap filter cglobal put_vp8_epel%1_v4, 7, 7, 8, dst, dststride, src, srcstride, height, picreg, my shl myd, 5 -%ifdef PIC +%if PIC lea picregq, [fourtap_filter_v_m] %endif lea myq, [fourtap_filter_v+myq-32] @@ -597,7 +597,7 @@ cglobal put_vp8_epel%1_v4, 7, 7, 8, dst, dststride, src, srcstride, height, picr cglobal put_vp8_epel%1_v6, 7, 7, 8, dst, dststride, src, srcstride, height, picreg, my shl myd, 4 lea myq, [myq*3] -%ifdef PIC +%if PIC lea picregq, [sixtap_filter_v_m] %endif lea myq, [sixtap_filter_v+myq-96] @@ -667,7 +667,7 @@ FILTER_V 8 %if cpuflag(ssse3) cglobal put_vp8_bilinear%1_v, 7, 7, 5, dst, dststride, src, srcstride, height, picreg, my shl myd, 4 -%ifdef PIC +%if PIC lea picregq, [bilinear_filter_vb_m] %endif pxor m4, m4 @@ -697,7 +697,7 @@ cglobal put_vp8_bilinear%1_v, 7, 7, 5, dst, dststride, src, srcstride, height, p %else ; cpuflag(ssse3) cglobal put_vp8_bilinear%1_v, 7, 7, 7, dst, dststride, src, srcstride, height, picreg, my shl myd, 4 -%ifdef PIC +%if PIC lea picregq, [bilinear_filter_vw_m] %endif pxor m6, m6 @@ -743,7 +743,7 @@ cglobal put_vp8_bilinear%1_v, 7, 7, 7, dst, dststride, src, srcstride, height, p %if cpuflag(ssse3) cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 5, dst, dststride, src, srcstride, height, mx, picreg shl mxd, 4 -%ifdef PIC +%if PIC lea picregq, [bilinear_filter_vb_m] %endif pxor m4, m4 @@ -773,7 +773,7 @@ cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 5, dst, dststride, src, srcstride %else ; cpuflag(ssse3) cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 7, dst, dststride, src, srcstride, height, mx, picreg shl mxd, 4 -%ifdef PIC +%if PIC lea picregq, [bilinear_filter_vw_m] %endif pxor m6, m6 diff --git a/media/ffvpx/libavcodec/x86/vp9itxfm.asm b/media/ffvpx/libavcodec/x86/vp9itxfm.asm index 2c63fe514a..2f290f2f88 100644 --- a/media/ffvpx/libavcodec/x86/vp9itxfm.asm +++ b/media/ffvpx/libavcodec/x86/vp9itxfm.asm @@ -330,7 +330,9 @@ IDCT_4x4_FN ssse3 INIT_MMX %5 cglobal vp9_%1_%3_4x4_add, 3, 3, 0, dst, stride, block, eob %if WIN64 && notcpuflag(ssse3) +INIT_XMM cpuname WIN64_SPILL_XMM 8 +INIT_MMX cpuname %endif movdqa xmm5, [pd_8192] mova m0, [blockq+ 0] diff --git a/media/ffvpx/libavcodec/x86/vp9itxfm_16bpp.asm b/media/ffvpx/libavcodec/x86/vp9itxfm_16bpp.asm index 902685edf6..ebe6222285 100644 --- a/media/ffvpx/libavcodec/x86/vp9itxfm_16bpp.asm +++ b/media/ffvpx/libavcodec/x86/vp9itxfm_16bpp.asm @@ -303,7 +303,9 @@ IDCT4_10_FN %macro IADST4_FN 4 cglobal vp9_%1_%3_4x4_add_10, 3, 3, 0, dst, stride, block, eob %if WIN64 && notcpuflag(ssse3) +INIT_XMM cpuname WIN64_SPILL_XMM 8 +INIT_MMX cpuname %endif movdqa xmm5, [pd_8192] mova m0, [blockq+0*16+0] @@ -672,7 +674,7 @@ cglobal vp9_idct_idct_8x8_add_10, 4, 6 + ARCH_X86_64, 14, \ mov dstbakq, dstq movsxd cntq, cntd %endif -%ifdef PIC +%if PIC lea ptrq, [default_8x8] movzx cntd, byte [ptrq+cntq-1] %else @@ -921,7 +923,7 @@ cglobal vp9_%1_%3_8x8_add_10, 4, 6 + ARCH_X86_64, 16, \ mov dstbakq, dstq movsxd cntq, cntd %endif -%ifdef PIC +%if PIC lea ptrq, [%5_8x8] movzx cntd, byte [ptrq+cntq-1] %else @@ -1128,7 +1130,7 @@ cglobal vp9_idct_idct_16x16_add_10, 4, 6 + ARCH_X86_64, 16, \ mov dstbakq, dstq movsxd cntq, cntd %endif -%ifdef PIC +%if PIC lea ptrq, [default_16x16] movzx cntd, byte [ptrq+cntq-1] %else @@ -1445,7 +1447,7 @@ cglobal vp9_%1_%4_16x16_add_10, 4, 6 + ARCH_X86_64, 16, \ mov dstbakq, dstq movsxd cntq, cntd %endif -%ifdef PIC +%if PIC lea ptrq, [%7_16x16] movzx cntd, byte [ptrq+cntq-1] %else @@ -1958,7 +1960,7 @@ cglobal vp9_idct_idct_32x32_add_10, 4, 6 + ARCH_X86_64, 16, \ mov dstbakq, dstq movsxd cntq, cntd %endif -%ifdef PIC +%if PIC lea ptrq, [default_32x32] movzx cntd, byte [ptrq+cntq-1] %else |