From 36d22d82aa202bb199967e9512281e9a53db42c9 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 7 Apr 2024 21:33:14 +0200 Subject: Adding upstream version 115.7.0esr. Signed-off-by: Daniel Baumann --- media/ffvpx/libavutil/arm/asm.S | 367 +++++++++++++++++++ media/ffvpx/libavutil/arm/bswap.h | 70 ++++ media/ffvpx/libavutil/arm/cpu.c | 188 ++++++++++ media/ffvpx/libavutil/arm/cpu.h | 38 ++ media/ffvpx/libavutil/arm/float_dsp_arm.h | 29 ++ media/ffvpx/libavutil/arm/float_dsp_init_arm.c | 32 ++ media/ffvpx/libavutil/arm/float_dsp_init_neon.c | 59 +++ media/ffvpx/libavutil/arm/float_dsp_init_vfp.c | 46 +++ media/ffvpx/libavutil/arm/float_dsp_neon.S | 271 ++++++++++++++ media/ffvpx/libavutil/arm/float_dsp_vfp.S | 457 ++++++++++++++++++++++++ media/ffvpx/libavutil/arm/intmath.h | 146 ++++++++ media/ffvpx/libavutil/arm/intreadwrite.h | 91 +++++ media/ffvpx/libavutil/arm/moz.build | 18 + media/ffvpx/libavutil/arm/timer.h | 46 +++ 14 files changed, 1858 insertions(+) create mode 100644 media/ffvpx/libavutil/arm/asm.S create mode 100644 media/ffvpx/libavutil/arm/bswap.h create mode 100644 media/ffvpx/libavutil/arm/cpu.c create mode 100644 media/ffvpx/libavutil/arm/cpu.h create mode 100644 media/ffvpx/libavutil/arm/float_dsp_arm.h create mode 100644 media/ffvpx/libavutil/arm/float_dsp_init_arm.c create mode 100644 media/ffvpx/libavutil/arm/float_dsp_init_neon.c create mode 100644 media/ffvpx/libavutil/arm/float_dsp_init_vfp.c create mode 100644 media/ffvpx/libavutil/arm/float_dsp_neon.S create mode 100644 media/ffvpx/libavutil/arm/float_dsp_vfp.S create mode 100644 media/ffvpx/libavutil/arm/intmath.h create mode 100644 media/ffvpx/libavutil/arm/intreadwrite.h create mode 100644 media/ffvpx/libavutil/arm/moz.build create mode 100644 media/ffvpx/libavutil/arm/timer.h (limited to 'media/ffvpx/libavutil/arm') diff --git a/media/ffvpx/libavutil/arm/asm.S b/media/ffvpx/libavutil/arm/asm.S new file mode 100644 index 0000000000..e3a8c7f065 --- /dev/null +++ b/media/ffvpx/libavutil/arm/asm.S @@ -0,0 +1,367 @@ +/* + * Copyright (c) 2008 Mans Rullgard + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "config.h" + +#ifdef __ELF__ +# define ELF +#else +# define ELF @ +#endif + +#if CONFIG_THUMB +# define A @ +# define T +#else +# define A +# define T @ +#endif + +#if HAVE_AS_FUNC +# define FUNC +#else +# define FUNC @ +#endif + +#if HAVE_AS_FPU_DIRECTIVE +# define FPU +#else +# define FPU @ +#endif + +#if CONFIG_THUMB && defined(__APPLE__) +# define TFUNC +#else +# define TFUNC @ +#endif + +#if HAVE_AS_ARCH_DIRECTIVE +#if HAVE_NEON + .arch armv7-a +#elif HAVE_ARMV6T2 + .arch armv6t2 +#elif HAVE_ARMV6 + .arch armv6 +#elif HAVE_ARMV5TE + .arch armv5te +#endif +#endif +#if HAVE_AS_OBJECT_ARCH +ELF .object_arch armv4 +#endif + +#if HAVE_NEON +FPU .fpu neon +ELF .eabi_attribute 10, 0 @ suppress Tag_FP_arch +ELF .eabi_attribute 12, 0 @ suppress Tag_Advanced_SIMD_arch +#elif HAVE_VFP +FPU .fpu vfp +ELF .eabi_attribute 10, 0 @ suppress Tag_FP_arch +#endif + + .syntax unified +T .thumb +ELF .eabi_attribute 25, 1 @ Tag_ABI_align_preserved +ELF .section .note.GNU-stack,"",%progbits @ Mark stack as non-executable + +.macro function name, export=0, align=2 + .set .Lpic_idx, 0 + .set .Lpic_gp, 0 + .macro endfunc + .if .Lpic_idx + .align 2 + .altmacro + put_pic %(.Lpic_idx - 1) + .noaltmacro + .endif + .if .Lpic_gp + .unreq gp + .endif +ELF .size \name, . - \name +FUNC .endfunc + .purgem endfunc + .endm + .text + .align \align + .if \export + .global EXTERN_ASM\name +ELF .type EXTERN_ASM\name, %function +FUNC .func EXTERN_ASM\name +TFUNC .thumb_func EXTERN_ASM\name +EXTERN_ASM\name: + .else +ELF .type \name, %function +FUNC .func \name +TFUNC .thumb_func \name +\name: + .endif +.endm + +.macro const name, align=2, relocate=0 + .macro endconst +ELF .size \name, . - \name + .purgem endconst + .endm +#if HAVE_SECTION_DATA_REL_RO +.if \relocate + .section .data.rel.ro +.else + .section .rodata +.endif +#elif defined(_WIN32) + .section .rdata +#elif !defined(__MACH__) + .section .rodata +#else + .const_data +#endif + .align \align +\name: +.endm + +#if !HAVE_ARMV6T2_EXTERNAL +.macro movw rd, val + mov \rd, \val & 255 + orr \rd, \val & ~255 +.endm +#endif + +.macro mov32 rd, val +#if HAVE_ARMV6T2_EXTERNAL + movw \rd, #(\val) & 0xffff + .if (\val) >> 16 + movt \rd, #(\val) >> 16 + .endif +#else + ldr \rd, =\val +#endif +.endm + +.macro put_pic num + put_pic_\num +.endm + +.macro do_def_pic num, val, label + .macro put_pic_\num + .if \num + .altmacro + put_pic %(\num - 1) + .noaltmacro + .endif +\label: .word \val + .purgem put_pic_\num + .endm +.endm + +.macro def_pic val, label + .altmacro + do_def_pic %.Lpic_idx, \val, \label + .noaltmacro + .set .Lpic_idx, .Lpic_idx + 1 +.endm + +.macro ldpic rd, val, indir=0 + ldr \rd, .Lpicoff\@ +.Lpic\@: + .if \indir +A ldr \rd, [pc, \rd] +T add \rd, pc +T ldr \rd, [\rd] + .else + add \rd, pc + .endif + def_pic \val - (.Lpic\@ + (8 >> CONFIG_THUMB)), .Lpicoff\@ +.endm + +.macro movrel rd, val +#if CONFIG_PIC + ldpic \rd, \val +#elif HAVE_ARMV6T2_EXTERNAL && !defined(__APPLE__) + movw \rd, #:lower16:\val + movt \rd, #:upper16:\val +#else + ldr \rd, =\val +#endif +.endm + +.macro movrelx rd, val, gp + .ifc \rd,\gp + .error "movrelx needs two distinct registers" + .endif + .ifc \rd\()_\gp,r12_ + .warning "movrelx rd=\rd without explicit set gp" + .endif + .ifc \rd\()_\gp,ip_ + .warning "movrelx rd=\rd without explicit set gp" + .endif +#if CONFIG_PIC && defined(__ELF__) + .ifnb \gp + .if .Lpic_gp + .unreq gp + .endif + gp .req \gp + ldpic gp, _GLOBAL_OFFSET_TABLE_ + .elseif !.Lpic_gp + gp .req r12 + ldpic gp, _GLOBAL_OFFSET_TABLE_ + .endif + .set .Lpic_gp, 1 + ldr \rd, .Lpicoff\@ + ldr \rd, [gp, \rd] + def_pic \val(GOT), .Lpicoff\@ +#elif CONFIG_PIC && defined(__APPLE__) + ldpic \rd, .Lpic\@, indir=1 + .non_lazy_symbol_pointer +.Lpic\@: + .indirect_symbol \val + .word 0 + .text +#else + movrel \rd, \val +#endif +.endm + +.macro add_sh rd, rn, rm, sh:vararg +A add \rd, \rn, \rm, \sh +T mov \rm, \rm, \sh +T add \rd, \rn, \rm +.endm + +.macro ldr_pre rt, rn, rm:vararg +A ldr \rt, [\rn, \rm]! +T add \rn, \rn, \rm +T ldr \rt, [\rn] +.endm + +.macro ldr_dpre rt, rn, rm:vararg +A ldr \rt, [\rn, -\rm]! +T sub \rn, \rn, \rm +T ldr \rt, [\rn] +.endm + +.macro ldr_nreg rt, rn, rm:vararg +A ldr \rt, [\rn, -\rm] +T sub \rt, \rn, \rm +T ldr \rt, [\rt] +.endm + +.macro ldr_post rt, rn, rm:vararg +A ldr \rt, [\rn], \rm +T ldr \rt, [\rn] +T add \rn, \rn, \rm +.endm + +.macro ldrc_pre cc, rt, rn, rm:vararg +A ldr\cc \rt, [\rn, \rm]! +T itt \cc +T add\cc \rn, \rn, \rm +T ldr\cc \rt, [\rn] +.endm + +.macro ldrd_reg rt, rt2, rn, rm +A ldrd \rt, \rt2, [\rn, \rm] +T add \rt, \rn, \rm +T ldrd \rt, \rt2, [\rt] +.endm + +.macro ldrd_post rt, rt2, rn, rm +A ldrd \rt, \rt2, [\rn], \rm +T ldrd \rt, \rt2, [\rn] +T add \rn, \rn, \rm +.endm + +.macro ldrh_pre rt, rn, rm +A ldrh \rt, [\rn, \rm]! +T add \rn, \rn, \rm +T ldrh \rt, [\rn] +.endm + +.macro ldrh_dpre rt, rn, rm +A ldrh \rt, [\rn, -\rm]! +T sub \rn, \rn, \rm +T ldrh \rt, [\rn] +.endm + +.macro ldrh_post rt, rn, rm +A ldrh \rt, [\rn], \rm +T ldrh \rt, [\rn] +T add \rn, \rn, \rm +.endm + +.macro ldrb_post rt, rn, rm +A ldrb \rt, [\rn], \rm +T ldrb \rt, [\rn] +T add \rn, \rn, \rm +.endm + +.macro str_post rt, rn, rm:vararg +A str \rt, [\rn], \rm +T str \rt, [\rn] +T add \rn, \rn, \rm +.endm + +.macro strb_post rt, rn, rm:vararg +A strb \rt, [\rn], \rm +T strb \rt, [\rn] +T add \rn, \rn, \rm +.endm + +.macro strd_post rt, rt2, rn, rm +A strd \rt, \rt2, [\rn], \rm +T strd \rt, \rt2, [\rn] +T add \rn, \rn, \rm +.endm + +.macro strh_pre rt, rn, rm +A strh \rt, [\rn, \rm]! +T add \rn, \rn, \rm +T strh \rt, [\rn] +.endm + +.macro strh_dpre rt, rn, rm +A strh \rt, [\rn, -\rm]! +T sub \rn, \rn, \rm +T strh \rt, [\rn] +.endm + +.macro strh_post rt, rn, rm +A strh \rt, [\rn], \rm +T strh \rt, [\rn] +T add \rn, \rn, \rm +.endm + +.macro strh_dpost rt, rn, rm +A strh \rt, [\rn], -\rm +T strh \rt, [\rn] +T sub \rn, \rn, \rm +.endm + +#if HAVE_VFP_ARGS +ELF .eabi_attribute 28, 1 +# define VFP +# define NOVFP @ +#else +# define VFP @ +# define NOVFP +#endif + +#define GLUE(a, b) a ## b +#define JOIN(a, b) GLUE(a, b) +#define X(s) JOIN(EXTERN_ASM, s) diff --git a/media/ffvpx/libavutil/arm/bswap.h b/media/ffvpx/libavutil/arm/bswap.h new file mode 100644 index 0000000000..c3460e035d --- /dev/null +++ b/media/ffvpx/libavutil/arm/bswap.h @@ -0,0 +1,70 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVUTIL_ARM_BSWAP_H +#define AVUTIL_ARM_BSWAP_H + +#include +#include "config.h" +#include "libavutil/attributes.h" + +#ifdef __ARMCC_VERSION + +#if HAVE_ARMV6 +#define av_bswap32 av_bswap32 +static av_always_inline av_const uint32_t av_bswap32(uint32_t x) +{ + return __rev(x); +} +#endif /* HAVE_ARMV6 */ + +#elif HAVE_INLINE_ASM + +#if HAVE_ARMV6_INLINE +#define av_bswap16 av_bswap16 +static av_always_inline av_const unsigned av_bswap16(unsigned x) +{ + unsigned y; + + __asm__("rev16 %0, %1" : "=r"(y) : "r"(x)); + return y; +} +#endif + +#if AV_GCC_VERSION_AT_MOST(4,4) +#define av_bswap32 av_bswap32 +static av_always_inline av_const uint32_t av_bswap32(uint32_t x) +{ + uint32_t y; +#if HAVE_ARMV6_INLINE + __asm__("rev %0, %1" : "=r"(y) : "r"(x)); +#else + uint32_t t; + __asm__ ("eor %1, %2, %2, ror #16 \n\t" + "bic %1, %1, #0xFF0000 \n\t" + "mov %0, %2, ror #8 \n\t" + "eor %0, %0, %1, lsr #8 \n\t" + : "=r"(y), "=&r"(t) : "r"(x)); +#endif /* HAVE_ARMV6_INLINE */ + return y; +} +#endif /* AV_GCC_VERSION_AT_MOST(4,4) */ + +#endif /* __ARMCC_VERSION */ + +#endif /* AVUTIL_ARM_BSWAP_H */ diff --git a/media/ffvpx/libavutil/arm/cpu.c b/media/ffvpx/libavutil/arm/cpu.c new file mode 100644 index 0000000000..c84a655c37 --- /dev/null +++ b/media/ffvpx/libavutil/arm/cpu.c @@ -0,0 +1,188 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/cpu.h" +#include "libavutil/cpu_internal.h" +#include "config.h" + +#define CORE_FLAG(f) \ + (AV_CPU_FLAG_ ## f * (HAVE_ ## f ## _EXTERNAL || HAVE_ ## f ## _INLINE)) + +#define CORE_CPU_FLAGS \ + (CORE_FLAG(ARMV5TE) | \ + CORE_FLAG(ARMV6) | \ + CORE_FLAG(ARMV6T2) | \ + CORE_FLAG(VFP) | \ + CORE_FLAG(VFPV3) | \ + CORE_FLAG(NEON)) + +#if defined __linux__ || defined __ANDROID__ + +#include +#include +#include +#include "libavutil/avstring.h" + +#if HAVE_GETAUXVAL +#include +#endif + +#define AT_HWCAP 16 + +/* Relevant HWCAP values from kernel headers */ +#define HWCAP_VFP (1 << 6) +#define HWCAP_EDSP (1 << 7) +#define HWCAP_THUMBEE (1 << 11) +#define HWCAP_NEON (1 << 12) +#define HWCAP_VFPv3 (1 << 13) +#define HWCAP_TLS (1 << 15) + +static int get_auxval(uint32_t *hwcap) +{ +#if HAVE_GETAUXVAL + unsigned long ret = getauxval(AT_HWCAP); + if (ret == 0) + return -1; + *hwcap = ret; + return 0; +#else + return -1; +#endif +} + +static int get_hwcap(uint32_t *hwcap) +{ + struct { uint32_t a_type; uint32_t a_val; } auxv; + FILE *f = fopen("/proc/self/auxv", "r"); + int err = -1; + + if (!f) + return -1; + + while (fread(&auxv, sizeof(auxv), 1, f) > 0) { + if (auxv.a_type == AT_HWCAP) { + *hwcap = auxv.a_val; + err = 0; + break; + } + } + + fclose(f); + return err; +} + +static int get_cpuinfo(uint32_t *hwcap) +{ + FILE *f = fopen("/proc/cpuinfo", "r"); + char buf[200]; + + if (!f) + return -1; + + *hwcap = 0; + while (fgets(buf, sizeof(buf), f)) { + if (av_strstart(buf, "Features", NULL)) { + if (strstr(buf, " edsp ")) + *hwcap |= HWCAP_EDSP; + if (strstr(buf, " tls ")) + *hwcap |= HWCAP_TLS; + if (strstr(buf, " thumbee ")) + *hwcap |= HWCAP_THUMBEE; + if (strstr(buf, " vfp ")) + *hwcap |= HWCAP_VFP; + if (strstr(buf, " vfpv3 ")) + *hwcap |= HWCAP_VFPv3; + if (strstr(buf, " neon ") || strstr(buf, " asimd ")) + *hwcap |= HWCAP_NEON; + if (strstr(buf, " fp ")) // Listed on 64 bit ARMv8 kernels + *hwcap |= HWCAP_VFP | HWCAP_VFPv3; + break; + } + } + fclose(f); + return 0; +} + +int ff_get_cpu_flags_arm(void) +{ + int flags = CORE_CPU_FLAGS; + uint32_t hwcap; + + if (get_auxval(&hwcap) < 0) + if (get_hwcap(&hwcap) < 0) + if (get_cpuinfo(&hwcap) < 0) + return flags; + +#define check_cap(cap, flag) do { \ + if (hwcap & HWCAP_ ## cap) \ + flags |= AV_CPU_FLAG_ ## flag; \ + } while (0) + + /* No flags explicitly indicate v6 or v6T2 so check others which + imply support. */ + check_cap(EDSP, ARMV5TE); + check_cap(TLS, ARMV6); + check_cap(THUMBEE, ARMV6T2); + check_cap(VFP, VFP); + check_cap(VFPv3, VFPV3); + check_cap(NEON, NEON); + + /* The v6 checks above are not reliable so let higher flags + trickle down. */ + if (flags & (AV_CPU_FLAG_VFPV3 | AV_CPU_FLAG_NEON)) + flags |= AV_CPU_FLAG_ARMV6T2; + else if (flags & (AV_CPU_FLAG_ARMV6T2 | AV_CPU_FLAG_ARMV6)) + /* Some functions use the 'setend' instruction which is deprecated on ARMv8 + * and serializing on some ARMv7 cores. This ensures such functions + * are only enabled on ARMv6. */ + flags |= AV_CPU_FLAG_SETEND; + + if (flags & AV_CPU_FLAG_ARMV6T2) + flags |= AV_CPU_FLAG_ARMV6; + + /* set the virtual VFPv2 vector mode flag */ + if ((flags & AV_CPU_FLAG_VFP) && !(flags & (AV_CPU_FLAG_VFPV3 | AV_CPU_FLAG_NEON))) + flags |= AV_CPU_FLAG_VFP_VM; + + return flags; +} + +#else + +int ff_get_cpu_flags_arm(void) +{ + return AV_CPU_FLAG_ARMV5TE * HAVE_ARMV5TE | + AV_CPU_FLAG_ARMV6 * HAVE_ARMV6 | + AV_CPU_FLAG_ARMV6T2 * HAVE_ARMV6T2 | + AV_CPU_FLAG_VFP * HAVE_VFP | + AV_CPU_FLAG_VFPV3 * HAVE_VFPV3 | + AV_CPU_FLAG_NEON * HAVE_NEON | + AV_CPU_FLAG_SETEND * !(HAVE_NEON | HAVE_VFPV3); +} + +#endif + +size_t ff_get_cpu_max_align_arm(void) +{ + int flags = av_get_cpu_flags(); + + if (flags & AV_CPU_FLAG_NEON) + return 16; + + return 8; +} diff --git a/media/ffvpx/libavutil/arm/cpu.h b/media/ffvpx/libavutil/arm/cpu.h new file mode 100644 index 0000000000..1d6cc65dc4 --- /dev/null +++ b/media/ffvpx/libavutil/arm/cpu.h @@ -0,0 +1,38 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVUTIL_ARM_CPU_H +#define AVUTIL_ARM_CPU_H + +#include "libavutil/cpu.h" +#include "libavutil/cpu_internal.h" + +#define have_armv5te(flags) CPUEXT(flags, ARMV5TE) +#define have_armv6(flags) CPUEXT(flags, ARMV6) +#define have_armv6t2(flags) CPUEXT(flags, ARMV6T2) +#define have_vfp(flags) CPUEXT(flags, VFP) +#define have_vfpv3(flags) CPUEXT(flags, VFPV3) +#define have_neon(flags) CPUEXT(flags, NEON) +#define have_setend(flags) CPUEXT(flags, SETEND) + +/* some functions use the VFPv2 vector mode which is deprecated in ARMv7-A + * and might trap on such CPU depending on the OS configuration */ +#define have_vfp_vm(flags) \ + (HAVE_VFP && ((flags) & AV_CPU_FLAG_VFP_VM)) + +#endif /* AVUTIL_ARM_CPU_H */ diff --git a/media/ffvpx/libavutil/arm/float_dsp_arm.h b/media/ffvpx/libavutil/arm/float_dsp_arm.h new file mode 100644 index 0000000000..fe311cc0d2 --- /dev/null +++ b/media/ffvpx/libavutil/arm/float_dsp_arm.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2009 Mans Rullgard + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVUTIL_ARM_FLOAT_DSP_ARM_H +#define AVUTIL_ARM_FLOAT_DSP_ARM_H + +#include "libavutil/float_dsp.h" + +void ff_float_dsp_init_vfp(AVFloatDSPContext *fdsp, int cpu_flags); +void ff_float_dsp_init_neon(AVFloatDSPContext *fdsp); + +#endif /* AVUTIL_ARM_FLOAT_DSP_ARM_H */ diff --git a/media/ffvpx/libavutil/arm/float_dsp_init_arm.c b/media/ffvpx/libavutil/arm/float_dsp_init_arm.c new file mode 100644 index 0000000000..678762862e --- /dev/null +++ b/media/ffvpx/libavutil/arm/float_dsp_init_arm.c @@ -0,0 +1,32 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/attributes.h" +#include "libavutil/float_dsp.h" +#include "cpu.h" +#include "float_dsp_arm.h" + +av_cold void ff_float_dsp_init_arm(AVFloatDSPContext *fdsp) +{ + int cpu_flags = av_get_cpu_flags(); + + if (have_vfp(cpu_flags)) + ff_float_dsp_init_vfp(fdsp, cpu_flags); + if (have_neon(cpu_flags)) + ff_float_dsp_init_neon(fdsp); +} diff --git a/media/ffvpx/libavutil/arm/float_dsp_init_neon.c b/media/ffvpx/libavutil/arm/float_dsp_init_neon.c new file mode 100644 index 0000000000..689aa77c7b --- /dev/null +++ b/media/ffvpx/libavutil/arm/float_dsp_init_neon.c @@ -0,0 +1,59 @@ +/* + * ARM NEON optimised Float DSP functions + * Copyright (c) 2008 Mans Rullgard + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include + +#include "libavutil/attributes.h" +#include "libavutil/float_dsp.h" +#include "float_dsp_arm.h" + +void ff_vector_fmul_neon(float *dst, const float *src0, const float *src1, int len); + +void ff_vector_fmac_scalar_neon(float *dst, const float *src, float mul, + int len); + +void ff_vector_fmul_scalar_neon(float *dst, const float *src, float mul, + int len); + +void ff_vector_fmul_window_neon(float *dst, const float *src0, + const float *src1, const float *win, int len); + +void ff_vector_fmul_add_neon(float *dst, const float *src0, const float *src1, + const float *src2, int len); + +void ff_vector_fmul_reverse_neon(float *dst, const float *src0, + const float *src1, int len); + +void ff_butterflies_float_neon(float *v1, float *v2, int len); + +float ff_scalarproduct_float_neon(const float *v1, const float *v2, int len); + +av_cold void ff_float_dsp_init_neon(AVFloatDSPContext *fdsp) +{ + fdsp->vector_fmul = ff_vector_fmul_neon; + fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_neon; + fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_neon; + fdsp->vector_fmul_window = ff_vector_fmul_window_neon; + fdsp->vector_fmul_add = ff_vector_fmul_add_neon; + fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_neon; + fdsp->butterflies_float = ff_butterflies_float_neon; + fdsp->scalarproduct_float = ff_scalarproduct_float_neon; +} diff --git a/media/ffvpx/libavutil/arm/float_dsp_init_vfp.c b/media/ffvpx/libavutil/arm/float_dsp_init_vfp.c new file mode 100644 index 0000000000..05873e7e37 --- /dev/null +++ b/media/ffvpx/libavutil/arm/float_dsp_init_vfp.c @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2008 Siarhei Siamashka + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/attributes.h" +#include "libavutil/float_dsp.h" +#include "cpu.h" +#include "float_dsp_arm.h" + +void ff_vector_fmul_vfp(float *dst, const float *src0, const float *src1, + int len); + +void ff_vector_fmul_window_vfp(float *dst, const float *src0, + const float *src1, const float *win, int len); + +void ff_vector_fmul_reverse_vfp(float *dst, const float *src0, + const float *src1, int len); + +void ff_butterflies_float_vfp(float *av_restrict v1, float *av_restrict v2, int len); + +av_cold void ff_float_dsp_init_vfp(AVFloatDSPContext *fdsp, int cpu_flags) +{ + if (have_vfp_vm(cpu_flags)) { + fdsp->vector_fmul = ff_vector_fmul_vfp; + fdsp->vector_fmul_window = ff_vector_fmul_window_vfp; + } + fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_vfp; + if (have_vfp_vm(cpu_flags)) + fdsp->butterflies_float = ff_butterflies_float_vfp; +} diff --git a/media/ffvpx/libavutil/arm/float_dsp_neon.S b/media/ffvpx/libavutil/arm/float_dsp_neon.S new file mode 100644 index 0000000000..3823227608 --- /dev/null +++ b/media/ffvpx/libavutil/arm/float_dsp_neon.S @@ -0,0 +1,271 @@ +/* + * ARM NEON optimised Float DSP functions + * Copyright (c) 2008 Mans Rullgard + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "config.h" +#include "asm.S" + +function ff_vector_fmul_neon, export=1 + subs r3, r3, #8 + vld1.32 {d0-d3}, [r1,:128]! + vld1.32 {d4-d7}, [r2,:128]! + vmul.f32 q8, q0, q2 + vmul.f32 q9, q1, q3 + beq 3f + bics ip, r3, #15 + beq 2f +1: subs ip, ip, #16 + vld1.32 {d0-d1}, [r1,:128]! + vld1.32 {d4-d5}, [r2,:128]! + vmul.f32 q10, q0, q2 + vld1.32 {d2-d3}, [r1,:128]! + vld1.32 {d6-d7}, [r2,:128]! + vmul.f32 q11, q1, q3 + vst1.32 {d16-d19},[r0,:128]! + vld1.32 {d0-d1}, [r1,:128]! + vld1.32 {d4-d5}, [r2,:128]! + vmul.f32 q8, q0, q2 + vld1.32 {d2-d3}, [r1,:128]! + vld1.32 {d6-d7}, [r2,:128]! + vmul.f32 q9, q1, q3 + vst1.32 {d20-d23},[r0,:128]! + bne 1b + ands r3, r3, #15 + beq 3f +2: vld1.32 {d0-d1}, [r1,:128]! + vld1.32 {d4-d5}, [r2,:128]! + vst1.32 {d16-d17},[r0,:128]! + vmul.f32 q8, q0, q2 + vld1.32 {d2-d3}, [r1,:128]! + vld1.32 {d6-d7}, [r2,:128]! + vst1.32 {d18-d19},[r0,:128]! + vmul.f32 q9, q1, q3 +3: vst1.32 {d16-d19},[r0,:128]! + bx lr +endfunc + +function ff_vector_fmac_scalar_neon, export=1 +VFP len .req r2 +VFP acc .req r3 +NOVFP len .req r3 +NOVFP acc .req r2 +VFP vdup.32 q15, d0[0] +NOVFP vdup.32 q15, r2 + bics r12, len, #15 + mov acc, r0 + beq 3f + vld1.32 {q0}, [r1,:128]! + vld1.32 {q8}, [acc,:128]! + vld1.32 {q1}, [r1,:128]! + vld1.32 {q9}, [acc,:128]! +1: vmla.f32 q8, q0, q15 + vld1.32 {q2}, [r1,:128]! + vld1.32 {q10}, [acc,:128]! + vmla.f32 q9, q1, q15 + vld1.32 {q3}, [r1,:128]! + vld1.32 {q11}, [acc,:128]! + vmla.f32 q10, q2, q15 + vst1.32 {q8}, [r0,:128]! + vmla.f32 q11, q3, q15 + vst1.32 {q9}, [r0,:128]! + subs r12, r12, #16 + beq 2f + vld1.32 {q0}, [r1,:128]! + vld1.32 {q8}, [acc,:128]! + vst1.32 {q10}, [r0,:128]! + vld1.32 {q1}, [r1,:128]! + vld1.32 {q9}, [acc,:128]! + vst1.32 {q11}, [r0,:128]! + b 1b +2: vst1.32 {q10}, [r0,:128]! + vst1.32 {q11}, [r0,:128]! + ands len, len, #15 + it eq + bxeq lr +3: vld1.32 {q0}, [r1,:128]! + vld1.32 {q8}, [acc,:128]! + vmla.f32 q8, q0, q15 + vst1.32 {q8}, [r0,:128]! + subs len, len, #4 + bgt 3b + bx lr + .unreq len +endfunc + +function ff_vector_fmul_scalar_neon, export=1 +VFP len .req r2 +NOVFP len .req r3 +VFP vdup.32 q8, d0[0] +NOVFP vdup.32 q8, r2 + bics r12, len, #15 + beq 3f + vld1.32 {q0},[r1,:128]! + vld1.32 {q1},[r1,:128]! +1: vmul.f32 q0, q0, q8 + vld1.32 {q2},[r1,:128]! + vmul.f32 q1, q1, q8 + vld1.32 {q3},[r1,:128]! + vmul.f32 q2, q2, q8 + vst1.32 {q0},[r0,:128]! + vmul.f32 q3, q3, q8 + vst1.32 {q1},[r0,:128]! + subs r12, r12, #16 + beq 2f + vld1.32 {q0},[r1,:128]! + vst1.32 {q2},[r0,:128]! + vld1.32 {q1},[r1,:128]! + vst1.32 {q3},[r0,:128]! + b 1b +2: vst1.32 {q2},[r0,:128]! + vst1.32 {q3},[r0,:128]! + ands len, len, #15 + it eq + bxeq lr +3: vld1.32 {q0},[r1,:128]! + vmul.f32 q0, q0, q8 + vst1.32 {q0},[r0,:128]! + subs len, len, #4 + bgt 3b + bx lr + .unreq len +endfunc + +function ff_vector_fmul_window_neon, export=1 + push {r4,r5,lr} + ldr lr, [sp, #12] + sub r2, r2, #8 + sub r5, lr, #2 + add r2, r2, r5, lsl #2 + add r4, r3, r5, lsl #3 + add ip, r0, r5, lsl #3 + mov r5, #-16 + vld1.32 {d0,d1}, [r1,:128]! + vld1.32 {d2,d3}, [r2,:128], r5 + vld1.32 {d4,d5}, [r3,:128]! + vld1.32 {d6,d7}, [r4,:128], r5 +1: subs lr, lr, #4 + vmul.f32 d22, d0, d4 + vrev64.32 q3, q3 + vmul.f32 d23, d1, d5 + vrev64.32 q1, q1 + vmul.f32 d20, d0, d7 + vmul.f32 d21, d1, d6 + beq 2f + vmla.f32 d22, d3, d7 + vld1.32 {d0,d1}, [r1,:128]! + vmla.f32 d23, d2, d6 + vld1.32 {d18,d19},[r2,:128], r5 + vmls.f32 d20, d3, d4 + vld1.32 {d24,d25},[r3,:128]! + vmls.f32 d21, d2, d5 + vld1.32 {d6,d7}, [r4,:128], r5 + vmov q1, q9 + vrev64.32 q11, q11 + vmov q2, q12 + vswp d22, d23 + vst1.32 {d20,d21},[r0,:128]! + vst1.32 {d22,d23},[ip,:128], r5 + b 1b +2: vmla.f32 d22, d3, d7 + vmla.f32 d23, d2, d6 + vmls.f32 d20, d3, d4 + vmls.f32 d21, d2, d5 + vrev64.32 q11, q11 + vswp d22, d23 + vst1.32 {d20,d21},[r0,:128]! + vst1.32 {d22,d23},[ip,:128], r5 + pop {r4,r5,pc} +endfunc + +function ff_vector_fmul_add_neon, export=1 + ldr r12, [sp] + vld1.32 {q0-q1}, [r1,:128]! + vld1.32 {q8-q9}, [r2,:128]! + vld1.32 {q2-q3}, [r3,:128]! + vmul.f32 q10, q0, q8 + vmul.f32 q11, q1, q9 +1: vadd.f32 q12, q2, q10 + vadd.f32 q13, q3, q11 + pld [r1, #16] + pld [r2, #16] + pld [r3, #16] + subs r12, r12, #8 + beq 2f + vld1.32 {q0}, [r1,:128]! + vld1.32 {q8}, [r2,:128]! + vmul.f32 q10, q0, q8 + vld1.32 {q1}, [r1,:128]! + vld1.32 {q9}, [r2,:128]! + vmul.f32 q11, q1, q9 + vld1.32 {q2-q3}, [r3,:128]! + vst1.32 {q12-q13},[r0,:128]! + b 1b +2: vst1.32 {q12-q13},[r0,:128]! + bx lr +endfunc + +function ff_vector_fmul_reverse_neon, export=1 + add r2, r2, r3, lsl #2 + sub r2, r2, #32 + mov r12, #-32 + vld1.32 {q0-q1}, [r1,:128]! + vld1.32 {q2-q3}, [r2,:128], r12 +1: pld [r1, #32] + vrev64.32 q3, q3 + vmul.f32 d16, d0, d7 + vmul.f32 d17, d1, d6 + pld [r2, #-32] + vrev64.32 q2, q2 + vmul.f32 d18, d2, d5 + vmul.f32 d19, d3, d4 + subs r3, r3, #8 + beq 2f + vld1.32 {q0-q1}, [r1,:128]! + vld1.32 {q2-q3}, [r2,:128], r12 + vst1.32 {q8-q9}, [r0,:128]! + b 1b +2: vst1.32 {q8-q9}, [r0,:128]! + bx lr +endfunc + +function ff_butterflies_float_neon, export=1 +1: vld1.32 {q0},[r0,:128] + vld1.32 {q1},[r1,:128] + vsub.f32 q2, q0, q1 + vadd.f32 q1, q0, q1 + vst1.32 {q2},[r1,:128]! + vst1.32 {q1},[r0,:128]! + subs r2, r2, #4 + bgt 1b + bx lr +endfunc + +function ff_scalarproduct_float_neon, export=1 + vmov.f32 q2, #0.0 +1: vld1.32 {q0},[r0,:128]! + vld1.32 {q1},[r1,:128]! + vmla.f32 q2, q0, q1 + subs r2, r2, #4 + bgt 1b + vadd.f32 d0, d4, d5 + vpadd.f32 d0, d0, d0 +NOVFP vmov.32 r0, d0[0] + bx lr +endfunc diff --git a/media/ffvpx/libavutil/arm/float_dsp_vfp.S b/media/ffvpx/libavutil/arm/float_dsp_vfp.S new file mode 100644 index 0000000000..7db2452284 --- /dev/null +++ b/media/ffvpx/libavutil/arm/float_dsp_vfp.S @@ -0,0 +1,457 @@ +/* + * Copyright (c) 2008 Siarhei Siamashka + * + * This file is part of FFmpeg + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "config.h" +#include "asm.S" + +/** + * Assume that len is a positive number and is multiple of 8 + */ +@ void ff_vector_fmul_vfp(float *dst, const float *src0, const float *src1, int len) +function ff_vector_fmul_vfp, export=1 + vpush {d8-d15} + fmrx r12, fpscr + orr r12, r12, #(3 << 16) /* set vector size to 4 */ + fmxr fpscr, r12 + + vldmia r1!, {s0-s3} + vldmia r2!, {s8-s11} + vldmia r1!, {s4-s7} + vldmia r2!, {s12-s15} + vmul.f32 s8, s0, s8 +1: + subs r3, r3, #16 + vmul.f32 s12, s4, s12 + itttt ge + vldmiage r1!, {s16-s19} + vldmiage r2!, {s24-s27} + vldmiage r1!, {s20-s23} + vldmiage r2!, {s28-s31} + it ge + vmulge.f32 s24, s16, s24 + vstmia r0!, {s8-s11} + vstmia r0!, {s12-s15} + it ge + vmulge.f32 s28, s20, s28 + itttt gt + vldmiagt r1!, {s0-s3} + vldmiagt r2!, {s8-s11} + vldmiagt r1!, {s4-s7} + vldmiagt r2!, {s12-s15} + ittt ge + vmulge.f32 s8, s0, s8 + vstmiage r0!, {s24-s27} + vstmiage r0!, {s28-s31} + bgt 1b + + bic r12, r12, #(7 << 16) /* set vector size back to 1 */ + fmxr fpscr, r12 + vpop {d8-d15} + bx lr +endfunc + +/** + * ARM VFP implementation of 'vector_fmul_window_c' function + * Assume that len is a positive non-zero number + */ +@ void ff_vector_fmul_window_vfp(float *dst, const float *src0, +@ const float *src1, const float *win, int len) +function ff_vector_fmul_window_vfp, export=1 +DST0 .req a1 +SRC0 .req a2 +SRC1 .req a3 +WIN0 .req a4 +LEN .req v1 +DST1 .req v2 +WIN1 .req v3 +OLDFPSCR .req ip + + push {v1-v3,lr} + ldr LEN, [sp, #4*4+0] + vpush {s16-s31} + fmrx OLDFPSCR, FPSCR + add DST1, DST0, LEN, lsl #3 + add SRC1, SRC1, LEN, lsl #2 + add WIN1, WIN0, LEN, lsl #3 + + tst LEN, #7 + beq 4f @ common case: len is a multiple of 8 + + ldr lr, =0x03000000 @ RunFast mode, scalar mode + fmxr FPSCR, lr + + tst LEN, #1 + beq 1f + vldmdb WIN1!, {s0} + vldmia SRC0!, {s8} + vldmia WIN0!, {s16} + vmul.f s24, s0, s8 + vldmdb SRC1!, {s20} + vmul.f s8, s16, s8 + vmls.f s24, s16, s20 + vmla.f s8, s0, s20 + vstmia DST0!, {s24} + vstmdb DST1!, {s8} +1: + tst LEN, #2 + beq 2f + vldmdb WIN1!, {s0} + vldmdb WIN1!, {s1} + vldmia SRC0!, {s8-s9} + vldmia WIN0!, {s16-s17} + vmul.f s24, s0, s8 + vmul.f s25, s1, s9 + vldmdb SRC1!, {s20} + vldmdb SRC1!, {s21} + vmul.f s8, s16, s8 + vmul.f s9, s17, s9 + vmls.f s24, s16, s20 + vmls.f s25, s17, s21 + vmla.f s8, s0, s20 + vmla.f s9, s1, s21 + vstmia DST0!, {s24-s25} + vstmdb DST1!, {s8} + vstmdb DST1!, {s9} +2: + tst LEN, #4 + beq 3f + vldmdb WIN1!, {s0} + vldmdb WIN1!, {s1} + vldmdb WIN1!, {s2} + vldmdb WIN1!, {s3} + vldmia SRC0!, {s8-s11} + vldmia WIN0!, {s16-s19} + vmul.f s24, s0, s8 + vmul.f s25, s1, s9 + vmul.f s26, s2, s10 + vmul.f s27, s3, s11 + vldmdb SRC1!, {s20} + vldmdb SRC1!, {s21} + vldmdb SRC1!, {s22} + vldmdb SRC1!, {s23} + vmul.f s8, s16, s8 + vmul.f s9, s17, s9 + vmul.f s10, s18, s10 + vmul.f s11, s19, s11 + vmls.f s24, s16, s20 + vmls.f s25, s17, s21 + vmls.f s26, s18, s22 + vmls.f s27, s19, s23 + vmla.f s8, s0, s20 + vmla.f s9, s1, s21 + vmla.f s10, s2, s22 + vmla.f s11, s3, s23 + vstmia DST0!, {s24-s27} + vstmdb DST1!, {s8} + vstmdb DST1!, {s9} + vstmdb DST1!, {s10} + vstmdb DST1!, {s11} +3: + bics LEN, LEN, #7 + beq 7f +4: + ldr lr, =0x03030000 @ RunFast mode, short vectors of length 4, stride 1 + fmxr FPSCR, lr + + vldmdb WIN1!, {s0} + vldmdb WIN1!, {s1} + vldmdb WIN1!, {s2} + vldmdb WIN1!, {s3} + vldmia SRC0!, {s8-s11} + vldmia WIN0!, {s16-s19} + vmul.f s24, s0, s8 @ vector * vector + vldmdb SRC1!, {s20} + vldmdb SRC1!, {s21} + vldmdb SRC1!, {s22} + vldmdb SRC1!, {s23} + vmul.f s8, s16, s8 @ vector * vector + vmls.f s24, s16, s20 @ vector * vector + vldmdb WIN1!, {s4} + vldmdb WIN1!, {s5} + vldmdb WIN1!, {s6} + vldmdb WIN1!, {s7} + vldmia SRC0!, {s12-s13} + vmla.f s8, s0, s20 @ vector * vector + vldmia SRC0!, {s14-s15} + subs LEN, LEN, #8 + beq 6f +5: vldmia WIN0!, {s20-s23} + vmul.f s28, s4, s12 @ vector * vector + vstmia DST0!, {s24-s25} + vldmdb SRC1!, {s16} + vldmdb SRC1!, {s17} + vldmdb SRC1!, {s18} + vldmdb SRC1!, {s19} + vmul.f s12, s20, s12 @ vector * vector + vstmia DST0!, {s26-s27} + vstmdb DST1!, {s8} + vstmdb DST1!, {s9} + vstmdb DST1!, {s10} + vstmdb DST1!, {s11} + vmls.f s28, s20, s16 @ vector * vector + vldmdb WIN1!, {s0} + vldmdb WIN1!, {s1} + vldmdb WIN1!, {s2} + vldmdb WIN1!, {s3} + vldmia SRC0!, {s8-s9} + vmla.f s12, s4, s16 @ vector * vector + vldmia SRC0!, {s10-s11} + subs LEN, LEN, #8 + vldmia WIN0!, {s16-s19} + vmul.f s24, s0, s8 @ vector * vector + vstmia DST0!, {s28-s29} + vldmdb SRC1!, {s20} + vldmdb SRC1!, {s21} + vldmdb SRC1!, {s22} + vldmdb SRC1!, {s23} + vmul.f s8, s16, s8 @ vector * vector + vstmia DST0!, {s30-s31} + vstmdb DST1!, {s12} + vstmdb DST1!, {s13} + vstmdb DST1!, {s14} + vstmdb DST1!, {s15} + vmls.f s24, s16, s20 @ vector * vector + vldmdb WIN1!, {s4} + vldmdb WIN1!, {s5} + vldmdb WIN1!, {s6} + vldmdb WIN1!, {s7} + vldmia SRC0!, {s12-s13} + vmla.f s8, s0, s20 @ vector * vector + vldmia SRC0!, {s14-s15} + bne 5b +6: vldmia WIN0!, {s20-s23} + vmul.f s28, s4, s12 @ vector * vector + vstmia DST0!, {s24-s25} + vldmdb SRC1!, {s16} + vldmdb SRC1!, {s17} + vldmdb SRC1!, {s18} + vldmdb SRC1!, {s19} + vmul.f s12, s20, s12 @ vector * vector + vstmia DST0!, {s26-s27} + vstmdb DST1!, {s8} + vstmdb DST1!, {s9} + vstmdb DST1!, {s10} + vstmdb DST1!, {s11} + vmls.f s28, s20, s16 @ vector * vector + vmla.f s12, s4, s16 @ vector * vector + vstmia DST0!, {s28-s31} + vstmdb DST1!, {s12} + vstmdb DST1!, {s13} + vstmdb DST1!, {s14} + vstmdb DST1!, {s15} +7: + fmxr FPSCR, OLDFPSCR + vpop {s16-s31} + pop {v1-v3,pc} + + .unreq DST0 + .unreq SRC0 + .unreq SRC1 + .unreq WIN0 + .unreq LEN + .unreq OLDFPSCR + .unreq DST1 + .unreq WIN1 +endfunc + +/** + * ARM VFP optimized implementation of 'vector_fmul_reverse_c' function. + * Assume that len is a positive number and is multiple of 8 + */ +@ void ff_vector_fmul_reverse_vfp(float *dst, const float *src0, +@ const float *src1, int len) +function ff_vector_fmul_reverse_vfp, export=1 + vpush {d8-d15} + add r2, r2, r3, lsl #2 + vldmdb r2!, {s0-s3} + vldmia r1!, {s8-s11} + vldmdb r2!, {s4-s7} + vldmia r1!, {s12-s15} + vmul.f32 s8, s3, s8 + vmul.f32 s9, s2, s9 + vmul.f32 s10, s1, s10 + vmul.f32 s11, s0, s11 +1: + subs r3, r3, #16 + it ge + vldmdbge r2!, {s16-s19} + vmul.f32 s12, s7, s12 + it ge + vldmiage r1!, {s24-s27} + vmul.f32 s13, s6, s13 + it ge + vldmdbge r2!, {s20-s23} + vmul.f32 s14, s5, s14 + it ge + vldmiage r1!, {s28-s31} + vmul.f32 s15, s4, s15 + it ge + vmulge.f32 s24, s19, s24 + it gt + vldmdbgt r2!, {s0-s3} + it ge + vmulge.f32 s25, s18, s25 + vstmia r0!, {s8-s13} + it ge + vmulge.f32 s26, s17, s26 + it gt + vldmiagt r1!, {s8-s11} + itt ge + vmulge.f32 s27, s16, s27 + vmulge.f32 s28, s23, s28 + it gt + vldmdbgt r2!, {s4-s7} + it ge + vmulge.f32 s29, s22, s29 + vstmia r0!, {s14-s15} + ittt ge + vmulge.f32 s30, s21, s30 + vmulge.f32 s31, s20, s31 + vmulge.f32 s8, s3, s8 + it gt + vldmiagt r1!, {s12-s15} + itttt ge + vmulge.f32 s9, s2, s9 + vmulge.f32 s10, s1, s10 + vstmiage r0!, {s24-s27} + vmulge.f32 s11, s0, s11 + it ge + vstmiage r0!, {s28-s31} + bgt 1b + + vpop {d8-d15} + bx lr +endfunc + +/** + * ARM VFP implementation of 'butterflies_float_c' function + * Assume that len is a positive non-zero number + */ +@ void ff_butterflies_float_vfp(float *restrict v1, float *restrict v2, int len) +function ff_butterflies_float_vfp, export=1 +BASE1 .req a1 +BASE2 .req a2 +LEN .req a3 +OLDFPSCR .req a4 + + vpush {s16-s31} + fmrx OLDFPSCR, FPSCR + + tst LEN, #7 + beq 4f @ common case: len is a multiple of 8 + + ldr ip, =0x03000000 @ RunFast mode, scalar mode + fmxr FPSCR, ip + + tst LEN, #1 + beq 1f + vldmia BASE1!, {s0} + vldmia BASE2!, {s8} + vadd.f s16, s0, s8 + vsub.f s24, s0, s8 + vstr s16, [BASE1, #0-4*1] + vstr s24, [BASE2, #0-4*1] +1: + tst LEN, #2 + beq 2f + vldmia BASE1!, {s0-s1} + vldmia BASE2!, {s8-s9} + vadd.f s16, s0, s8 + vadd.f s17, s1, s9 + vsub.f s24, s0, s8 + vsub.f s25, s1, s9 + vstr d8, [BASE1, #0-8*1] @ s16,s17 + vstr d12, [BASE2, #0-8*1] @ s24,s25 +2: + tst LEN, #4 + beq 3f + vldmia BASE1!, {s0-s1} + vldmia BASE2!, {s8-s9} + vldmia BASE1!, {s2-s3} + vldmia BASE2!, {s10-s11} + vadd.f s16, s0, s8 + vadd.f s17, s1, s9 + vsub.f s24, s0, s8 + vsub.f s25, s1, s9 + vadd.f s18, s2, s10 + vadd.f s19, s3, s11 + vsub.f s26, s2, s10 + vsub.f s27, s3, s11 + vstr d8, [BASE1, #0-16*1] @ s16,s17 + vstr d12, [BASE2, #0-16*1] @ s24,s25 + vstr d9, [BASE1, #8-16*1] @ s18,s19 + vstr d13, [BASE2, #8-16*1] @ s26,s27 +3: + bics LEN, LEN, #7 + beq 7f +4: + ldr ip, =0x03030000 @ RunFast mode, short vectors of length 4, stride 1 + fmxr FPSCR, ip + + vldmia BASE1!, {s0-s1} + vldmia BASE2!, {s8-s9} + vldmia BASE1!, {s2-s3} + vldmia BASE2!, {s10-s11} + vadd.f s16, s0, s8 + vldmia BASE1!, {s4-s5} + vldmia BASE2!, {s12-s13} + vldmia BASE1!, {s6-s7} + vldmia BASE2!, {s14-s15} + vsub.f s24, s0, s8 + vadd.f s20, s4, s12 + subs LEN, LEN, #8 + beq 6f +5: vldmia BASE1!, {s0-s3} + vldmia BASE2!, {s8-s11} + vsub.f s28, s4, s12 + vstr d8, [BASE1, #0-16*3] @ s16,s17 + vstr d9, [BASE1, #8-16*3] @ s18,s19 + vstr d12, [BASE2, #0-16*3] @ s24,s25 + vstr d13, [BASE2, #8-16*3] @ s26,s27 + vadd.f s16, s0, s8 + vldmia BASE1!, {s4-s7} + vldmia BASE2!, {s12-s15} + vsub.f s24, s0, s8 + vstr d10, [BASE1, #0-16*3] @ s20,s21 + vstr d11, [BASE1, #8-16*3] @ s22,s23 + vstr d14, [BASE2, #0-16*3] @ s28,s29 + vstr d15, [BASE2, #8-16*3] @ s30,s31 + vadd.f s20, s4, s12 + subs LEN, LEN, #8 + bne 5b +6: vsub.f s28, s4, s12 + vstr d8, [BASE1, #0-16*2] @ s16,s17 + vstr d9, [BASE1, #8-16*2] @ s18,s19 + vstr d12, [BASE2, #0-16*2] @ s24,s25 + vstr d13, [BASE2, #8-16*2] @ s26,s27 + vstr d10, [BASE1, #0-16*1] @ s20,s21 + vstr d11, [BASE1, #8-16*1] @ s22,s23 + vstr d14, [BASE2, #0-16*1] @ s28,s29 + vstr d15, [BASE2, #8-16*1] @ s30,s31 +7: + fmxr FPSCR, OLDFPSCR + vpop {s16-s31} + bx lr + + .unreq BASE1 + .unreq BASE2 + .unreq LEN + .unreq OLDFPSCR +endfunc diff --git a/media/ffvpx/libavutil/arm/intmath.h b/media/ffvpx/libavutil/arm/intmath.h new file mode 100644 index 0000000000..f19b21e98d --- /dev/null +++ b/media/ffvpx/libavutil/arm/intmath.h @@ -0,0 +1,146 @@ +/* + * Copyright (c) 2010 Mans Rullgard + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVUTIL_ARM_INTMATH_H +#define AVUTIL_ARM_INTMATH_H + +#include + +#include "config.h" +#include "libavutil/attributes.h" + +#if HAVE_INLINE_ASM + +#if HAVE_ARMV6_INLINE + +#define av_clip_uint8 av_clip_uint8_arm +static av_always_inline av_const int av_clip_uint8_arm(int a) +{ + int x; + __asm__ ("usat %0, #8, %1" : "=r"(x) : "r"(a)); + return x; +} + +#define av_clip_int8 av_clip_int8_arm +static av_always_inline av_const int av_clip_int8_arm(int a) +{ + int x; + __asm__ ("ssat %0, #8, %1" : "=r"(x) : "r"(a)); + return x; +} + +#define av_clip_uint16 av_clip_uint16_arm +static av_always_inline av_const int av_clip_uint16_arm(int a) +{ + int x; + __asm__ ("usat %0, #16, %1" : "=r"(x) : "r"(a)); + return x; +} + +#define av_clip_int16 av_clip_int16_arm +static av_always_inline av_const int av_clip_int16_arm(int a) +{ + int x; + __asm__ ("ssat %0, #16, %1" : "=r"(x) : "r"(a)); + return x; +} + +#define av_clip_intp2 av_clip_intp2_arm +static av_always_inline av_const int av_clip_intp2_arm(int a, int p) +{ + if (av_builtin_constant_p(p)) { + unsigned x; + __asm__ ("ssat %0, %2, %1" : "=r"(x) : "r"(a), "i"(p+1)); + return x; + } else { + if (((unsigned)a + (1 << p)) & ~((2 << p) - 1)) + return (a >> 31) ^ ((1 << p) - 1); + else + return a; + } +} + +#define av_clip_uintp2 av_clip_uintp2_arm +static av_always_inline av_const unsigned av_clip_uintp2_arm(int a, int p) +{ + if (av_builtin_constant_p(p)) { + unsigned x; + __asm__ ("usat %0, %2, %1" : "=r"(x) : "r"(a), "i"(p)); + return x; + } else { + if (a & ~((1<> 31 & ((1< +#include "config.h" +#include "libavutil/attributes.h" + +#if HAVE_FAST_UNALIGNED && HAVE_INLINE_ASM && AV_GCC_VERSION_AT_MOST(4,6) + +#define AV_RN16 AV_RN16 +static av_always_inline unsigned AV_RN16(const void *p) +{ + const uint8_t *q = p; + unsigned v; +#if AV_GCC_VERSION_AT_MOST(4,5) + __asm__ ("ldrh %0, %1" : "=r"(v) : "m"(*(const uint16_t *)q)); +#elif defined __thumb__ + __asm__ ("ldrh %0, %1" : "=r"(v) : "m"(q[0]), "m"(q[1])); +#else + __asm__ ("ldrh %0, %1" : "=r"(v) : "Uq"(q[0]), "m"(q[1])); +#endif + return v; +} + +#define AV_WN16 AV_WN16 +static av_always_inline void AV_WN16(void *p, uint16_t v) +{ + __asm__ ("strh %1, %0" : "=m"(*(uint16_t *)p) : "r"(v)); +} + +#define AV_RN32 AV_RN32 +static av_always_inline uint32_t AV_RN32(const void *p) +{ + const struct __attribute__((packed)) { uint32_t v; } *q = p; + uint32_t v; + __asm__ ("ldr %0, %1" : "=r"(v) : "m"(*q)); + return v; +} + +#define AV_WN32 AV_WN32 +static av_always_inline void AV_WN32(void *p, uint32_t v) +{ + __asm__ ("str %1, %0" : "=m"(*(uint32_t *)p) : "r"(v)); +} + +#if HAVE_ASM_MOD_Q + +#define AV_RN64 AV_RN64 +static av_always_inline uint64_t AV_RN64(const void *p) +{ + const struct __attribute__((packed)) { uint32_t v; } *q = p; + uint64_t v; + __asm__ ("ldr %Q0, %1 \n\t" + "ldr %R0, %2 \n\t" + : "=&r"(v) + : "m"(q[0]), "m"(q[1])); + return v; +} + +#define AV_WN64 AV_WN64 +static av_always_inline void AV_WN64(void *p, uint64_t v) +{ + __asm__ ("str %Q2, %0 \n\t" + "str %R2, %1 \n\t" + : "=m"(*(uint32_t*)p), "=m"(*((uint32_t*)p+1)) + : "r"(v)); +} + +#endif /* HAVE_ASM_MOD_Q */ + +#endif /* HAVE_INLINE_ASM */ + +#endif /* AVUTIL_ARM_INTREADWRITE_H */ diff --git a/media/ffvpx/libavutil/arm/moz.build b/media/ffvpx/libavutil/arm/moz.build new file mode 100644 index 0000000000..e5f9fb23e3 --- /dev/null +++ b/media/ffvpx/libavutil/arm/moz.build @@ -0,0 +1,18 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +SOURCES += [ + 'cpu.c', + 'float_dsp_init_arm.c', + 'float_dsp_init_neon.c', + 'float_dsp_init_vfp.c', + 'float_dsp_neon.S', + 'float_dsp_vfp.S', +] + +FINAL_LIBRARY = 'mozavutil' + +include('/media/ffvpx/ffvpxcommon.mozbuild') diff --git a/media/ffvpx/libavutil/arm/timer.h b/media/ffvpx/libavutil/arm/timer.h new file mode 100644 index 0000000000..caf23e2a5a --- /dev/null +++ b/media/ffvpx/libavutil/arm/timer.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2009 Mans Rullgard + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVUTIL_ARM_TIMER_H +#define AVUTIL_ARM_TIMER_H + +#include +#include "config.h" + +#if defined(__APPLE__) + +#include + +#define AV_READ_TIME mach_absolute_time + +#elif HAVE_INLINE_ASM && defined(__ARM_ARCH_7A__) + +#define AV_READ_TIME read_time + +static inline uint64_t read_time(void) +{ + unsigned cc; + __asm__ volatile ("mrc p15, 0, %0, c9, c13, 0" : "=r"(cc)); + return cc; +} + +#endif /* HAVE_INLINE_ASM && __ARM_ARCH_7A__ */ + +#endif /* AVUTIL_ARM_TIMER_H */ -- cgit v1.2.3