From 40a355a42d4a9444dc753c04c6608dade2f06a23 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Fri, 19 Apr 2024 03:13:27 +0200 Subject: Adding upstream version 125.0.1. Signed-off-by: Daniel Baumann --- media/libopus/celt/x86/x86cpu.h | 49 +++++++++++++++++++++++++++++++---------- 1 file changed, 37 insertions(+), 12 deletions(-) (limited to 'media/libopus/celt/x86/x86cpu.h') diff --git a/media/libopus/celt/x86/x86cpu.h b/media/libopus/celt/x86/x86cpu.h index 04e80489b1..8ae9be8d8f 100644 --- a/media/libopus/celt/x86/x86cpu.h +++ b/media/libopus/celt/x86/x86cpu.h @@ -46,28 +46,53 @@ # define MAY_HAVE_SSE4_1(name) name ## _c # endif -# if defined(OPUS_X86_MAY_HAVE_AVX) -# define MAY_HAVE_AVX(name) name ## _avx +# if defined(OPUS_X86_MAY_HAVE_AVX2) +# define MAY_HAVE_AVX2(name) name ## _avx2 # else -# define MAY_HAVE_AVX(name) name ## _c +# define MAY_HAVE_AVX2(name) name ## _c # endif -# if defined(OPUS_HAVE_RTCD) +# if defined(OPUS_HAVE_RTCD) && \ + ((defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \ + (defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)) || \ + (defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) || \ + (defined(OPUS_X86_MAY_HAVE_AVX2) && !defined(OPUS_X86_PRESUME_AVX2))) int opus_select_arch(void); # endif +# if defined(OPUS_X86_MAY_HAVE_SSE2) +# include "opus_defines.h" + /*MOVD should not impose any alignment restrictions, but the C standard does, and UBSan will report errors if we actually make unaligned accesses. Use this to work around those restrictions (which should hopefully all get - optimized to a single MOVD instruction).*/ -#define OP_LOADU_EPI32(x) \ - (int)((*(unsigned char *)(x) | *((unsigned char *)(x) + 1) << 8U |\ - *((unsigned char *)(x) + 2) << 16U | (opus_uint32)*((unsigned char *)(x) + 3) << 24U)) + optimized to a single MOVD instruction). + GCC implemented _mm_loadu_si32() since GCC 11; HOWEVER, there is a bug! + https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99754 */ +# if !defined(_MSC_VER) && !OPUS_GNUC_PREREQ(11,3) && !(defined(__clang__) && (__clang_major__ >= 8)) +# include +# include + +# ifdef _mm_loadu_si32 +# undef _mm_loadu_si32 +# endif +# define _mm_loadu_si32 WORKAROUND_mm_loadu_si32 +static inline __m128i WORKAROUND_mm_loadu_si32(void const* mem_addr) { + int val; + memcpy(&val, mem_addr, sizeof(val)); + return _mm_cvtsi32_si128(val); +} +# elif defined(_MSC_VER) + /* MSVC needs this for _mm_loadu_si32 */ +# include +# endif -#define OP_CVTEPI8_EPI32_M32(x) \ - (_mm_cvtepi8_epi32(_mm_cvtsi32_si128(OP_LOADU_EPI32(x)))) +# define OP_CVTEPI8_EPI32_M32(x) \ + (_mm_cvtepi8_epi32(_mm_loadu_si32(x))) -#define OP_CVTEPI16_EPI32_M64(x) \ - (_mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i *)(x)))) +# define OP_CVTEPI16_EPI32_M64(x) \ + (_mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i *)(void*)(x)))) + +# endif #endif -- cgit v1.2.3