diff options
Diffstat (limited to 'media/libaom/0002-mmloadusi64.patch')
-rw-r--r-- | media/libaom/0002-mmloadusi64.patch | 79 |
1 files changed, 79 insertions, 0 deletions
diff --git a/media/libaom/0002-mmloadusi64.patch b/media/libaom/0002-mmloadusi64.patch new file mode 100644 index 0000000000..9d23c90f22 --- /dev/null +++ b/media/libaom/0002-mmloadusi64.patch @@ -0,0 +1,79 @@ +diff --git a/aom_dsp/x86/synonyms.h b/aom_dsp/x86/synonyms.h +--- a/aom_dsp/x86/synonyms.h ++++ b/aom_dsp/x86/synonyms.h +@@ -41,22 +41,35 @@ static INLINE __m128i xx_loadl_64(const + static INLINE __m128i xx_load_128(const void *a) { + return _mm_load_si128((const __m128i *)a); + } + + static INLINE __m128i xx_loadu_128(const void *a) { + return _mm_loadu_si128((const __m128i *)a); + } + ++ ++// _mm_loadu_si64 has been introduced in GCC 9, reimplement the function ++// manually on older compilers. ++#if !defined(__clang__) && __GNUC_MAJOR__ < 9 ++static INLINE __m128i xx_loadu_2x64(const void *hi, const void *lo) { ++ __m64 hi_, lo_; ++ memcpy(&hi_, hi, sizeof(hi_)); ++ memcpy(&lo_, lo, sizeof(lo_)); ++ return _mm_set_epi64(hi_, lo_); ++} ++#endif ++#else + // Load 64 bits from each of hi and low, and pack into an SSE register + // Since directly loading as `int64_t`s and using _mm_set_epi64 may violate + // the strict aliasing rule, this takes a different approach + static INLINE __m128i xx_loadu_2x64(const void *hi, const void *lo) { + return _mm_unpacklo_epi64(_mm_loadu_si64(lo), _mm_loadu_si64(hi)); + } ++#endif + + static INLINE void xx_storel_32(void *const a, const __m128i v) { + const int val = _mm_cvtsi128_si32(v); + memcpy(a, &val, sizeof(val)); + } + + static INLINE void xx_storel_64(void *const a, const __m128i v) { + _mm_storel_epi64((__m128i *)a, v); +diff --git a/aom_dsp/x86/synonyms_avx2.h b/aom_dsp/x86/synonyms_avx2.h +--- a/aom_dsp/x86/synonyms_avx2.h ++++ b/aom_dsp/x86/synonyms_avx2.h +@@ -66,21 +66,36 @@ static INLINE __m256i yy_set1_64_from_32 + + // Some compilers don't have _mm256_set_m128i defined in immintrin.h. We + // therefore define an equivalent function using a different intrinsic. + // ([ hi ], [ lo ]) -> [ hi ][ lo ] + static INLINE __m256i yy_set_m128i(__m128i hi, __m128i lo) { + return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), hi, 1); + } + ++#define GCC_VERSION (__GNUC__ * 10000 \ ++ + __GNUC_MINOR__ * 100 \ ++ + __GNUC_PATCHLEVEL__) ++ ++// _mm256_loadu2_m128i has been introduced in GCC 10.1 ++#if !defined(__clang__) && GCC_VERSION < 101000 ++static INLINE __m256i yy_loadu2_128(const void *hi, const void *lo) { ++ __m128i mhi = _mm_loadu_si128((const __m128i *)(hi)); ++ __m128i mlo = _mm_loadu_si128((const __m128i *)(lo)); ++ return _mm256_set_m128i(mhi, mlo); ++} ++#else + static INLINE __m256i yy_loadu2_128(const void *hi, const void *lo) { + __m128i mhi = _mm_loadu_si128((const __m128i *)(hi)); + __m128i mlo = _mm_loadu_si128((const __m128i *)(lo)); + return yy_set_m128i(mhi, mlo); + } ++#endif ++ ++#undef GCC_VERSION + + static INLINE void yy_storeu2_128(void *hi, void *lo, const __m256i a) { + _mm_storeu_si128((__m128i *)hi, _mm256_extracti128_si256(a, 1)); + _mm_storeu_si128((__m128i *)lo, _mm256_castsi256_si128(a)); + } + + static INLINE __m256i yy_roundn_epu16(__m256i v_val_w, int bits) { + const __m256i v_s_w = _mm256_srli_epi16(v_val_w, bits - 1); |