diff options
Diffstat (limited to 'media/libopus/silk/float/x86')
-rw-r--r-- | media/libopus/silk/float/x86/inner_product_FLP_avx2.c | 85 |
1 files changed, 85 insertions, 0 deletions
diff --git a/media/libopus/silk/float/x86/inner_product_FLP_avx2.c b/media/libopus/silk/float/x86/inner_product_FLP_avx2.c new file mode 100644 index 0000000000..4a2daaf595 --- /dev/null +++ b/media/libopus/silk/float/x86/inner_product_FLP_avx2.c @@ -0,0 +1,85 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. + 2023 Amazon +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "SigProc_FLP.h" +#include <immintrin.h> + + +/* inner product of two silk_float arrays, with result as double */ +double silk_inner_product_FLP_avx2( + const silk_float *data1, + const silk_float *data2, + opus_int dataSize +) +{ + opus_int i; + __m256d accum1, accum2; + double result; + + /* 4x unrolled loop */ + result = 0.0; + accum1 = accum2 = _mm256_setzero_pd(); + for( i = 0; i < dataSize - 7; i += 8 ) { + __m128 x1f, x2f; + __m256d x1d, x2d; + x1f = _mm_loadu_ps( &data1[ i ] ); + x2f = _mm_loadu_ps( &data2[ i ] ); + x1d = _mm256_cvtps_pd( x1f ); + x2d = _mm256_cvtps_pd( x2f ); + accum1 = _mm256_fmadd_pd( x1d, x2d, accum1 ); + x1f = _mm_loadu_ps( &data1[ i + 4 ] ); + x2f = _mm_loadu_ps( &data2[ i + 4 ] ); + x1d = _mm256_cvtps_pd( x1f ); + x2d = _mm256_cvtps_pd( x2f ); + accum2 = _mm256_fmadd_pd( x1d, x2d, accum2 ); + } + for( ; i < dataSize - 3; i += 4 ) { + __m128 x1f, x2f; + __m256d x1d, x2d; + x1f = _mm_loadu_ps( &data1[ i ] ); + x2f = _mm_loadu_ps( &data2[ i ] ); + x1d = _mm256_cvtps_pd( x1f ); + x2d = _mm256_cvtps_pd( x2f ); + accum1 = _mm256_fmadd_pd( x1d, x2d, accum1 ); + } + accum1 = _mm256_add_pd(accum1, accum2); + accum1 = _mm256_add_pd(accum1, _mm256_permute2f128_pd(accum1, accum1, 1)); + accum1 = _mm256_hadd_pd(accum1,accum1); + result = _mm256_cvtsd_f64(accum1); + + /* add any remaining products */ + for( ; i < dataSize; i++ ) { + result += data1[ i ] * (double)data2[ i ]; + } + + return result; +} |