diff options
Diffstat (limited to 'third_party/libwebrtc/common_audio/signal_processing')
56 files changed, 10554 insertions, 0 deletions
diff --git a/third_party/libwebrtc/common_audio/signal_processing/auto_corr_to_refl_coef.c b/third_party/libwebrtc/common_audio/signal_processing/auto_corr_to_refl_coef.c new file mode 100644 index 0000000000..a3ec24f5da --- /dev/null +++ b/third_party/libwebrtc/common_audio/signal_processing/auto_corr_to_refl_coef.c @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +/* + * This file contains the function WebRtcSpl_AutoCorrToReflCoef(). + * The description header can be found in signal_processing_library.h + * + */ + +#include "common_audio/signal_processing/include/signal_processing_library.h" + +void WebRtcSpl_AutoCorrToReflCoef(const int32_t *R, int use_order, int16_t *K) +{ + int i, n; + int16_t tmp; + const int32_t *rptr; + int32_t L_num, L_den; + int16_t *acfptr, *pptr, *wptr, *p1ptr, *w1ptr, ACF[WEBRTC_SPL_MAX_LPC_ORDER], + P[WEBRTC_SPL_MAX_LPC_ORDER], W[WEBRTC_SPL_MAX_LPC_ORDER]; + + // Initialize loop and pointers. + acfptr = ACF; + rptr = R; + pptr = P; + p1ptr = &P[1]; + w1ptr = &W[1]; + wptr = w1ptr; + + // First loop; n=0. Determine shifting. + tmp = WebRtcSpl_NormW32(*R); + *acfptr = (int16_t)((*rptr++ << tmp) >> 16); + *pptr++ = *acfptr++; + + // Initialize ACF, P and W. + for (i = 1; i <= use_order; i++) + { + *acfptr = (int16_t)((*rptr++ << tmp) >> 16); + *wptr++ = *acfptr; + *pptr++ = *acfptr++; + } + + // Compute reflection coefficients. + for (n = 1; n <= use_order; n++, K++) + { + tmp = WEBRTC_SPL_ABS_W16(*p1ptr); + if (*P < tmp) + { + for (i = n; i <= use_order; i++) + *K++ = 0; + + return; + } + + // Division: WebRtcSpl_div(tmp, *P) + *K = 0; + if (tmp != 0) + { + L_num = tmp; + L_den = *P; + i = 15; + while (i--) + { + (*K) <<= 1; + L_num <<= 1; + if (L_num >= L_den) + { + L_num -= L_den; + (*K)++; + } + } + if (*p1ptr > 0) + *K = -*K; + } + + // Last iteration; don't do Schur recursion. + if (n == use_order) + return; + + // Schur recursion. + pptr = P; + wptr = w1ptr; + tmp = (int16_t)(((int32_t)*p1ptr * (int32_t)*K + 16384) >> 15); + *pptr = WebRtcSpl_AddSatW16(*pptr, tmp); + pptr++; + for (i = 1; i <= use_order - n; i++) + { + tmp = (int16_t)(((int32_t)*wptr * (int32_t)*K + 16384) >> 15); + *pptr = WebRtcSpl_AddSatW16(*(pptr + 1), tmp); + pptr++; + tmp = (int16_t)(((int32_t)*pptr * (int32_t)*K + 16384) >> 15); + *wptr = WebRtcSpl_AddSatW16(*wptr, tmp); + wptr++; + } + } +} diff --git a/third_party/libwebrtc/common_audio/signal_processing/auto_correlation.c b/third_party/libwebrtc/common_audio/signal_processing/auto_correlation.c new file mode 100644 index 0000000000..1455820e8f --- /dev/null +++ b/third_party/libwebrtc/common_audio/signal_processing/auto_correlation.c @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "common_audio/signal_processing/include/signal_processing_library.h" + +#include "rtc_base/checks.h" + +size_t WebRtcSpl_AutoCorrelation(const int16_t* in_vector, + size_t in_vector_length, + size_t order, + int32_t* result, + int* scale) { + int32_t sum = 0; + size_t i = 0, j = 0; + int16_t smax = 0; + int scaling = 0; + + RTC_DCHECK_LE(order, in_vector_length); + + // Find the maximum absolute value of the samples. + smax = WebRtcSpl_MaxAbsValueW16(in_vector, in_vector_length); + + // In order to avoid overflow when computing the sum we should scale the + // samples so that (in_vector_length * smax * smax) will not overflow. + if (smax == 0) { + scaling = 0; + } else { + // Number of bits in the sum loop. + int nbits = WebRtcSpl_GetSizeInBits((uint32_t)in_vector_length); + // Number of bits to normalize smax. + int t = WebRtcSpl_NormW32(WEBRTC_SPL_MUL(smax, smax)); + + if (t > nbits) { + scaling = 0; + } else { + scaling = nbits - t; + } + } + + // Perform the actual correlation calculation. + for (i = 0; i < order + 1; i++) { + sum = 0; + /* Unroll the loop to improve performance. */ + for (j = 0; i + j + 3 < in_vector_length; j += 4) { + sum += (in_vector[j + 0] * in_vector[i + j + 0]) >> scaling; + sum += (in_vector[j + 1] * in_vector[i + j + 1]) >> scaling; + sum += (in_vector[j + 2] * in_vector[i + j + 2]) >> scaling; + sum += (in_vector[j + 3] * in_vector[i + j + 3]) >> scaling; + } + for (; j < in_vector_length - i; j++) { + sum += (in_vector[j] * in_vector[i + j]) >> scaling; + } + *result++ = sum; + } + + *scale = scaling; + return order + 1; +} diff --git a/third_party/libwebrtc/common_audio/signal_processing/complex_bit_reverse.c b/third_party/libwebrtc/common_audio/signal_processing/complex_bit_reverse.c new file mode 100644 index 0000000000..1c82cff50f --- /dev/null +++ b/third_party/libwebrtc/common_audio/signal_processing/complex_bit_reverse.c @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "common_audio/signal_processing/include/signal_processing_library.h" + +/* Tables for data buffer indexes that are bit reversed and thus need to be + * swapped. Note that, index_7[{0, 2, 4, ...}] are for the left side of the swap + * operations, while index_7[{1, 3, 5, ...}] are for the right side of the + * operation. Same for index_8. + */ + +/* Indexes for the case of stages == 7. */ +static const int16_t index_7[112] = { + 1, 64, 2, 32, 3, 96, 4, 16, 5, 80, 6, 48, 7, 112, 9, 72, 10, 40, 11, 104, + 12, 24, 13, 88, 14, 56, 15, 120, 17, 68, 18, 36, 19, 100, 21, 84, 22, 52, + 23, 116, 25, 76, 26, 44, 27, 108, 29, 92, 30, 60, 31, 124, 33, 66, 35, 98, + 37, 82, 38, 50, 39, 114, 41, 74, 43, 106, 45, 90, 46, 58, 47, 122, 49, 70, + 51, 102, 53, 86, 55, 118, 57, 78, 59, 110, 61, 94, 63, 126, 67, 97, 69, + 81, 71, 113, 75, 105, 77, 89, 79, 121, 83, 101, 87, 117, 91, 109, 95, 125, + 103, 115, 111, 123 +}; + +/* Indexes for the case of stages == 8. */ +static const int16_t index_8[240] = { + 1, 128, 2, 64, 3, 192, 4, 32, 5, 160, 6, 96, 7, 224, 8, 16, 9, 144, 10, 80, + 11, 208, 12, 48, 13, 176, 14, 112, 15, 240, 17, 136, 18, 72, 19, 200, 20, + 40, 21, 168, 22, 104, 23, 232, 25, 152, 26, 88, 27, 216, 28, 56, 29, 184, + 30, 120, 31, 248, 33, 132, 34, 68, 35, 196, 37, 164, 38, 100, 39, 228, 41, + 148, 42, 84, 43, 212, 44, 52, 45, 180, 46, 116, 47, 244, 49, 140, 50, 76, + 51, 204, 53, 172, 54, 108, 55, 236, 57, 156, 58, 92, 59, 220, 61, 188, 62, + 124, 63, 252, 65, 130, 67, 194, 69, 162, 70, 98, 71, 226, 73, 146, 74, 82, + 75, 210, 77, 178, 78, 114, 79, 242, 81, 138, 83, 202, 85, 170, 86, 106, 87, + 234, 89, 154, 91, 218, 93, 186, 94, 122, 95, 250, 97, 134, 99, 198, 101, + 166, 103, 230, 105, 150, 107, 214, 109, 182, 110, 118, 111, 246, 113, 142, + 115, 206, 117, 174, 119, 238, 121, 158, 123, 222, 125, 190, 127, 254, 131, + 193, 133, 161, 135, 225, 137, 145, 139, 209, 141, 177, 143, 241, 147, 201, + 149, 169, 151, 233, 155, 217, 157, 185, 159, 249, 163, 197, 167, 229, 171, + 213, 173, 181, 175, 245, 179, 205, 183, 237, 187, 221, 191, 253, 199, 227, + 203, 211, 207, 243, 215, 235, 223, 251, 239, 247 +}; + +void WebRtcSpl_ComplexBitReverse(int16_t* __restrict complex_data, int stages) { + /* For any specific value of stages, we know exactly the indexes that are + * bit reversed. Currently (Feb. 2012) in WebRTC the only possible values of + * stages are 7 and 8, so we use tables to save unnecessary iterations and + * calculations for these two cases. + */ + if (stages == 7 || stages == 8) { + int m = 0; + int length = 112; + const int16_t* index = index_7; + + if (stages == 8) { + length = 240; + index = index_8; + } + + /* Decimation in time. Swap the elements with bit-reversed indexes. */ + for (m = 0; m < length; m += 2) { + /* We declare a int32_t* type pointer, to load both the 16-bit real + * and imaginary elements from complex_data in one instruction, reducing + * complexity. + */ + int32_t* complex_data_ptr = (int32_t*)complex_data; + int32_t temp = 0; + + temp = complex_data_ptr[index[m]]; /* Real and imaginary */ + complex_data_ptr[index[m]] = complex_data_ptr[index[m + 1]]; + complex_data_ptr[index[m + 1]] = temp; + } + } + else { + int m = 0, mr = 0, l = 0; + int n = 1 << stages; + int nn = n - 1; + + /* Decimation in time - re-order data */ + for (m = 1; m <= nn; ++m) { + int32_t* complex_data_ptr = (int32_t*)complex_data; + int32_t temp = 0; + + /* Find out indexes that are bit-reversed. */ + l = n; + do { + l >>= 1; + } while (l > nn - mr); + mr = (mr & (l - 1)) + l; + + if (mr <= m) { + continue; + } + + /* Swap the elements with bit-reversed indexes. + * This is similar to the loop in the stages == 7 or 8 cases. + */ + temp = complex_data_ptr[m]; /* Real and imaginary */ + complex_data_ptr[m] = complex_data_ptr[mr]; + complex_data_ptr[mr] = temp; + } + } +} diff --git a/third_party/libwebrtc/common_audio/signal_processing/complex_bit_reverse_arm.S b/third_party/libwebrtc/common_audio/signal_processing/complex_bit_reverse_arm.S new file mode 100644 index 0000000000..be8e181aa7 --- /dev/null +++ b/third_party/libwebrtc/common_audio/signal_processing/complex_bit_reverse_arm.S @@ -0,0 +1,119 @@ +@ +@ Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. +@ +@ Use of this source code is governed by a BSD-style license +@ that can be found in the LICENSE file in the root of the source +@ tree. An additional intellectual property rights grant can be found +@ in the file PATENTS. All contributing project authors may +@ be found in the AUTHORS file in the root of the source tree. +@ + +@ This file contains the function WebRtcSpl_ComplexBitReverse(), optimized +@ for ARMv5 platforms. +@ Reference C code is in file complex_bit_reverse.c. Bit-exact. + +#include "rtc_base/system/asm_defines.h" + +GLOBAL_FUNCTION WebRtcSpl_ComplexBitReverse +.align 2 +DEFINE_FUNCTION WebRtcSpl_ComplexBitReverse + push {r4-r7} + + cmp r1, #7 + adr r3, index_7 @ Table pointer. + mov r4, #112 @ Number of interations. + beq PRE_LOOP_STAGES_7_OR_8 + + cmp r1, #8 + adr r3, index_8 @ Table pointer. + mov r4, #240 @ Number of interations. + beq PRE_LOOP_STAGES_7_OR_8 + + mov r3, #1 @ Initialize m. + mov r1, r3, asl r1 @ n = 1 << stages; + subs r6, r1, #1 @ nn = n - 1; + ble END + + mov r5, r0 @ &complex_data + mov r4, #0 @ ml + +LOOP_GENERIC: + rsb r12, r4, r6 @ l > nn - mr + mov r2, r1 @ n + +LOOP_SHIFT: + asr r2, #1 @ l >>= 1; + cmp r2, r12 + bgt LOOP_SHIFT + + sub r12, r2, #1 + and r4, r12, r4 + add r4, r2 @ mr = (mr & (l - 1)) + l; + cmp r4, r3 @ mr <= m ? + ble UPDATE_REGISTERS + + mov r12, r4, asl #2 + ldr r7, [r5, #4] @ complex_data[2 * m, 2 * m + 1]. + @ Offset 4 due to m incrementing from 1. + ldr r2, [r0, r12] @ complex_data[2 * mr, 2 * mr + 1]. + str r7, [r0, r12] + str r2, [r5, #4] + +UPDATE_REGISTERS: + add r3, r3, #1 + add r5, #4 + cmp r3, r1 + bne LOOP_GENERIC + + b END + +PRE_LOOP_STAGES_7_OR_8: + add r4, r3, r4, asl #1 + +LOOP_STAGES_7_OR_8: + ldrsh r2, [r3], #2 @ index[m] + ldrsh r5, [r3], #2 @ index[m + 1] + ldr r1, [r0, r2] @ complex_data[index[m], index[m] + 1] + ldr r12, [r0, r5] @ complex_data[index[m + 1], index[m + 1] + 1] + cmp r3, r4 + str r1, [r0, r5] + str r12, [r0, r2] + bne LOOP_STAGES_7_OR_8 + +END: + pop {r4-r7} + bx lr + +@ The index tables. Note the values are doubles of the actual indexes for 16-bit +@ elements, different from the generic C code. It actually provides byte offsets +@ for the indexes. + +.align 2 +index_7: @ Indexes for stages == 7. + .short 4, 256, 8, 128, 12, 384, 16, 64, 20, 320, 24, 192, 28, 448, 36, 288 + .short 40, 160, 44, 416, 48, 96, 52, 352, 56, 224, 60, 480, 68, 272, 72, 144 + .short 76, 400, 84, 336, 88, 208, 92, 464, 100, 304, 104, 176, 108, 432, 116 + .short 368, 120, 240, 124, 496, 132, 264, 140, 392, 148, 328, 152, 200, 156 + .short 456, 164, 296, 172, 424, 180, 360, 184, 232, 188, 488, 196, 280, 204 + .short 408, 212, 344, 220, 472, 228, 312, 236, 440, 244, 376, 252, 504, 268 + .short 388, 276, 324, 284, 452, 300, 420, 308, 356, 316, 484, 332, 404, 348 + .short 468, 364, 436, 380, 500, 412, 460, 444, 492 + +index_8: @ Indexes for stages == 8. + .short 4, 512, 8, 256, 12, 768, 16, 128, 20, 640, 24, 384, 28, 896, 32, 64 + .short 36, 576, 40, 320, 44, 832, 48, 192, 52, 704, 56, 448, 60, 960, 68, 544 + .short 72, 288, 76, 800, 80, 160, 84, 672, 88, 416, 92, 928, 100, 608, 104 + .short 352, 108, 864, 112, 224, 116, 736, 120, 480, 124, 992, 132, 528, 136 + .short 272, 140, 784, 148, 656, 152, 400, 156, 912, 164, 592, 168, 336, 172 + .short 848, 176, 208, 180, 720, 184, 464, 188, 976, 196, 560, 200, 304, 204 + .short 816, 212, 688, 216, 432, 220, 944, 228, 624, 232, 368, 236, 880, 244 + .short 752, 248, 496, 252, 1008, 260, 520, 268, 776, 276, 648, 280, 392, 284 + .short 904, 292, 584, 296, 328, 300, 840, 308, 712, 312, 456, 316, 968, 324 + .short 552, 332, 808, 340, 680, 344, 424, 348, 936, 356, 616, 364, 872, 372 + .short 744, 376, 488, 380, 1000, 388, 536, 396, 792, 404, 664, 412, 920, 420 + .short 600, 428, 856, 436, 728, 440, 472, 444, 984, 452, 568, 460, 824, 468 + .short 696, 476, 952, 484, 632, 492, 888, 500, 760, 508, 1016, 524, 772, 532 + .short 644, 540, 900, 548, 580, 556, 836, 564, 708, 572, 964, 588, 804, 596 + .short 676, 604, 932, 620, 868, 628, 740, 636, 996, 652, 788, 668, 916, 684 + .short 852, 692, 724, 700, 980, 716, 820, 732, 948, 748, 884, 764, 1012, 796 + .short 908, 812, 844, 828, 972, 860, 940, 892, 1004, 956, 988 diff --git a/third_party/libwebrtc/common_audio/signal_processing/complex_bit_reverse_mips.c b/third_party/libwebrtc/common_audio/signal_processing/complex_bit_reverse_mips.c new file mode 100644 index 0000000000..9007b19cf6 --- /dev/null +++ b/third_party/libwebrtc/common_audio/signal_processing/complex_bit_reverse_mips.c @@ -0,0 +1,176 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#include "common_audio/signal_processing/include/signal_processing_library.h" + +static int16_t coefTable_7[] = { + 4, 256, 8, 128, 12, 384, 16, 64, + 20, 320, 24, 192, 28, 448, 36, 288, + 40, 160, 44, 416, 48, 96, 52, 352, + 56, 224, 60, 480, 68, 272, 72, 144, + 76, 400, 84, 336, 88, 208, 92, 464, + 100, 304, 104, 176, 108, 432, 116, 368, + 120, 240, 124, 496, 132, 264, 140, 392, + 148, 328, 152, 200, 156, 456, 164, 296, + 172, 424, 180, 360, 184, 232, 188, 488, + 196, 280, 204, 408, 212, 344, 220, 472, + 228, 312, 236, 440, 244, 376, 252, 504, + 268, 388, 276, 324, 284, 452, 300, 420, + 308, 356, 316, 484, 332, 404, 348, 468, + 364, 436, 380, 500, 412, 460, 444, 492 +}; + +static int16_t coefTable_8[] = { + 4, 512, 8, 256, 12, 768, 16, 128, + 20, 640, 24, 384, 28, 896, 32, 64, + 36, 576, 40, 320, 44, 832, 48, 192, + 52, 704, 56, 448, 60, 960, 68, 544, + 72, 288, 76, 800, 80, 160, 84, 672, + 88, 416, 92, 928, 100, 608, 104, 352, + 108, 864, 112, 224, 116, 736, 120, 480, + 124, 992, 132, 528, 136, 272, 140, 784, + 148, 656, 152, 400, 156, 912, 164, 592, + 168, 336, 172, 848, 176, 208, 180, 720, + 184, 464, 188, 976, 196, 560, 200, 304, + 204, 816, 212, 688, 216, 432, 220, 944, + 228, 624, 232, 368, 236, 880, 244, 752, + 248, 496, 252, 1008, 260, 520, 268, 776, + 276, 648, 280, 392, 284, 904, 292, 584, + 296, 328, 300, 840, 308, 712, 312, 456, + 316, 968, 324, 552, 332, 808, 340, 680, + 344, 424, 348, 936, 356, 616, 364, 872, + 372, 744, 376, 488, 380, 1000, 388, 536, + 396, 792, 404, 664, 412, 920, 420, 600, + 428, 856, 436, 728, 440, 472, 444, 984, + 452, 568, 460, 824, 468, 696, 476, 952, + 484, 632, 492, 888, 500, 760, 508, 1016, + 524, 772, 532, 644, 540, 900, 548, 580, + 556, 836, 564, 708, 572, 964, 588, 804, + 596, 676, 604, 932, 620, 868, 628, 740, + 636, 996, 652, 788, 668, 916, 684, 852, + 692, 724, 700, 980, 716, 820, 732, 948, + 748, 884, 764, 1012, 796, 908, 812, 844, + 828, 972, 860, 940, 892, 1004, 956, 988 +}; + +void WebRtcSpl_ComplexBitReverse(int16_t frfi[], int stages) { + int l; + int16_t tr, ti; + int32_t tmp1, tmp2, tmp3, tmp4; + int32_t* ptr_i; + int32_t* ptr_j; + + if (stages == 8) { + int16_t* pcoeftable_8 = coefTable_8; + + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "addiu %[l], $zero, 120 \n\t" + "1: \n\t" + "addiu %[l], %[l], -4 \n\t" + "lh %[tr], 0(%[pcoeftable_8]) \n\t" + "lh %[ti], 2(%[pcoeftable_8]) \n\t" + "lh %[tmp3], 4(%[pcoeftable_8]) \n\t" + "lh %[tmp4], 6(%[pcoeftable_8]) \n\t" + "addu %[ptr_i], %[frfi], %[tr] \n\t" + "addu %[ptr_j], %[frfi], %[ti] \n\t" + "addu %[tr], %[frfi], %[tmp3] \n\t" + "addu %[ti], %[frfi], %[tmp4] \n\t" + "ulw %[tmp1], 0(%[ptr_i]) \n\t" + "ulw %[tmp2], 0(%[ptr_j]) \n\t" + "ulw %[tmp3], 0(%[tr]) \n\t" + "ulw %[tmp4], 0(%[ti]) \n\t" + "usw %[tmp1], 0(%[ptr_j]) \n\t" + "usw %[tmp2], 0(%[ptr_i]) \n\t" + "usw %[tmp4], 0(%[tr]) \n\t" + "usw %[tmp3], 0(%[ti]) \n\t" + "lh %[tmp1], 8(%[pcoeftable_8]) \n\t" + "lh %[tmp2], 10(%[pcoeftable_8]) \n\t" + "lh %[tr], 12(%[pcoeftable_8]) \n\t" + "lh %[ti], 14(%[pcoeftable_8]) \n\t" + "addu %[ptr_i], %[frfi], %[tmp1] \n\t" + "addu %[ptr_j], %[frfi], %[tmp2] \n\t" + "addu %[tr], %[frfi], %[tr] \n\t" + "addu %[ti], %[frfi], %[ti] \n\t" + "ulw %[tmp1], 0(%[ptr_i]) \n\t" + "ulw %[tmp2], 0(%[ptr_j]) \n\t" + "ulw %[tmp3], 0(%[tr]) \n\t" + "ulw %[tmp4], 0(%[ti]) \n\t" + "usw %[tmp1], 0(%[ptr_j]) \n\t" + "usw %[tmp2], 0(%[ptr_i]) \n\t" + "usw %[tmp4], 0(%[tr]) \n\t" + "usw %[tmp3], 0(%[ti]) \n\t" + "bgtz %[l], 1b \n\t" + " addiu %[pcoeftable_8], %[pcoeftable_8], 16 \n\t" + ".set pop \n\t" + + : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [ptr_i] "=&r" (ptr_i), + [ptr_j] "=&r" (ptr_j), [tr] "=&r" (tr), [l] "=&r" (l), + [tmp3] "=&r" (tmp3), [pcoeftable_8] "+r" (pcoeftable_8), + [ti] "=&r" (ti), [tmp4] "=&r" (tmp4) + : [frfi] "r" (frfi) + : "memory" + ); + } else if (stages == 7) { + int16_t* pcoeftable_7 = coefTable_7; + + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "addiu %[l], $zero, 56 \n\t" + "1: \n\t" + "addiu %[l], %[l], -4 \n\t" + "lh %[tr], 0(%[pcoeftable_7]) \n\t" + "lh %[ti], 2(%[pcoeftable_7]) \n\t" + "lh %[tmp3], 4(%[pcoeftable_7]) \n\t" + "lh %[tmp4], 6(%[pcoeftable_7]) \n\t" + "addu %[ptr_i], %[frfi], %[tr] \n\t" + "addu %[ptr_j], %[frfi], %[ti] \n\t" + "addu %[tr], %[frfi], %[tmp3] \n\t" + "addu %[ti], %[frfi], %[tmp4] \n\t" + "ulw %[tmp1], 0(%[ptr_i]) \n\t" + "ulw %[tmp2], 0(%[ptr_j]) \n\t" + "ulw %[tmp3], 0(%[tr]) \n\t" + "ulw %[tmp4], 0(%[ti]) \n\t" + "usw %[tmp1], 0(%[ptr_j]) \n\t" + "usw %[tmp2], 0(%[ptr_i]) \n\t" + "usw %[tmp4], 0(%[tr]) \n\t" + "usw %[tmp3], 0(%[ti]) \n\t" + "lh %[tmp1], 8(%[pcoeftable_7]) \n\t" + "lh %[tmp2], 10(%[pcoeftable_7]) \n\t" + "lh %[tr], 12(%[pcoeftable_7]) \n\t" + "lh %[ti], 14(%[pcoeftable_7]) \n\t" + "addu %[ptr_i], %[frfi], %[tmp1] \n\t" + "addu %[ptr_j], %[frfi], %[tmp2] \n\t" + "addu %[tr], %[frfi], %[tr] \n\t" + "addu %[ti], %[frfi], %[ti] \n\t" + "ulw %[tmp1], 0(%[ptr_i]) \n\t" + "ulw %[tmp2], 0(%[ptr_j]) \n\t" + "ulw %[tmp3], 0(%[tr]) \n\t" + "ulw %[tmp4], 0(%[ti]) \n\t" + "usw %[tmp1], 0(%[ptr_j]) \n\t" + "usw %[tmp2], 0(%[ptr_i]) \n\t" + "usw %[tmp4], 0(%[tr]) \n\t" + "usw %[tmp3], 0(%[ti]) \n\t" + "bgtz %[l], 1b \n\t" + " addiu %[pcoeftable_7], %[pcoeftable_7], 16 \n\t" + ".set pop \n\t" + + : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [ptr_i] "=&r" (ptr_i), + [ptr_j] "=&r" (ptr_j), [ti] "=&r" (ti), [tr] "=&r" (tr), + [l] "=&r" (l), [pcoeftable_7] "+r" (pcoeftable_7), + [tmp3] "=&r" (tmp3), [tmp4] "=&r" (tmp4) + : [frfi] "r" (frfi) + : "memory" + ); + } +} diff --git a/third_party/libwebrtc/common_audio/signal_processing/complex_fft.c b/third_party/libwebrtc/common_audio/signal_processing/complex_fft.c new file mode 100644 index 0000000000..ddc9a97b59 --- /dev/null +++ b/third_party/libwebrtc/common_audio/signal_processing/complex_fft.c @@ -0,0 +1,299 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +/* + * This file contains the function WebRtcSpl_ComplexFFT(). + * The description header can be found in signal_processing_library.h + * + */ + +#include "common_audio/signal_processing/complex_fft_tables.h" +#include "common_audio/signal_processing/include/signal_processing_library.h" +#include "rtc_base/system/arch.h" + +#define CFFTSFT 14 +#define CFFTRND 1 +#define CFFTRND2 16384 + +#define CIFFTSFT 14 +#define CIFFTRND 1 + + +int WebRtcSpl_ComplexFFT(int16_t frfi[], int stages, int mode) +{ + int i, j, l, k, istep, n, m; + int16_t wr, wi; + int32_t tr32, ti32, qr32, qi32; + + /* The 1024-value is a constant given from the size of kSinTable1024[], + * and should not be changed depending on the input parameter 'stages' + */ + n = 1 << stages; + if (n > 1024) + return -1; + + l = 1; + k = 10 - 1; /* Constant for given kSinTable1024[]. Do not change + depending on the input parameter 'stages' */ + + if (mode == 0) + { + // mode==0: Low-complexity and Low-accuracy mode + while (l < n) + { + istep = l << 1; + + for (m = 0; m < l; ++m) + { + j = m << k; + + /* The 256-value is a constant given as 1/4 of the size of + * kSinTable1024[], and should not be changed depending on the input + * parameter 'stages'. It will result in 0 <= j < N_SINE_WAVE/2 + */ + wr = kSinTable1024[j + 256]; + wi = -kSinTable1024[j]; + + for (i = m; i < n; i += istep) + { + j = i + l; + + tr32 = (wr * frfi[2 * j] - wi * frfi[2 * j + 1]) >> 15; + + ti32 = (wr * frfi[2 * j + 1] + wi * frfi[2 * j]) >> 15; + + qr32 = (int32_t)frfi[2 * i]; + qi32 = (int32_t)frfi[2 * i + 1]; + frfi[2 * j] = (int16_t)((qr32 - tr32) >> 1); + frfi[2 * j + 1] = (int16_t)((qi32 - ti32) >> 1); + frfi[2 * i] = (int16_t)((qr32 + tr32) >> 1); + frfi[2 * i + 1] = (int16_t)((qi32 + ti32) >> 1); + } + } + + --k; + l = istep; + + } + + } else + { + // mode==1: High-complexity and High-accuracy mode + while (l < n) + { + istep = l << 1; + + for (m = 0; m < l; ++m) + { + j = m << k; + + /* The 256-value is a constant given as 1/4 of the size of + * kSinTable1024[], and should not be changed depending on the input + * parameter 'stages'. It will result in 0 <= j < N_SINE_WAVE/2 + */ + wr = kSinTable1024[j + 256]; + wi = -kSinTable1024[j]; + +#ifdef WEBRTC_ARCH_ARM_V7 + int32_t wri = 0; + __asm __volatile("pkhbt %0, %1, %2, lsl #16" : "=r"(wri) : + "r"((int32_t)wr), "r"((int32_t)wi)); +#endif + + for (i = m; i < n; i += istep) + { + j = i + l; + +#ifdef WEBRTC_ARCH_ARM_V7 + register int32_t frfi_r; + __asm __volatile( + "pkhbt %[frfi_r], %[frfi_even], %[frfi_odd]," + " lsl #16\n\t" + "smlsd %[tr32], %[wri], %[frfi_r], %[cfftrnd]\n\t" + "smladx %[ti32], %[wri], %[frfi_r], %[cfftrnd]\n\t" + :[frfi_r]"=&r"(frfi_r), + [tr32]"=&r"(tr32), + [ti32]"=r"(ti32) + :[frfi_even]"r"((int32_t)frfi[2*j]), + [frfi_odd]"r"((int32_t)frfi[2*j +1]), + [wri]"r"(wri), + [cfftrnd]"r"(CFFTRND)); +#else + tr32 = wr * frfi[2 * j] - wi * frfi[2 * j + 1] + CFFTRND; + + ti32 = wr * frfi[2 * j + 1] + wi * frfi[2 * j] + CFFTRND; +#endif + + tr32 >>= 15 - CFFTSFT; + ti32 >>= 15 - CFFTSFT; + + qr32 = ((int32_t)frfi[2 * i]) * (1 << CFFTSFT); + qi32 = ((int32_t)frfi[2 * i + 1]) * (1 << CFFTSFT); + + frfi[2 * j] = (int16_t)( + (qr32 - tr32 + CFFTRND2) >> (1 + CFFTSFT)); + frfi[2 * j + 1] = (int16_t)( + (qi32 - ti32 + CFFTRND2) >> (1 + CFFTSFT)); + frfi[2 * i] = (int16_t)( + (qr32 + tr32 + CFFTRND2) >> (1 + CFFTSFT)); + frfi[2 * i + 1] = (int16_t)( + (qi32 + ti32 + CFFTRND2) >> (1 + CFFTSFT)); + } + } + + --k; + l = istep; + } + } + return 0; +} + +int WebRtcSpl_ComplexIFFT(int16_t frfi[], int stages, int mode) +{ + size_t i, j, l, istep, n, m; + int k, scale, shift; + int16_t wr, wi; + int32_t tr32, ti32, qr32, qi32; + int32_t tmp32, round2; + + /* The 1024-value is a constant given from the size of kSinTable1024[], + * and should not be changed depending on the input parameter 'stages' + */ + n = ((size_t)1) << stages; + if (n > 1024) + return -1; + + scale = 0; + + l = 1; + k = 10 - 1; /* Constant for given kSinTable1024[]. Do not change + depending on the input parameter 'stages' */ + + while (l < n) + { + // variable scaling, depending upon data + shift = 0; + round2 = 8192; + + tmp32 = WebRtcSpl_MaxAbsValueW16(frfi, 2 * n); + if (tmp32 > 13573) + { + shift++; + scale++; + round2 <<= 1; + } + if (tmp32 > 27146) + { + shift++; + scale++; + round2 <<= 1; + } + + istep = l << 1; + + if (mode == 0) + { + // mode==0: Low-complexity and Low-accuracy mode + for (m = 0; m < l; ++m) + { + j = m << k; + + /* The 256-value is a constant given as 1/4 of the size of + * kSinTable1024[], and should not be changed depending on the input + * parameter 'stages'. It will result in 0 <= j < N_SINE_WAVE/2 + */ + wr = kSinTable1024[j + 256]; + wi = kSinTable1024[j]; + + for (i = m; i < n; i += istep) + { + j = i + l; + + tr32 = (wr * frfi[2 * j] - wi * frfi[2 * j + 1]) >> 15; + + ti32 = (wr * frfi[2 * j + 1] + wi * frfi[2 * j]) >> 15; + + qr32 = (int32_t)frfi[2 * i]; + qi32 = (int32_t)frfi[2 * i + 1]; + frfi[2 * j] = (int16_t)((qr32 - tr32) >> shift); + frfi[2 * j + 1] = (int16_t)((qi32 - ti32) >> shift); + frfi[2 * i] = (int16_t)((qr32 + tr32) >> shift); + frfi[2 * i + 1] = (int16_t)((qi32 + ti32) >> shift); + } + } + } else + { + // mode==1: High-complexity and High-accuracy mode + + for (m = 0; m < l; ++m) + { + j = m << k; + + /* The 256-value is a constant given as 1/4 of the size of + * kSinTable1024[], and should not be changed depending on the input + * parameter 'stages'. It will result in 0 <= j < N_SINE_WAVE/2 + */ + wr = kSinTable1024[j + 256]; + wi = kSinTable1024[j]; + +#ifdef WEBRTC_ARCH_ARM_V7 + int32_t wri = 0; + __asm __volatile("pkhbt %0, %1, %2, lsl #16" : "=r"(wri) : + "r"((int32_t)wr), "r"((int32_t)wi)); +#endif + + for (i = m; i < n; i += istep) + { + j = i + l; + +#ifdef WEBRTC_ARCH_ARM_V7 + register int32_t frfi_r; + __asm __volatile( + "pkhbt %[frfi_r], %[frfi_even], %[frfi_odd], lsl #16\n\t" + "smlsd %[tr32], %[wri], %[frfi_r], %[cifftrnd]\n\t" + "smladx %[ti32], %[wri], %[frfi_r], %[cifftrnd]\n\t" + :[frfi_r]"=&r"(frfi_r), + [tr32]"=&r"(tr32), + [ti32]"=r"(ti32) + :[frfi_even]"r"((int32_t)frfi[2*j]), + [frfi_odd]"r"((int32_t)frfi[2*j +1]), + [wri]"r"(wri), + [cifftrnd]"r"(CIFFTRND) + ); +#else + + tr32 = wr * frfi[2 * j] - wi * frfi[2 * j + 1] + CIFFTRND; + + ti32 = wr * frfi[2 * j + 1] + wi * frfi[2 * j] + CIFFTRND; +#endif + tr32 >>= 15 - CIFFTSFT; + ti32 >>= 15 - CIFFTSFT; + + qr32 = ((int32_t)frfi[2 * i]) * (1 << CIFFTSFT); + qi32 = ((int32_t)frfi[2 * i + 1]) * (1 << CIFFTSFT); + + frfi[2 * j] = (int16_t)( + (qr32 - tr32 + round2) >> (shift + CIFFTSFT)); + frfi[2 * j + 1] = (int16_t)( + (qi32 - ti32 + round2) >> (shift + CIFFTSFT)); + frfi[2 * i] = (int16_t)( + (qr32 + tr32 + round2) >> (shift + CIFFTSFT)); + frfi[2 * i + 1] = (int16_t)( + (qi32 + ti32 + round2) >> (shift + CIFFTSFT)); + } + } + + } + --k; + l = istep; + } + return scale; +} diff --git a/third_party/libwebrtc/common_audio/signal_processing/complex_fft_mips.c b/third_party/libwebrtc/common_audio/signal_processing/complex_fft_mips.c new file mode 100644 index 0000000000..27071f8b39 --- /dev/null +++ b/third_party/libwebrtc/common_audio/signal_processing/complex_fft_mips.c @@ -0,0 +1,328 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#include "common_audio/signal_processing/complex_fft_tables.h" +#include "common_audio/signal_processing/include/signal_processing_library.h" + +#define CFFTSFT 14 +#define CFFTRND 1 +#define CFFTRND2 16384 + +#define CIFFTSFT 14 +#define CIFFTRND 1 + +int WebRtcSpl_ComplexFFT(int16_t frfi[], int stages, int mode) { + int i = 0; + int l = 0; + int k = 0; + int istep = 0; + int n = 0; + int m = 0; + int32_t wr = 0, wi = 0; + int32_t tmp1 = 0; + int32_t tmp2 = 0; + int32_t tmp3 = 0; + int32_t tmp4 = 0; + int32_t tmp5 = 0; + int32_t tmp6 = 0; + int32_t tmp = 0; + int16_t* ptr_j = NULL; + int16_t* ptr_i = NULL; + + n = 1 << stages; + if (n > 1024) { + return -1; + } + + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + + "addiu %[k], $zero, 10 \n\t" + "addiu %[l], $zero, 1 \n\t" + "3: \n\t" + "sll %[istep], %[l], 1 \n\t" + "move %[m], $zero \n\t" + "sll %[tmp], %[l], 2 \n\t" + "move %[i], $zero \n\t" + "2: \n\t" +#if defined(MIPS_DSP_R1_LE) + "sllv %[tmp3], %[m], %[k] \n\t" + "addiu %[tmp2], %[tmp3], 512 \n\t" + "addiu %[m], %[m], 1 \n\t" + "lhx %[wi], %[tmp3](%[kSinTable1024]) \n\t" + "lhx %[wr], %[tmp2](%[kSinTable1024]) \n\t" +#else // #if defined(MIPS_DSP_R1_LE) + "sllv %[tmp3], %[m], %[k] \n\t" + "addu %[ptr_j], %[tmp3], %[kSinTable1024] \n\t" + "addiu %[ptr_i], %[ptr_j], 512 \n\t" + "addiu %[m], %[m], 1 \n\t" + "lh %[wi], 0(%[ptr_j]) \n\t" + "lh %[wr], 0(%[ptr_i]) \n\t" +#endif // #if defined(MIPS_DSP_R1_LE) + "1: \n\t" + "sll %[tmp1], %[i], 2 \n\t" + "addu %[ptr_i], %[frfi], %[tmp1] \n\t" + "addu %[ptr_j], %[ptr_i], %[tmp] \n\t" + "lh %[tmp6], 0(%[ptr_i]) \n\t" + "lh %[tmp5], 2(%[ptr_i]) \n\t" + "lh %[tmp3], 0(%[ptr_j]) \n\t" + "lh %[tmp4], 2(%[ptr_j]) \n\t" + "addu %[i], %[i], %[istep] \n\t" +#if defined(MIPS_DSP_R2_LE) + "mult %[wr], %[tmp3] \n\t" + "madd %[wi], %[tmp4] \n\t" + "mult $ac1, %[wr], %[tmp4] \n\t" + "msub $ac1, %[wi], %[tmp3] \n\t" + "mflo %[tmp1] \n\t" + "mflo %[tmp2], $ac1 \n\t" + "sll %[tmp6], %[tmp6], 14 \n\t" + "sll %[tmp5], %[tmp5], 14 \n\t" + "shra_r.w %[tmp1], %[tmp1], 1 \n\t" + "shra_r.w %[tmp2], %[tmp2], 1 \n\t" + "subu %[tmp4], %[tmp6], %[tmp1] \n\t" + "addu %[tmp1], %[tmp6], %[tmp1] \n\t" + "addu %[tmp6], %[tmp5], %[tmp2] \n\t" + "subu %[tmp5], %[tmp5], %[tmp2] \n\t" + "shra_r.w %[tmp1], %[tmp1], 15 \n\t" + "shra_r.w %[tmp6], %[tmp6], 15 \n\t" + "shra_r.w %[tmp4], %[tmp4], 15 \n\t" + "shra_r.w %[tmp5], %[tmp5], 15 \n\t" +#else // #if defined(MIPS_DSP_R2_LE) + "mul %[tmp2], %[wr], %[tmp4] \n\t" + "mul %[tmp1], %[wr], %[tmp3] \n\t" + "mul %[tmp4], %[wi], %[tmp4] \n\t" + "mul %[tmp3], %[wi], %[tmp3] \n\t" + "sll %[tmp6], %[tmp6], 14 \n\t" + "sll %[tmp5], %[tmp5], 14 \n\t" + "addiu %[tmp6], %[tmp6], 16384 \n\t" + "addiu %[tmp5], %[tmp5], 16384 \n\t" + "addu %[tmp1], %[tmp1], %[tmp4] \n\t" + "subu %[tmp2], %[tmp2], %[tmp3] \n\t" + "addiu %[tmp1], %[tmp1], 1 \n\t" + "addiu %[tmp2], %[tmp2], 1 \n\t" + "sra %[tmp1], %[tmp1], 1 \n\t" + "sra %[tmp2], %[tmp2], 1 \n\t" + "subu %[tmp4], %[tmp6], %[tmp1] \n\t" + "addu %[tmp1], %[tmp6], %[tmp1] \n\t" + "addu %[tmp6], %[tmp5], %[tmp2] \n\t" + "subu %[tmp5], %[tmp5], %[tmp2] \n\t" + "sra %[tmp4], %[tmp4], 15 \n\t" + "sra %[tmp1], %[tmp1], 15 \n\t" + "sra %[tmp6], %[tmp6], 15 \n\t" + "sra %[tmp5], %[tmp5], 15 \n\t" +#endif // #if defined(MIPS_DSP_R2_LE) + "sh %[tmp1], 0(%[ptr_i]) \n\t" + "sh %[tmp6], 2(%[ptr_i]) \n\t" + "sh %[tmp4], 0(%[ptr_j]) \n\t" + "blt %[i], %[n], 1b \n\t" + " sh %[tmp5], 2(%[ptr_j]) \n\t" + "blt %[m], %[l], 2b \n\t" + " addu %[i], $zero, %[m] \n\t" + "move %[l], %[istep] \n\t" + "blt %[l], %[n], 3b \n\t" + " addiu %[k], %[k], -1 \n\t" + + ".set pop \n\t" + + : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3), + [tmp4] "=&r" (tmp4), [tmp5] "=&r" (tmp5), [tmp6] "=&r" (tmp6), + [ptr_i] "=&r" (ptr_i), [i] "=&r" (i), [wi] "=&r" (wi), [wr] "=&r" (wr), + [m] "=&r" (m), [istep] "=&r" (istep), [l] "=&r" (l), [k] "=&r" (k), + [ptr_j] "=&r" (ptr_j), [tmp] "=&r" (tmp) + : [n] "r" (n), [frfi] "r" (frfi), [kSinTable1024] "r" (kSinTable1024) + : "hi", "lo", "memory" +#if defined(MIPS_DSP_R2_LE) + , "$ac1hi", "$ac1lo" +#endif // #if defined(MIPS_DSP_R2_LE) + ); + + return 0; +} + +int WebRtcSpl_ComplexIFFT(int16_t frfi[], int stages, int mode) { + int i = 0, l = 0, k = 0; + int istep = 0, n = 0, m = 0; + int scale = 0, shift = 0; + int32_t wr = 0, wi = 0; + int32_t tmp1 = 0, tmp2 = 0, tmp3 = 0, tmp4 = 0; + int32_t tmp5 = 0, tmp6 = 0, tmp = 0, tempMax = 0, round2 = 0; + int16_t* ptr_j = NULL; + int16_t* ptr_i = NULL; + + n = 1 << stages; + if (n > 1024) { + return -1; + } + + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + + "addiu %[k], $zero, 10 \n\t" + "addiu %[l], $zero, 1 \n\t" + "move %[scale], $zero \n\t" + "3: \n\t" + "addiu %[shift], $zero, 14 \n\t" + "addiu %[round2], $zero, 8192 \n\t" + "move %[ptr_i], %[frfi] \n\t" + "move %[tempMax], $zero \n\t" + "addu %[i], %[n], %[n] \n\t" + "5: \n\t" + "lh %[tmp1], 0(%[ptr_i]) \n\t" + "lh %[tmp2], 2(%[ptr_i]) \n\t" + "lh %[tmp3], 4(%[ptr_i]) \n\t" + "lh %[tmp4], 6(%[ptr_i]) \n\t" +#if defined(MIPS_DSP_R1_LE) + "absq_s.w %[tmp1], %[tmp1] \n\t" + "absq_s.w %[tmp2], %[tmp2] \n\t" + "absq_s.w %[tmp3], %[tmp3] \n\t" + "absq_s.w %[tmp4], %[tmp4] \n\t" +#else // #if defined(MIPS_DSP_R1_LE) + "slt %[tmp5], %[tmp1], $zero \n\t" + "subu %[tmp6], $zero, %[tmp1] \n\t" + "movn %[tmp1], %[tmp6], %[tmp5] \n\t" + "slt %[tmp5], %[tmp2], $zero \n\t" + "subu %[tmp6], $zero, %[tmp2] \n\t" + "movn %[tmp2], %[tmp6], %[tmp5] \n\t" + "slt %[tmp5], %[tmp3], $zero \n\t" + "subu %[tmp6], $zero, %[tmp3] \n\t" + "movn %[tmp3], %[tmp6], %[tmp5] \n\t" + "slt %[tmp5], %[tmp4], $zero \n\t" + "subu %[tmp6], $zero, %[tmp4] \n\t" + "movn %[tmp4], %[tmp6], %[tmp5] \n\t" +#endif // #if defined(MIPS_DSP_R1_LE) + "slt %[tmp5], %[tempMax], %[tmp1] \n\t" + "movn %[tempMax], %[tmp1], %[tmp5] \n\t" + "addiu %[i], %[i], -4 \n\t" + "slt %[tmp5], %[tempMax], %[tmp2] \n\t" + "movn %[tempMax], %[tmp2], %[tmp5] \n\t" + "slt %[tmp5], %[tempMax], %[tmp3] \n\t" + "movn %[tempMax], %[tmp3], %[tmp5] \n\t" + "slt %[tmp5], %[tempMax], %[tmp4] \n\t" + "movn %[tempMax], %[tmp4], %[tmp5] \n\t" + "bgtz %[i], 5b \n\t" + " addiu %[ptr_i], %[ptr_i], 8 \n\t" + "addiu %[tmp1], $zero, 13573 \n\t" + "addiu %[tmp2], $zero, 27146 \n\t" +#if !defined(MIPS32_R2_LE) + "sll %[tempMax], %[tempMax], 16 \n\t" + "sra %[tempMax], %[tempMax], 16 \n\t" +#else // #if !defined(MIPS32_R2_LE) + "seh %[tempMax] \n\t" +#endif // #if !defined(MIPS32_R2_LE) + "slt %[tmp1], %[tmp1], %[tempMax] \n\t" + "slt %[tmp2], %[tmp2], %[tempMax] \n\t" + "addu %[tmp1], %[tmp1], %[tmp2] \n\t" + "addu %[shift], %[shift], %[tmp1] \n\t" + "addu %[scale], %[scale], %[tmp1] \n\t" + "sllv %[round2], %[round2], %[tmp1] \n\t" + "sll %[istep], %[l], 1 \n\t" + "move %[m], $zero \n\t" + "sll %[tmp], %[l], 2 \n\t" + "2: \n\t" +#if defined(MIPS_DSP_R1_LE) + "sllv %[tmp3], %[m], %[k] \n\t" + "addiu %[tmp2], %[tmp3], 512 \n\t" + "addiu %[m], %[m], 1 \n\t" + "lhx %[wi], %[tmp3](%[kSinTable1024]) \n\t" + "lhx %[wr], %[tmp2](%[kSinTable1024]) \n\t" +#else // #if defined(MIPS_DSP_R1_LE) + "sllv %[tmp3], %[m], %[k] \n\t" + "addu %[ptr_j], %[tmp3], %[kSinTable1024] \n\t" + "addiu %[ptr_i], %[ptr_j], 512 \n\t" + "addiu %[m], %[m], 1 \n\t" + "lh %[wi], 0(%[ptr_j]) \n\t" + "lh %[wr], 0(%[ptr_i]) \n\t" +#endif // #if defined(MIPS_DSP_R1_LE) + "1: \n\t" + "sll %[tmp1], %[i], 2 \n\t" + "addu %[ptr_i], %[frfi], %[tmp1] \n\t" + "addu %[ptr_j], %[ptr_i], %[tmp] \n\t" + "lh %[tmp3], 0(%[ptr_j]) \n\t" + "lh %[tmp4], 2(%[ptr_j]) \n\t" + "lh %[tmp6], 0(%[ptr_i]) \n\t" + "lh %[tmp5], 2(%[ptr_i]) \n\t" + "addu %[i], %[i], %[istep] \n\t" +#if defined(MIPS_DSP_R2_LE) + "mult %[wr], %[tmp3] \n\t" + "msub %[wi], %[tmp4] \n\t" + "mult $ac1, %[wr], %[tmp4] \n\t" + "madd $ac1, %[wi], %[tmp3] \n\t" + "mflo %[tmp1] \n\t" + "mflo %[tmp2], $ac1 \n\t" + "sll %[tmp6], %[tmp6], 14 \n\t" + "sll %[tmp5], %[tmp5], 14 \n\t" + "shra_r.w %[tmp1], %[tmp1], 1 \n\t" + "shra_r.w %[tmp2], %[tmp2], 1 \n\t" + "addu %[tmp6], %[tmp6], %[round2] \n\t" + "addu %[tmp5], %[tmp5], %[round2] \n\t" + "subu %[tmp4], %[tmp6], %[tmp1] \n\t" + "addu %[tmp1], %[tmp6], %[tmp1] \n\t" + "addu %[tmp6], %[tmp5], %[tmp2] \n\t" + "subu %[tmp5], %[tmp5], %[tmp2] \n\t" + "srav %[tmp4], %[tmp4], %[shift] \n\t" + "srav %[tmp1], %[tmp1], %[shift] \n\t" + "srav %[tmp6], %[tmp6], %[shift] \n\t" + "srav %[tmp5], %[tmp5], %[shift] \n\t" +#else // #if defined(MIPS_DSP_R2_LE) + "mul %[tmp1], %[wr], %[tmp3] \n\t" + "mul %[tmp2], %[wr], %[tmp4] \n\t" + "mul %[tmp4], %[wi], %[tmp4] \n\t" + "mul %[tmp3], %[wi], %[tmp3] \n\t" + "sll %[tmp6], %[tmp6], 14 \n\t" + "sll %[tmp5], %[tmp5], 14 \n\t" + "sub %[tmp1], %[tmp1], %[tmp4] \n\t" + "addu %[tmp2], %[tmp2], %[tmp3] \n\t" + "addiu %[tmp1], %[tmp1], 1 \n\t" + "addiu %[tmp2], %[tmp2], 1 \n\t" + "sra %[tmp2], %[tmp2], 1 \n\t" + "sra %[tmp1], %[tmp1], 1 \n\t" + "addu %[tmp6], %[tmp6], %[round2] \n\t" + "addu %[tmp5], %[tmp5], %[round2] \n\t" + "subu %[tmp4], %[tmp6], %[tmp1] \n\t" + "addu %[tmp1], %[tmp6], %[tmp1] \n\t" + "addu %[tmp6], %[tmp5], %[tmp2] \n\t" + "subu %[tmp5], %[tmp5], %[tmp2] \n\t" + "sra %[tmp4], %[tmp4], %[shift] \n\t" + "sra %[tmp1], %[tmp1], %[shift] \n\t" + "sra %[tmp6], %[tmp6], %[shift] \n\t" + "sra %[tmp5], %[tmp5], %[shift] \n\t" +#endif // #if defined(MIPS_DSP_R2_LE) + "sh %[tmp1], 0(%[ptr_i]) \n\t" + "sh %[tmp6], 2(%[ptr_i]) \n\t" + "sh %[tmp4], 0(%[ptr_j]) \n\t" + "blt %[i], %[n], 1b \n\t" + " sh %[tmp5], 2(%[ptr_j]) \n\t" + "blt %[m], %[l], 2b \n\t" + " addu %[i], $zero, %[m] \n\t" + "move %[l], %[istep] \n\t" + "blt %[l], %[n], 3b \n\t" + " addiu %[k], %[k], -1 \n\t" + + ".set pop \n\t" + + : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3), + [tmp4] "=&r" (tmp4), [tmp5] "=&r" (tmp5), [tmp6] "=&r" (tmp6), + [ptr_i] "=&r" (ptr_i), [i] "=&r" (i), [m] "=&r" (m), [tmp] "=&r" (tmp), + [istep] "=&r" (istep), [wi] "=&r" (wi), [wr] "=&r" (wr), [l] "=&r" (l), + [k] "=&r" (k), [round2] "=&r" (round2), [ptr_j] "=&r" (ptr_j), + [shift] "=&r" (shift), [scale] "=&r" (scale), [tempMax] "=&r" (tempMax) + : [n] "r" (n), [frfi] "r" (frfi), [kSinTable1024] "r" (kSinTable1024) + : "hi", "lo", "memory" +#if defined(MIPS_DSP_R2_LE) + , "$ac1hi", "$ac1lo" +#endif // #if defined(MIPS_DSP_R2_LE) + ); + + return scale; + +} diff --git a/third_party/libwebrtc/common_audio/signal_processing/complex_fft_tables.h b/third_party/libwebrtc/common_audio/signal_processing/complex_fft_tables.h new file mode 100644 index 0000000000..90fac072d2 --- /dev/null +++ b/third_party/libwebrtc/common_audio/signal_processing/complex_fft_tables.h @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef COMMON_AUDIO_SIGNAL_PROCESSING_COMPLEX_FFT_TABLES_H_ +#define COMMON_AUDIO_SIGNAL_PROCESSING_COMPLEX_FFT_TABLES_H_ + +#include <stdint.h> + +static const int16_t kSinTable1024[] = { + 0, 201, 402, 603, 804, 1005, 1206, 1406, 1607, + 1808, 2009, 2209, 2410, 2610, 2811, 3011, 3211, 3411, + 3611, 3811, 4011, 4210, 4409, 4608, 4807, 5006, 5205, + 5403, 5601, 5799, 5997, 6195, 6392, 6589, 6786, 6982, + 7179, 7375, 7571, 7766, 7961, 8156, 8351, 8545, 8739, + 8932, 9126, 9319, 9511, 9703, 9895, 10087, 10278, 10469, + 10659, 10849, 11038, 11227, 11416, 11604, 11792, 11980, 12166, + 12353, 12539, 12724, 12909, 13094, 13278, 13462, 13645, 13827, + 14009, 14191, 14372, 14552, 14732, 14911, 15090, 15268, 15446, + 15623, 15799, 15975, 16150, 16325, 16499, 16672, 16845, 17017, + 17189, 17360, 17530, 17699, 17868, 18036, 18204, 18371, 18537, + 18702, 18867, 19031, 19194, 19357, 19519, 19680, 19840, 20000, + 20159, 20317, 20474, 20631, 20787, 20942, 21096, 21249, 21402, + 21554, 21705, 21855, 22004, 22153, 22301, 22448, 22594, 22739, + 22883, 23027, 23169, 23311, 23452, 23592, 23731, 23869, 24006, + 24143, 24278, 24413, 24546, 24679, 24811, 24942, 25072, 25201, + 25329, 25456, 25582, 25707, 25831, 25954, 26077, 26198, 26318, + 26437, 26556, 26673, 26789, 26905, 27019, 27132, 27244, 27355, + 27466, 27575, 27683, 27790, 27896, 28001, 28105, 28208, 28309, + 28410, 28510, 28608, 28706, 28802, 28897, 28992, 29085, 29177, + 29268, 29358, 29446, 29534, 29621, 29706, 29790, 29873, 29955, + 30036, 30116, 30195, 30272, 30349, 30424, 30498, 30571, 30643, + 30713, 30783, 30851, 30918, 30984, 31049, 31113, 31175, 31236, + 31297, 31356, 31413, 31470, 31525, 31580, 31633, 31684, 31735, + 31785, 31833, 31880, 31926, 31970, 32014, 32056, 32097, 32137, + 32176, 32213, 32249, 32284, 32318, 32350, 32382, 32412, 32441, + 32468, 32495, 32520, 32544, 32567, 32588, 32609, 32628, 32646, + 32662, 32678, 32692, 32705, 32717, 32727, 32736, 32744, 32751, + 32757, 32761, 32764, 32766, 32767, 32766, 32764, 32761, 32757, + 32751, 32744, 32736, 32727, 32717, 32705, 32692, 32678, 32662, + 32646, 32628, 32609, 32588, 32567, 32544, 32520, 32495, 32468, + 32441, 32412, 32382, 32350, 32318, 32284, 32249, 32213, 32176, + 32137, 32097, 32056, 32014, 31970, 31926, 31880, 31833, 31785, + 31735, 31684, 31633, 31580, 31525, 31470, 31413, 31356, 31297, + 31236, 31175, 31113, 31049, 30984, 30918, 30851, 30783, 30713, + 30643, 30571, 30498, 30424, 30349, 30272, 30195, 30116, 30036, + 29955, 29873, 29790, 29706, 29621, 29534, 29446, 29358, 29268, + 29177, 29085, 28992, 28897, 28802, 28706, 28608, 28510, 28410, + 28309, 28208, 28105, 28001, 27896, 27790, 27683, 27575, 27466, + 27355, 27244, 27132, 27019, 26905, 26789, 26673, 26556, 26437, + 26318, 26198, 26077, 25954, 25831, 25707, 25582, 25456, 25329, + 25201, 25072, 24942, 24811, 24679, 24546, 24413, 24278, 24143, + 24006, 23869, 23731, 23592, 23452, 23311, 23169, 23027, 22883, + 22739, 22594, 22448, 22301, 22153, 22004, 21855, 21705, 21554, + 21402, 21249, 21096, 20942, 20787, 20631, 20474, 20317, 20159, + 20000, 19840, 19680, 19519, 19357, 19194, 19031, 18867, 18702, + 18537, 18371, 18204, 18036, 17868, 17699, 17530, 17360, 17189, + 17017, 16845, 16672, 16499, 16325, 16150, 15975, 15799, 15623, + 15446, 15268, 15090, 14911, 14732, 14552, 14372, 14191, 14009, + 13827, 13645, 13462, 13278, 13094, 12909, 12724, 12539, 12353, + 12166, 11980, 11792, 11604, 11416, 11227, 11038, 10849, 10659, + 10469, 10278, 10087, 9895, 9703, 9511, 9319, 9126, 8932, + 8739, 8545, 8351, 8156, 7961, 7766, 7571, 7375, 7179, + 6982, 6786, 6589, 6392, 6195, 5997, 5799, 5601, 5403, + 5205, 5006, 4807, 4608, 4409, 4210, 4011, 3811, 3611, + 3411, 3211, 3011, 2811, 2610, 2410, 2209, 2009, 1808, + 1607, 1406, 1206, 1005, 804, 603, 402, 201, 0, + -201, -402, -603, -804, -1005, -1206, -1406, -1607, -1808, + -2009, -2209, -2410, -2610, -2811, -3011, -3211, -3411, -3611, + -3811, -4011, -4210, -4409, -4608, -4807, -5006, -5205, -5403, + -5601, -5799, -5997, -6195, -6392, -6589, -6786, -6982, -7179, + -7375, -7571, -7766, -7961, -8156, -8351, -8545, -8739, -8932, + -9126, -9319, -9511, -9703, -9895, -10087, -10278, -10469, -10659, + -10849, -11038, -11227, -11416, -11604, -11792, -11980, -12166, -12353, + -12539, -12724, -12909, -13094, -13278, -13462, -13645, -13827, -14009, + -14191, -14372, -14552, -14732, -14911, -15090, -15268, -15446, -15623, + -15799, -15975, -16150, -16325, -16499, -16672, -16845, -17017, -17189, + -17360, -17530, -17699, -17868, -18036, -18204, -18371, -18537, -18702, + -18867, -19031, -19194, -19357, -19519, -19680, -19840, -20000, -20159, + -20317, -20474, -20631, -20787, -20942, -21096, -21249, -21402, -21554, + -21705, -21855, -22004, -22153, -22301, -22448, -22594, -22739, -22883, + -23027, -23169, -23311, -23452, -23592, -23731, -23869, -24006, -24143, + -24278, -24413, -24546, -24679, -24811, -24942, -25072, -25201, -25329, + -25456, -25582, -25707, -25831, -25954, -26077, -26198, -26318, -26437, + -26556, -26673, -26789, -26905, -27019, -27132, -27244, -27355, -27466, + -27575, -27683, -27790, -27896, -28001, -28105, -28208, -28309, -28410, + -28510, -28608, -28706, -28802, -28897, -28992, -29085, -29177, -29268, + -29358, -29446, -29534, -29621, -29706, -29790, -29873, -29955, -30036, + -30116, -30195, -30272, -30349, -30424, -30498, -30571, -30643, -30713, + -30783, -30851, -30918, -30984, -31049, -31113, -31175, -31236, -31297, + -31356, -31413, -31470, -31525, -31580, -31633, -31684, -31735, -31785, + -31833, -31880, -31926, -31970, -32014, -32056, -32097, -32137, -32176, + -32213, -32249, -32284, -32318, -32350, -32382, -32412, -32441, -32468, + -32495, -32520, -32544, -32567, -32588, -32609, -32628, -32646, -32662, + -32678, -32692, -32705, -32717, -32727, -32736, -32744, -32751, -32757, + -32761, -32764, -32766, -32767, -32766, -32764, -32761, -32757, -32751, + -32744, -32736, -32727, -32717, -32705, -32692, -32678, -32662, -32646, + -32628, -32609, -32588, -32567, -32544, -32520, -32495, -32468, -32441, + -32412, -32382, -32350, -32318, -32284, -32249, -32213, -32176, -32137, + -32097, -32056, -32014, -31970, -31926, -31880, -31833, -31785, -31735, + -31684, -31633, -31580, -31525, -31470, -31413, -31356, -31297, -31236, + -31175, -31113, -31049, -30984, -30918, -30851, -30783, -30713, -30643, + -30571, -30498, -30424, -30349, -30272, -30195, -30116, -30036, -29955, + -29873, -29790, -29706, -29621, -29534, -29446, -29358, -29268, -29177, + -29085, -28992, -28897, -28802, -28706, -28608, -28510, -28410, -28309, + -28208, -28105, -28001, -27896, -27790, -27683, -27575, -27466, -27355, + -27244, -27132, -27019, -26905, -26789, -26673, -26556, -26437, -26318, + -26198, -26077, -25954, -25831, -25707, -25582, -25456, -25329, -25201, + -25072, -24942, -24811, -24679, -24546, -24413, -24278, -24143, -24006, + -23869, -23731, -23592, -23452, -23311, -23169, -23027, -22883, -22739, + -22594, -22448, -22301, -22153, -22004, -21855, -21705, -21554, -21402, + -21249, -21096, -20942, -20787, -20631, -20474, -20317, -20159, -20000, + -19840, -19680, -19519, -19357, -19194, -19031, -18867, -18702, -18537, + -18371, -18204, -18036, -17868, -17699, -17530, -17360, -17189, -17017, + -16845, -16672, -16499, -16325, -16150, -15975, -15799, -15623, -15446, + -15268, -15090, -14911, -14732, -14552, -14372, -14191, -14009, -13827, + -13645, -13462, -13278, -13094, -12909, -12724, -12539, -12353, -12166, + -11980, -11792, -11604, -11416, -11227, -11038, -10849, -10659, -10469, + -10278, -10087, -9895, -9703, -9511, -9319, -9126, -8932, -8739, + -8545, -8351, -8156, -7961, -7766, -7571, -7375, -7179, -6982, + -6786, -6589, -6392, -6195, -5997, -5799, -5601, -5403, -5205, + -5006, -4807, -4608, -4409, -4210, -4011, -3811, -3611, -3411, + -3211, -3011, -2811, -2610, -2410, -2209, -2009, -1808, -1607, + -1406, -1206, -1005, -804, -603, -402, -201}; + +#endif // COMMON_AUDIO_SIGNAL_PROCESSING_COMPLEX_FFT_TABLES_H_ diff --git a/third_party/libwebrtc/common_audio/signal_processing/copy_set_operations.c b/third_party/libwebrtc/common_audio/signal_processing/copy_set_operations.c new file mode 100644 index 0000000000..ae709d40f0 --- /dev/null +++ b/third_party/libwebrtc/common_audio/signal_processing/copy_set_operations.c @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +/* + * This file contains the implementation of functions + * WebRtcSpl_MemSetW16() + * WebRtcSpl_MemSetW32() + * WebRtcSpl_MemCpyReversedOrder() + * WebRtcSpl_CopyFromEndW16() + * WebRtcSpl_ZerosArrayW16() + * WebRtcSpl_ZerosArrayW32() + * + * The description header can be found in signal_processing_library.h + * + */ + +#include <string.h> +#include "common_audio/signal_processing/include/signal_processing_library.h" + + +void WebRtcSpl_MemSetW16(int16_t *ptr, int16_t set_value, size_t length) +{ + size_t j; + int16_t *arrptr = ptr; + + for (j = length; j > 0; j--) + { + *arrptr++ = set_value; + } +} + +void WebRtcSpl_MemSetW32(int32_t *ptr, int32_t set_value, size_t length) +{ + size_t j; + int32_t *arrptr = ptr; + + for (j = length; j > 0; j--) + { + *arrptr++ = set_value; + } +} + +void WebRtcSpl_MemCpyReversedOrder(int16_t* dest, + int16_t* source, + size_t length) +{ + size_t j; + int16_t* destPtr = dest; + int16_t* sourcePtr = source; + + for (j = 0; j < length; j++) + { + *destPtr-- = *sourcePtr++; + } +} + +void WebRtcSpl_CopyFromEndW16(const int16_t *vector_in, + size_t length, + size_t samples, + int16_t *vector_out) +{ + // Copy the last <samples> of the input vector to vector_out + WEBRTC_SPL_MEMCPY_W16(vector_out, &vector_in[length - samples], samples); +} + +void WebRtcSpl_ZerosArrayW16(int16_t *vector, size_t length) +{ + WebRtcSpl_MemSetW16(vector, 0, length); +} + +void WebRtcSpl_ZerosArrayW32(int32_t *vector, size_t length) +{ + WebRtcSpl_MemSetW32(vector, 0, length); +} diff --git a/third_party/libwebrtc/common_audio/signal_processing/cross_correlation.c b/third_party/libwebrtc/common_audio/signal_processing/cross_correlation.c new file mode 100644 index 0000000000..c6267c92c2 --- /dev/null +++ b/third_party/libwebrtc/common_audio/signal_processing/cross_correlation.c @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "common_audio/signal_processing/include/signal_processing_library.h" + +/* C version of WebRtcSpl_CrossCorrelation() for generic platforms. */ +void WebRtcSpl_CrossCorrelationC(int32_t* cross_correlation, + const int16_t* seq1, + const int16_t* seq2, + size_t dim_seq, + size_t dim_cross_correlation, + int right_shifts, + int step_seq2) { + size_t i = 0, j = 0; + + for (i = 0; i < dim_cross_correlation; i++) { + int32_t corr = 0; + for (j = 0; j < dim_seq; j++) + corr += (seq1[j] * seq2[j]) >> right_shifts; + seq2 += step_seq2; + *cross_correlation++ = corr; + } +} diff --git a/third_party/libwebrtc/common_audio/signal_processing/cross_correlation_mips.c b/third_party/libwebrtc/common_audio/signal_processing/cross_correlation_mips.c new file mode 100644 index 0000000000..c395101900 --- /dev/null +++ b/third_party/libwebrtc/common_audio/signal_processing/cross_correlation_mips.c @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "common_audio/signal_processing/include/signal_processing_library.h" + +void WebRtcSpl_CrossCorrelation_mips(int32_t* cross_correlation, + const int16_t* seq1, + const int16_t* seq2, + size_t dim_seq, + size_t dim_cross_correlation, + int right_shifts, + int step_seq2) { + + int32_t t0 = 0, t1 = 0, t2 = 0, t3 = 0, sum = 0; + int16_t *pseq2 = NULL; + int16_t *pseq1 = NULL; + int16_t *pseq1_0 = (int16_t*)&seq1[0]; + int16_t *pseq2_0 = (int16_t*)&seq2[0]; + int k = 0; + + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "sll %[step_seq2], %[step_seq2], 1 \n\t" + "andi %[t0], %[dim_seq], 1 \n\t" + "bgtz %[t0], 3f \n\t" + " nop \n\t" + "1: \n\t" + "move %[pseq1], %[pseq1_0] \n\t" + "move %[pseq2], %[pseq2_0] \n\t" + "sra %[k], %[dim_seq], 1 \n\t" + "addiu %[dim_cc], %[dim_cc], -1 \n\t" + "xor %[sum], %[sum], %[sum] \n\t" + "2: \n\t" + "lh %[t0], 0(%[pseq1]) \n\t" + "lh %[t1], 0(%[pseq2]) \n\t" + "lh %[t2], 2(%[pseq1]) \n\t" + "lh %[t3], 2(%[pseq2]) \n\t" + "mul %[t0], %[t0], %[t1] \n\t" + "addiu %[k], %[k], -1 \n\t" + "mul %[t2], %[t2], %[t3] \n\t" + "addiu %[pseq1], %[pseq1], 4 \n\t" + "addiu %[pseq2], %[pseq2], 4 \n\t" + "srav %[t0], %[t0], %[right_shifts] \n\t" + "addu %[sum], %[sum], %[t0] \n\t" + "srav %[t2], %[t2], %[right_shifts] \n\t" + "bgtz %[k], 2b \n\t" + " addu %[sum], %[sum], %[t2] \n\t" + "addu %[pseq2_0], %[pseq2_0], %[step_seq2] \n\t" + "sw %[sum], 0(%[cc]) \n\t" + "bgtz %[dim_cc], 1b \n\t" + " addiu %[cc], %[cc], 4 \n\t" + "b 6f \n\t" + " nop \n\t" + "3: \n\t" + "move %[pseq1], %[pseq1_0] \n\t" + "move %[pseq2], %[pseq2_0] \n\t" + "sra %[k], %[dim_seq], 1 \n\t" + "addiu %[dim_cc], %[dim_cc], -1 \n\t" + "beqz %[k], 5f \n\t" + " xor %[sum], %[sum], %[sum] \n\t" + "4: \n\t" + "lh %[t0], 0(%[pseq1]) \n\t" + "lh %[t1], 0(%[pseq2]) \n\t" + "lh %[t2], 2(%[pseq1]) \n\t" + "lh %[t3], 2(%[pseq2]) \n\t" + "mul %[t0], %[t0], %[t1] \n\t" + "addiu %[k], %[k], -1 \n\t" + "mul %[t2], %[t2], %[t3] \n\t" + "addiu %[pseq1], %[pseq1], 4 \n\t" + "addiu %[pseq2], %[pseq2], 4 \n\t" + "srav %[t0], %[t0], %[right_shifts] \n\t" + "addu %[sum], %[sum], %[t0] \n\t" + "srav %[t2], %[t2], %[right_shifts] \n\t" + "bgtz %[k], 4b \n\t" + " addu %[sum], %[sum], %[t2] \n\t" + "5: \n\t" + "lh %[t0], 0(%[pseq1]) \n\t" + "lh %[t1], 0(%[pseq2]) \n\t" + "mul %[t0], %[t0], %[t1] \n\t" + "srav %[t0], %[t0], %[right_shifts] \n\t" + "addu %[sum], %[sum], %[t0] \n\t" + "addu %[pseq2_0], %[pseq2_0], %[step_seq2] \n\t" + "sw %[sum], 0(%[cc]) \n\t" + "bgtz %[dim_cc], 3b \n\t" + " addiu %[cc], %[cc], 4 \n\t" + "6: \n\t" + ".set pop \n\t" + : [step_seq2] "+r" (step_seq2), [t0] "=&r" (t0), [t1] "=&r" (t1), + [t2] "=&r" (t2), [t3] "=&r" (t3), [pseq1] "=&r" (pseq1), + [pseq2] "=&r" (pseq2), [pseq1_0] "+r" (pseq1_0), [pseq2_0] "+r" (pseq2_0), + [k] "=&r" (k), [dim_cc] "+r" (dim_cross_correlation), [sum] "=&r" (sum), + [cc] "+r" (cross_correlation) + : [dim_seq] "r" (dim_seq), [right_shifts] "r" (right_shifts) + : "hi", "lo", "memory" + ); +} diff --git a/third_party/libwebrtc/common_audio/signal_processing/cross_correlation_neon.c b/third_party/libwebrtc/common_audio/signal_processing/cross_correlation_neon.c new file mode 100644 index 0000000000..f2afbdf9f5 --- /dev/null +++ b/third_party/libwebrtc/common_audio/signal_processing/cross_correlation_neon.c @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "common_audio/signal_processing/include/signal_processing_library.h" +#include "rtc_base/system/arch.h" + +#include <arm_neon.h> + +static inline void DotProductWithScaleNeon(int32_t* cross_correlation, + const int16_t* vector1, + const int16_t* vector2, + size_t length, + int scaling) { + size_t i = 0; + size_t len1 = length >> 3; + size_t len2 = length & 7; + int64x2_t sum0 = vdupq_n_s64(0); + int64x2_t sum1 = vdupq_n_s64(0); + + for (i = len1; i > 0; i -= 1) { + int16x8_t seq1_16x8 = vld1q_s16(vector1); + int16x8_t seq2_16x8 = vld1q_s16(vector2); +#if defined(WEBRTC_ARCH_ARM64) + int32x4_t tmp0 = vmull_s16(vget_low_s16(seq1_16x8), + vget_low_s16(seq2_16x8)); + int32x4_t tmp1 = vmull_high_s16(seq1_16x8, seq2_16x8); +#else + int32x4_t tmp0 = vmull_s16(vget_low_s16(seq1_16x8), + vget_low_s16(seq2_16x8)); + int32x4_t tmp1 = vmull_s16(vget_high_s16(seq1_16x8), + vget_high_s16(seq2_16x8)); +#endif + sum0 = vpadalq_s32(sum0, tmp0); + sum1 = vpadalq_s32(sum1, tmp1); + vector1 += 8; + vector2 += 8; + } + + // Calculate the rest of the samples. + int64_t sum_res = 0; + for (i = len2; i > 0; i -= 1) { + sum_res += WEBRTC_SPL_MUL_16_16(*vector1, *vector2); + vector1++; + vector2++; + } + + sum0 = vaddq_s64(sum0, sum1); +#if defined(WEBRTC_ARCH_ARM64) + int64_t sum2 = vaddvq_s64(sum0); + *cross_correlation = (int32_t)((sum2 + sum_res) >> scaling); +#else + int64x1_t shift = vdup_n_s64(-scaling); + int64x1_t sum2 = vadd_s64(vget_low_s64(sum0), vget_high_s64(sum0)); + sum2 = vadd_s64(sum2, vdup_n_s64(sum_res)); + sum2 = vshl_s64(sum2, shift); + vst1_lane_s32(cross_correlation, vreinterpret_s32_s64(sum2), 0); +#endif +} + +/* NEON version of WebRtcSpl_CrossCorrelation() for ARM32/64 platforms. */ +void WebRtcSpl_CrossCorrelationNeon(int32_t* cross_correlation, + const int16_t* seq1, + const int16_t* seq2, + size_t dim_seq, + size_t dim_cross_correlation, + int right_shifts, + int step_seq2) { + size_t i = 0; + + for (i = 0; i < dim_cross_correlation; i++) { + const int16_t* seq1_ptr = seq1; + const int16_t* seq2_ptr = seq2 + (step_seq2 * i); + + DotProductWithScaleNeon(cross_correlation, + seq1_ptr, + seq2_ptr, + dim_seq, + right_shifts); + cross_correlation++; + } +} diff --git a/third_party/libwebrtc/common_audio/signal_processing/division_operations.c b/third_party/libwebrtc/common_audio/signal_processing/division_operations.c new file mode 100644 index 0000000000..4764ddfccd --- /dev/null +++ b/third_party/libwebrtc/common_audio/signal_processing/division_operations.c @@ -0,0 +1,140 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +/* + * This file contains implementations of the divisions + * WebRtcSpl_DivU32U16() + * WebRtcSpl_DivW32W16() + * WebRtcSpl_DivW32W16ResW16() + * WebRtcSpl_DivResultInQ31() + * WebRtcSpl_DivW32HiLow() + * + * The description header can be found in signal_processing_library.h + * + */ + +#include "common_audio/signal_processing/include/signal_processing_library.h" +#include "rtc_base/sanitizer.h" + +uint32_t WebRtcSpl_DivU32U16(uint32_t num, uint16_t den) +{ + // Guard against division with 0 + if (den != 0) + { + return (uint32_t)(num / den); + } else + { + return (uint32_t)0xFFFFFFFF; + } +} + +int32_t WebRtcSpl_DivW32W16(int32_t num, int16_t den) +{ + // Guard against division with 0 + if (den != 0) + { + return (int32_t)(num / den); + } else + { + return (int32_t)0x7FFFFFFF; + } +} + +int16_t WebRtcSpl_DivW32W16ResW16(int32_t num, int16_t den) +{ + // Guard against division with 0 + if (den != 0) + { + return (int16_t)(num / den); + } else + { + return (int16_t)0x7FFF; + } +} + +int32_t WebRtcSpl_DivResultInQ31(int32_t num, int32_t den) +{ + int32_t L_num = num; + int32_t L_den = den; + int32_t div = 0; + int k = 31; + int change_sign = 0; + + if (num == 0) + return 0; + + if (num < 0) + { + change_sign++; + L_num = -num; + } + if (den < 0) + { + change_sign++; + L_den = -den; + } + while (k--) + { + div <<= 1; + L_num <<= 1; + if (L_num >= L_den) + { + L_num -= L_den; + div++; + } + } + if (change_sign == 1) + { + div = -div; + } + return div; +} + +int32_t WebRtcSpl_DivW32HiLow(int32_t num, int16_t den_hi, int16_t den_low) +{ + int16_t approx, tmp_hi, tmp_low, num_hi, num_low; + int32_t tmpW32; + + approx = (int16_t)WebRtcSpl_DivW32W16((int32_t)0x1FFFFFFF, den_hi); + // result in Q14 (Note: 3FFFFFFF = 0.5 in Q30) + + // tmpW32 = 1/den = approx * (2.0 - den * approx) (in Q30) + tmpW32 = (den_hi * approx << 1) + ((den_low * approx >> 15) << 1); + // tmpW32 = den * approx + + // result in Q30 (tmpW32 = 2.0-(den*approx)) + tmpW32 = (int32_t)((int64_t)0x7fffffffL - tmpW32); + + // Store tmpW32 in hi and low format + tmp_hi = (int16_t)(tmpW32 >> 16); + tmp_low = (int16_t)((tmpW32 - ((int32_t)tmp_hi << 16)) >> 1); + + // tmpW32 = 1/den in Q29 + tmpW32 = (tmp_hi * approx + (tmp_low * approx >> 15)) << 1; + + // 1/den in hi and low format + tmp_hi = (int16_t)(tmpW32 >> 16); + tmp_low = (int16_t)((tmpW32 - ((int32_t)tmp_hi << 16)) >> 1); + + // Store num in hi and low format + num_hi = (int16_t)(num >> 16); + num_low = (int16_t)((num - ((int32_t)num_hi << 16)) >> 1); + + // num * (1/den) by 32 bit multiplication (result in Q28) + + tmpW32 = num_hi * tmp_hi + (num_hi * tmp_low >> 15) + + (num_low * tmp_hi >> 15); + + // Put result in Q31 (convert from Q28) + tmpW32 = WEBRTC_SPL_LSHIFT_W32(tmpW32, 3); + + return tmpW32; +} diff --git a/third_party/libwebrtc/common_audio/signal_processing/dot_product_with_scale.cc b/third_party/libwebrtc/common_audio/signal_processing/dot_product_with_scale.cc new file mode 100644 index 0000000000..00799dae02 --- /dev/null +++ b/third_party/libwebrtc/common_audio/signal_processing/dot_product_with_scale.cc @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "common_audio/signal_processing/dot_product_with_scale.h" + +#include "rtc_base/numerics/safe_conversions.h" + +int32_t WebRtcSpl_DotProductWithScale(const int16_t* vector1, + const int16_t* vector2, + size_t length, + int scaling) { + int64_t sum = 0; + size_t i = 0; + + /* Unroll the loop to improve performance. */ + for (i = 0; i + 3 < length; i += 4) { + sum += (vector1[i + 0] * vector2[i + 0]) >> scaling; + sum += (vector1[i + 1] * vector2[i + 1]) >> scaling; + sum += (vector1[i + 2] * vector2[i + 2]) >> scaling; + sum += (vector1[i + 3] * vector2[i + 3]) >> scaling; + } + for (; i < length; i++) { + sum += (vector1[i] * vector2[i]) >> scaling; + } + + return rtc::saturated_cast<int32_t>(sum); +} diff --git a/third_party/libwebrtc/common_audio/signal_processing/dot_product_with_scale.h b/third_party/libwebrtc/common_audio/signal_processing/dot_product_with_scale.h new file mode 100644 index 0000000000..9f0d922aaf --- /dev/null +++ b/third_party/libwebrtc/common_audio/signal_processing/dot_product_with_scale.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef COMMON_AUDIO_SIGNAL_PROCESSING_DOT_PRODUCT_WITH_SCALE_H_ +#define COMMON_AUDIO_SIGNAL_PROCESSING_DOT_PRODUCT_WITH_SCALE_H_ + +#include <stdint.h> +#include <string.h> + +#ifdef __cplusplus +extern "C" { +#endif + +// Calculates the dot product between two (int16_t) vectors. +// +// Input: +// - vector1 : Vector 1 +// - vector2 : Vector 2 +// - vector_length : Number of samples used in the dot product +// - scaling : The number of right bit shifts to apply on each term +// during calculation to avoid overflow, i.e., the +// output will be in Q(-`scaling`) +// +// Return value : The dot product in Q(-scaling) +int32_t WebRtcSpl_DotProductWithScale(const int16_t* vector1, + const int16_t* vector2, + size_t length, + int scaling); + +#ifdef __cplusplus +} +#endif // __cplusplus +#endif // COMMON_AUDIO_SIGNAL_PROCESSING_DOT_PRODUCT_WITH_SCALE_H_ diff --git a/third_party/libwebrtc/common_audio/signal_processing/downsample_fast.c b/third_party/libwebrtc/common_audio/signal_processing/downsample_fast.c new file mode 100644 index 0000000000..80fdc58a49 --- /dev/null +++ b/third_party/libwebrtc/common_audio/signal_processing/downsample_fast.c @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "common_audio/signal_processing/include/signal_processing_library.h" + +#include "rtc_base/checks.h" +#include "rtc_base/sanitizer.h" + +// TODO(Bjornv): Change the function parameter order to WebRTC code style. +// C version of WebRtcSpl_DownsampleFast() for generic platforms. +int WebRtcSpl_DownsampleFastC(const int16_t* data_in, + size_t data_in_length, + int16_t* data_out, + size_t data_out_length, + const int16_t* __restrict coefficients, + size_t coefficients_length, + int factor, + size_t delay) { + int16_t* const original_data_out = data_out; + size_t i = 0; + size_t j = 0; + int32_t out_s32 = 0; + size_t endpos = delay + factor * (data_out_length - 1) + 1; + + // Return error if any of the running conditions doesn't meet. + if (data_out_length == 0 || coefficients_length == 0 + || data_in_length < endpos) { + return -1; + } + + rtc_MsanCheckInitialized(coefficients, sizeof(coefficients[0]), + coefficients_length); + + for (i = delay; i < endpos; i += factor) { + out_s32 = 2048; // Round value, 0.5 in Q12. + + for (j = 0; j < coefficients_length; j++) { + // Negative overflow is permitted here, because this is + // auto-regressive filters, and the state for each batch run is + // stored in the "negative" positions of the output vector. + rtc_MsanCheckInitialized(&data_in[(ptrdiff_t) i - (ptrdiff_t) j], + sizeof(data_in[0]), 1); + // out_s32 is in Q12 domain. + out_s32 += coefficients[j] * data_in[(ptrdiff_t) i - (ptrdiff_t) j]; + } + + out_s32 >>= 12; // Q0. + + // Saturate and store the output. + *data_out++ = WebRtcSpl_SatW32ToW16(out_s32); + } + + RTC_DCHECK_EQ(original_data_out + data_out_length, data_out); + rtc_MsanCheckInitialized(original_data_out, sizeof(original_data_out[0]), + data_out_length); + + return 0; +} diff --git a/third_party/libwebrtc/common_audio/signal_processing/downsample_fast_mips.c b/third_party/libwebrtc/common_audio/signal_processing/downsample_fast_mips.c new file mode 100644 index 0000000000..0f3f3a069f --- /dev/null +++ b/third_party/libwebrtc/common_audio/signal_processing/downsample_fast_mips.c @@ -0,0 +1,169 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "common_audio/signal_processing/include/signal_processing_library.h" + +// Version of WebRtcSpl_DownsampleFast() for MIPS platforms. +int WebRtcSpl_DownsampleFast_mips(const int16_t* data_in, + size_t data_in_length, + int16_t* data_out, + size_t data_out_length, + const int16_t* __restrict coefficients, + size_t coefficients_length, + int factor, + size_t delay) { + int i; + int j; + int k; + int32_t out_s32 = 0; + size_t endpos = delay + factor * (data_out_length - 1) + 1; + + int32_t tmp1, tmp2, tmp3, tmp4, factor_2; + int16_t* p_coefficients; + int16_t* p_data_in; + int16_t* p_data_in_0 = (int16_t*)&data_in[delay]; + int16_t* p_coefficients_0 = (int16_t*)&coefficients[0]; +#if !defined(MIPS_DSP_R1_LE) + int32_t max_16 = 0x7FFF; + int32_t min_16 = 0xFFFF8000; +#endif // #if !defined(MIPS_DSP_R1_LE) + + // Return error if any of the running conditions doesn't meet. + if (data_out_length == 0 || coefficients_length == 0 + || data_in_length < endpos) { + return -1; + } +#if defined(MIPS_DSP_R2_LE) + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "subu %[i], %[endpos], %[delay] \n\t" + "sll %[factor_2], %[factor], 1 \n\t" + "1: \n\t" + "move %[p_data_in], %[p_data_in_0] \n\t" + "mult $zero, $zero \n\t" + "move %[p_coefs], %[p_coefs_0] \n\t" + "sra %[j], %[coef_length], 2 \n\t" + "beq %[j], $zero, 3f \n\t" + " andi %[k], %[coef_length], 3 \n\t" + "2: \n\t" + "lwl %[tmp1], 1(%[p_data_in]) \n\t" + "lwl %[tmp2], 3(%[p_coefs]) \n\t" + "lwl %[tmp3], -3(%[p_data_in]) \n\t" + "lwl %[tmp4], 7(%[p_coefs]) \n\t" + "lwr %[tmp1], -2(%[p_data_in]) \n\t" + "lwr %[tmp2], 0(%[p_coefs]) \n\t" + "lwr %[tmp3], -6(%[p_data_in]) \n\t" + "lwr %[tmp4], 4(%[p_coefs]) \n\t" + "packrl.ph %[tmp1], %[tmp1], %[tmp1] \n\t" + "packrl.ph %[tmp3], %[tmp3], %[tmp3] \n\t" + "dpa.w.ph $ac0, %[tmp1], %[tmp2] \n\t" + "dpa.w.ph $ac0, %[tmp3], %[tmp4] \n\t" + "addiu %[j], %[j], -1 \n\t" + "addiu %[p_data_in], %[p_data_in], -8 \n\t" + "bgtz %[j], 2b \n\t" + " addiu %[p_coefs], %[p_coefs], 8 \n\t" + "3: \n\t" + "beq %[k], $zero, 5f \n\t" + " nop \n\t" + "4: \n\t" + "lhu %[tmp1], 0(%[p_data_in]) \n\t" + "lhu %[tmp2], 0(%[p_coefs]) \n\t" + "addiu %[p_data_in], %[p_data_in], -2 \n\t" + "addiu %[k], %[k], -1 \n\t" + "dpa.w.ph $ac0, %[tmp1], %[tmp2] \n\t" + "bgtz %[k], 4b \n\t" + " addiu %[p_coefs], %[p_coefs], 2 \n\t" + "5: \n\t" + "extr_r.w %[out_s32], $ac0, 12 \n\t" + "addu %[p_data_in_0], %[p_data_in_0], %[factor_2] \n\t" + "subu %[i], %[i], %[factor] \n\t" + "shll_s.w %[out_s32], %[out_s32], 16 \n\t" + "sra %[out_s32], %[out_s32], 16 \n\t" + "sh %[out_s32], 0(%[data_out]) \n\t" + "bgtz %[i], 1b \n\t" + " addiu %[data_out], %[data_out], 2 \n\t" + ".set pop \n\t" + : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3), + [tmp4] "=&r" (tmp4), [p_data_in] "=&r" (p_data_in), + [p_data_in_0] "+r" (p_data_in_0), [p_coefs] "=&r" (p_coefficients), + [j] "=&r" (j), [out_s32] "=&r" (out_s32), [factor_2] "=&r" (factor_2), + [i] "=&r" (i), [k] "=&r" (k) + : [coef_length] "r" (coefficients_length), [data_out] "r" (data_out), + [p_coefs_0] "r" (p_coefficients_0), [endpos] "r" (endpos), + [delay] "r" (delay), [factor] "r" (factor) + : "memory", "hi", "lo" + ); +#else // #if defined(MIPS_DSP_R2_LE) + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "sll %[factor_2], %[factor], 1 \n\t" + "subu %[i], %[endpos], %[delay] \n\t" + "1: \n\t" + "move %[p_data_in], %[p_data_in_0] \n\t" + "addiu %[out_s32], $zero, 2048 \n\t" + "move %[p_coefs], %[p_coefs_0] \n\t" + "sra %[j], %[coef_length], 1 \n\t" + "beq %[j], $zero, 3f \n\t" + " andi %[k], %[coef_length], 1 \n\t" + "2: \n\t" + "lh %[tmp1], 0(%[p_data_in]) \n\t" + "lh %[tmp2], 0(%[p_coefs]) \n\t" + "lh %[tmp3], -2(%[p_data_in]) \n\t" + "lh %[tmp4], 2(%[p_coefs]) \n\t" + "mul %[tmp1], %[tmp1], %[tmp2] \n\t" + "addiu %[p_coefs], %[p_coefs], 4 \n\t" + "mul %[tmp3], %[tmp3], %[tmp4] \n\t" + "addiu %[j], %[j], -1 \n\t" + "addiu %[p_data_in], %[p_data_in], -4 \n\t" + "addu %[tmp1], %[tmp1], %[tmp3] \n\t" + "bgtz %[j], 2b \n\t" + " addu %[out_s32], %[out_s32], %[tmp1] \n\t" + "3: \n\t" + "beq %[k], $zero, 4f \n\t" + " nop \n\t" + "lh %[tmp1], 0(%[p_data_in]) \n\t" + "lh %[tmp2], 0(%[p_coefs]) \n\t" + "mul %[tmp1], %[tmp1], %[tmp2] \n\t" + "addu %[out_s32], %[out_s32], %[tmp1] \n\t" + "4: \n\t" + "sra %[out_s32], %[out_s32], 12 \n\t" + "addu %[p_data_in_0], %[p_data_in_0], %[factor_2] \n\t" +#if defined(MIPS_DSP_R1_LE) + "shll_s.w %[out_s32], %[out_s32], 16 \n\t" + "sra %[out_s32], %[out_s32], 16 \n\t" +#else // #if defined(MIPS_DSP_R1_LE) + "slt %[tmp1], %[max_16], %[out_s32] \n\t" + "movn %[out_s32], %[max_16], %[tmp1] \n\t" + "slt %[tmp1], %[out_s32], %[min_16] \n\t" + "movn %[out_s32], %[min_16], %[tmp1] \n\t" +#endif // #if defined(MIPS_DSP_R1_LE) + "subu %[i], %[i], %[factor] \n\t" + "sh %[out_s32], 0(%[data_out]) \n\t" + "bgtz %[i], 1b \n\t" + " addiu %[data_out], %[data_out], 2 \n\t" + ".set pop \n\t" + : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3), + [tmp4] "=&r" (tmp4), [p_data_in] "=&r" (p_data_in), [k] "=&r" (k), + [p_data_in_0] "+r" (p_data_in_0), [p_coefs] "=&r" (p_coefficients), + [j] "=&r" (j), [out_s32] "=&r" (out_s32), [factor_2] "=&r" (factor_2), + [i] "=&r" (i) + : [coef_length] "r" (coefficients_length), [data_out] "r" (data_out), + [p_coefs_0] "r" (p_coefficients_0), [endpos] "r" (endpos), +#if !defined(MIPS_DSP_R1_LE) + [max_16] "r" (max_16), [min_16] "r" (min_16), +#endif // #if !defined(MIPS_DSP_R1_LE) + [delay] "r" (delay), [factor] "r" (factor) + : "memory", "hi", "lo" + ); +#endif // #if defined(MIPS_DSP_R2_LE) + return 0; +} diff --git a/third_party/libwebrtc/common_audio/signal_processing/downsample_fast_neon.c b/third_party/libwebrtc/common_audio/signal_processing/downsample_fast_neon.c new file mode 100644 index 0000000000..36fc0c8aee --- /dev/null +++ b/third_party/libwebrtc/common_audio/signal_processing/downsample_fast_neon.c @@ -0,0 +1,217 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "common_audio/signal_processing/include/signal_processing_library.h" + +#include <arm_neon.h> + +// NEON intrinsics version of WebRtcSpl_DownsampleFast() +// for ARM 32-bit/64-bit platforms. +int WebRtcSpl_DownsampleFastNeon(const int16_t* data_in, + size_t data_in_length, + int16_t* data_out, + size_t data_out_length, + const int16_t* __restrict coefficients, + size_t coefficients_length, + int factor, + size_t delay) { + size_t i = 0; + size_t j = 0; + int32_t out_s32 = 0; + size_t endpos = delay + factor * (data_out_length - 1) + 1; + size_t res = data_out_length & 0x7; + size_t endpos1 = endpos - factor * res; + + // Return error if any of the running conditions doesn't meet. + if (data_out_length == 0 || coefficients_length == 0 + || data_in_length < endpos) { + return -1; + } + + // First part, unroll the loop 8 times, with 3 subcases + // (factor == 2, 4, others). + switch (factor) { + case 2: { + for (i = delay; i < endpos1; i += 16) { + // Round value, 0.5 in Q12. + int32x4_t out32x4_0 = vdupq_n_s32(2048); + int32x4_t out32x4_1 = vdupq_n_s32(2048); + +#if defined(WEBRTC_ARCH_ARM64) + // Unroll the loop 2 times. + for (j = 0; j < coefficients_length - 1; j += 2) { + int32x2_t coeff32 = vld1_dup_s32((int32_t*)&coefficients[j]); + int16x4_t coeff16x4 = vreinterpret_s16_s32(coeff32); + int16x8x2_t in16x8x2 = vld2q_s16(&data_in[i - j - 1]); + + // Mul and accumulate low 64-bit data. + int16x4_t in16x4_0 = vget_low_s16(in16x8x2.val[0]); + int16x4_t in16x4_1 = vget_low_s16(in16x8x2.val[1]); + out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_0, coeff16x4, 1); + out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_1, coeff16x4, 0); + + // Mul and accumulate high 64-bit data. + // TODO: vget_high_s16 need extra cost on ARM64. This could be + // replaced by vmlal_high_lane_s16. But for the interface of + // vmlal_high_lane_s16, there is a bug in gcc 4.9. + // This issue need to be tracked in the future. + int16x4_t in16x4_2 = vget_high_s16(in16x8x2.val[0]); + int16x4_t in16x4_3 = vget_high_s16(in16x8x2.val[1]); + out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_2, coeff16x4, 1); + out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_3, coeff16x4, 0); + } + + for (; j < coefficients_length; j++) { + int16x4_t coeff16x4 = vld1_dup_s16(&coefficients[j]); + int16x8x2_t in16x8x2 = vld2q_s16(&data_in[i - j]); + + // Mul and accumulate low 64-bit data. + int16x4_t in16x4_0 = vget_low_s16(in16x8x2.val[0]); + out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_0, coeff16x4, 0); + + // Mul and accumulate high 64-bit data. + // TODO: vget_high_s16 need extra cost on ARM64. This could be + // replaced by vmlal_high_lane_s16. But for the interface of + // vmlal_high_lane_s16, there is a bug in gcc 4.9. + // This issue need to be tracked in the future. + int16x4_t in16x4_1 = vget_high_s16(in16x8x2.val[0]); + out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_1, coeff16x4, 0); + } +#else + // On ARMv7, the loop unrolling 2 times results in performance + // regression. + for (j = 0; j < coefficients_length; j++) { + int16x4_t coeff16x4 = vld1_dup_s16(&coefficients[j]); + int16x8x2_t in16x8x2 = vld2q_s16(&data_in[i - j]); + + // Mul and accumulate. + int16x4_t in16x4_0 = vget_low_s16(in16x8x2.val[0]); + int16x4_t in16x4_1 = vget_high_s16(in16x8x2.val[0]); + out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_0, coeff16x4, 0); + out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_1, coeff16x4, 0); + } +#endif + + // Saturate and store the output. + int16x4_t out16x4_0 = vqshrn_n_s32(out32x4_0, 12); + int16x4_t out16x4_1 = vqshrn_n_s32(out32x4_1, 12); + vst1q_s16(data_out, vcombine_s16(out16x4_0, out16x4_1)); + data_out += 8; + } + break; + } + case 4: { + for (i = delay; i < endpos1; i += 32) { + // Round value, 0.5 in Q12. + int32x4_t out32x4_0 = vdupq_n_s32(2048); + int32x4_t out32x4_1 = vdupq_n_s32(2048); + + // Unroll the loop 4 times. + for (j = 0; j < coefficients_length - 3; j += 4) { + int16x4_t coeff16x4 = vld1_s16(&coefficients[j]); + int16x8x4_t in16x8x4 = vld4q_s16(&data_in[i - j - 3]); + + // Mul and accumulate low 64-bit data. + int16x4_t in16x4_0 = vget_low_s16(in16x8x4.val[0]); + int16x4_t in16x4_2 = vget_low_s16(in16x8x4.val[1]); + int16x4_t in16x4_4 = vget_low_s16(in16x8x4.val[2]); + int16x4_t in16x4_6 = vget_low_s16(in16x8x4.val[3]); + out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_0, coeff16x4, 3); + out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_2, coeff16x4, 2); + out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_4, coeff16x4, 1); + out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_6, coeff16x4, 0); + + // Mul and accumulate high 64-bit data. + // TODO: vget_high_s16 need extra cost on ARM64. This could be + // replaced by vmlal_high_lane_s16. But for the interface of + // vmlal_high_lane_s16, there is a bug in gcc 4.9. + // This issue need to be tracked in the future. + int16x4_t in16x4_1 = vget_high_s16(in16x8x4.val[0]); + int16x4_t in16x4_3 = vget_high_s16(in16x8x4.val[1]); + int16x4_t in16x4_5 = vget_high_s16(in16x8x4.val[2]); + int16x4_t in16x4_7 = vget_high_s16(in16x8x4.val[3]); + out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_1, coeff16x4, 3); + out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_3, coeff16x4, 2); + out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_5, coeff16x4, 1); + out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_7, coeff16x4, 0); + } + + for (; j < coefficients_length; j++) { + int16x4_t coeff16x4 = vld1_dup_s16(&coefficients[j]); + int16x8x4_t in16x8x4 = vld4q_s16(&data_in[i - j]); + + // Mul and accumulate low 64-bit data. + int16x4_t in16x4_0 = vget_low_s16(in16x8x4.val[0]); + out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_0, coeff16x4, 0); + + // Mul and accumulate high 64-bit data. + // TODO: vget_high_s16 need extra cost on ARM64. This could be + // replaced by vmlal_high_lane_s16. But for the interface of + // vmlal_high_lane_s16, there is a bug in gcc 4.9. + // This issue need to be tracked in the future. + int16x4_t in16x4_1 = vget_high_s16(in16x8x4.val[0]); + out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_1, coeff16x4, 0); + } + + // Saturate and store the output. + int16x4_t out16x4_0 = vqshrn_n_s32(out32x4_0, 12); + int16x4_t out16x4_1 = vqshrn_n_s32(out32x4_1, 12); + vst1q_s16(data_out, vcombine_s16(out16x4_0, out16x4_1)); + data_out += 8; + } + break; + } + default: { + for (i = delay; i < endpos1; i += factor * 8) { + // Round value, 0.5 in Q12. + int32x4_t out32x4_0 = vdupq_n_s32(2048); + int32x4_t out32x4_1 = vdupq_n_s32(2048); + + for (j = 0; j < coefficients_length; j++) { + int16x4_t coeff16x4 = vld1_dup_s16(&coefficients[j]); + int16x4_t in16x4_0 = vld1_dup_s16(&data_in[i - j]); + in16x4_0 = vld1_lane_s16(&data_in[i + factor - j], in16x4_0, 1); + in16x4_0 = vld1_lane_s16(&data_in[i + factor * 2 - j], in16x4_0, 2); + in16x4_0 = vld1_lane_s16(&data_in[i + factor * 3 - j], in16x4_0, 3); + int16x4_t in16x4_1 = vld1_dup_s16(&data_in[i + factor * 4 - j]); + in16x4_1 = vld1_lane_s16(&data_in[i + factor * 5 - j], in16x4_1, 1); + in16x4_1 = vld1_lane_s16(&data_in[i + factor * 6 - j], in16x4_1, 2); + in16x4_1 = vld1_lane_s16(&data_in[i + factor * 7 - j], in16x4_1, 3); + + // Mul and accumulate. + out32x4_0 = vmlal_lane_s16(out32x4_0, in16x4_0, coeff16x4, 0); + out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_1, coeff16x4, 0); + } + + // Saturate and store the output. + int16x4_t out16x4_0 = vqshrn_n_s32(out32x4_0, 12); + int16x4_t out16x4_1 = vqshrn_n_s32(out32x4_1, 12); + vst1q_s16(data_out, vcombine_s16(out16x4_0, out16x4_1)); + data_out += 8; + } + break; + } + } + + // Second part, do the rest iterations (if any). + for (; i < endpos; i += factor) { + out_s32 = 2048; // Round value, 0.5 in Q12. + + for (j = 0; j < coefficients_length; j++) { + out_s32 = WebRtc_MulAccumW16(coefficients[j], data_in[i - j], out_s32); + } + + // Saturate and store the output. + out_s32 >>= 12; + *data_out++ = WebRtcSpl_SatW32ToW16(out_s32); + } + + return 0; +} diff --git a/third_party/libwebrtc/common_audio/signal_processing/energy.c b/third_party/libwebrtc/common_audio/signal_processing/energy.c new file mode 100644 index 0000000000..5cce6b8777 --- /dev/null +++ b/third_party/libwebrtc/common_audio/signal_processing/energy.c @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +/* + * This file contains the function WebRtcSpl_Energy(). + * The description header can be found in signal_processing_library.h + * + */ + +#include "common_audio/signal_processing/include/signal_processing_library.h" + +int32_t WebRtcSpl_Energy(int16_t* vector, + size_t vector_length, + int* scale_factor) +{ + int32_t en = 0; + size_t i; + int scaling = + WebRtcSpl_GetScalingSquare(vector, vector_length, vector_length); + size_t looptimes = vector_length; + int16_t *vectorptr = vector; + + for (i = 0; i < looptimes; i++) + { + en += (*vectorptr * *vectorptr) >> scaling; + vectorptr++; + } + *scale_factor = scaling; + + return en; +} diff --git a/third_party/libwebrtc/common_audio/signal_processing/filter_ar.c b/third_party/libwebrtc/common_audio/signal_processing/filter_ar.c new file mode 100644 index 0000000000..b1f666d723 --- /dev/null +++ b/third_party/libwebrtc/common_audio/signal_processing/filter_ar.c @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +/* + * This file contains the function WebRtcSpl_FilterAR(). + * The description header can be found in signal_processing_library.h + * + */ + +#include "common_audio/signal_processing/include/signal_processing_library.h" + +#include "rtc_base/checks.h" + +size_t WebRtcSpl_FilterAR(const int16_t* a, + size_t a_length, + const int16_t* x, + size_t x_length, + int16_t* state, + size_t state_length, + int16_t* state_low, + size_t state_low_length, + int16_t* filtered, + int16_t* filtered_low, + size_t filtered_low_length) +{ + int64_t o; + int32_t oLOW; + size_t i, j, stop; + const int16_t* x_ptr = &x[0]; + int16_t* filteredFINAL_ptr = filtered; + int16_t* filteredFINAL_LOW_ptr = filtered_low; + + for (i = 0; i < x_length; i++) + { + // Calculate filtered[i] and filtered_low[i] + const int16_t* a_ptr = &a[1]; + // The index can become negative, but the arrays will never be indexed + // with it when negative. Nevertheless, the index cannot be a size_t + // because of this. + int filtered_ix = (int)i - 1; + int16_t* state_ptr = &state[state_length - 1]; + int16_t* state_low_ptr = &state_low[state_length - 1]; + + o = (int32_t)(*x_ptr++) * (1 << 12); + oLOW = (int32_t)0; + + stop = (i < a_length) ? i + 1 : a_length; + for (j = 1; j < stop; j++) + { + RTC_DCHECK_GE(filtered_ix, 0); + o -= *a_ptr * filtered[filtered_ix]; + oLOW -= *a_ptr++ * filtered_low[filtered_ix]; + --filtered_ix; + } + for (j = i + 1; j < a_length; j++) + { + o -= *a_ptr * *state_ptr--; + oLOW -= *a_ptr++ * *state_low_ptr--; + } + + o += (oLOW >> 12); + *filteredFINAL_ptr = (int16_t)((o + (int32_t)2048) >> 12); + *filteredFINAL_LOW_ptr++ = + (int16_t)(o - ((int32_t)(*filteredFINAL_ptr++) * (1 << 12))); + } + + // Save the filter state + if (x_length >= state_length) + { + WebRtcSpl_CopyFromEndW16(filtered, x_length, a_length - 1, state); + WebRtcSpl_CopyFromEndW16(filtered_low, x_length, a_length - 1, state_low); + } else + { + for (i = 0; i < state_length - x_length; i++) + { + state[i] = state[i + x_length]; + state_low[i] = state_low[i + x_length]; + } + for (i = 0; i < x_length; i++) + { + state[state_length - x_length + i] = filtered[i]; + state_low[state_length - x_length + i] = filtered_low[i]; + } + } + + return x_length; +} diff --git a/third_party/libwebrtc/common_audio/signal_processing/filter_ar_fast_q12.c b/third_party/libwebrtc/common_audio/signal_processing/filter_ar_fast_q12.c new file mode 100644 index 0000000000..9010f1ce82 --- /dev/null +++ b/third_party/libwebrtc/common_audio/signal_processing/filter_ar_fast_q12.c @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <stddef.h> + +#include "rtc_base/checks.h" +#include "common_audio/signal_processing/include/signal_processing_library.h" + +// TODO(bjornv): Change the return type to report errors. + +void WebRtcSpl_FilterARFastQ12(const int16_t* data_in, + int16_t* data_out, + const int16_t* __restrict coefficients, + size_t coefficients_length, + size_t data_length) { + size_t i = 0; + size_t j = 0; + + RTC_DCHECK_GT(data_length, 0); + RTC_DCHECK_GT(coefficients_length, 1); + + for (i = 0; i < data_length; i++) { + int64_t output = 0; + int64_t sum = 0; + + for (j = coefficients_length - 1; j > 0; j--) { + // Negative overflow is permitted here, because this is + // auto-regressive filters, and the state for each batch run is + // stored in the "negative" positions of the output vector. + sum += coefficients[j] * data_out[(ptrdiff_t) i - (ptrdiff_t) j]; + } + + output = coefficients[0] * data_in[i]; + output -= sum; + + // Saturate and store the output. + output = WEBRTC_SPL_SAT(134215679, output, -134217728); + data_out[i] = (int16_t)((output + 2048) >> 12); + } +} diff --git a/third_party/libwebrtc/common_audio/signal_processing/filter_ar_fast_q12_armv7.S b/third_party/libwebrtc/common_audio/signal_processing/filter_ar_fast_q12_armv7.S new file mode 100644 index 0000000000..60319d29ff --- /dev/null +++ b/third_party/libwebrtc/common_audio/signal_processing/filter_ar_fast_q12_armv7.S @@ -0,0 +1,218 @@ +@ +@ Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. +@ +@ Use of this source code is governed by a BSD-style license +@ that can be found in the LICENSE file in the root of the source +@ tree. An additional intellectual property rights grant can be found +@ in the file PATENTS. All contributing project authors may +@ be found in the AUTHORS file in the root of the source tree. +@ + +@ This file contains the function WebRtcSpl_FilterARFastQ12(), optimized for +@ ARMv7 platform. The description header can be found in +@ signal_processing_library.h +@ +@ Output is bit-exact with the generic C code as in filter_ar_fast_q12.c, and +@ the reference C code at end of this file. + +@ Assumptions: +@ (1) data_length > 0 +@ (2) coefficients_length > 1 + +@ Register usage: +@ +@ r0: &data_in[i] +@ r1: &data_out[i], for result ouput +@ r2: &coefficients[0] +@ r3: coefficients_length +@ r4: Iteration counter for the outer loop. +@ r5: data_out[j] as multiplication inputs +@ r6: Calculated value for output data_out[]; interation counter for inner loop +@ r7: Partial sum of a filtering multiplication results +@ r8: Partial sum of a filtering multiplication results +@ r9: &data_out[], for filtering input; data_in[i] +@ r10: coefficients[j] +@ r11: Scratch +@ r12: &coefficients[j] + +#include "rtc_base/system/asm_defines.h" + +GLOBAL_FUNCTION WebRtcSpl_FilterARFastQ12 +.align 2 +DEFINE_FUNCTION WebRtcSpl_FilterARFastQ12 + push {r4-r11} + + ldrsh r12, [sp, #32] @ data_length + subs r4, r12, #1 + beq ODD_LENGTH @ jump if data_length == 1 + +LOOP_LENGTH: + add r12, r2, r3, lsl #1 + sub r12, #4 @ &coefficients[coefficients_length - 2] + sub r9, r1, r3, lsl #1 + add r9, #2 @ &data_out[i - coefficients_length + 1] + ldr r5, [r9], #4 @ data_out[i - coefficients_length + {1,2}] + + mov r7, #0 @ sum1 + mov r8, #0 @ sum2 + subs r6, r3, #3 @ Iteration counter for inner loop. + beq ODD_A_LENGTH @ branch if coefficients_length == 3 + blt POST_LOOP_A_LENGTH @ branch if coefficients_length == 2 + +LOOP_A_LENGTH: + ldr r10, [r12], #-4 @ coefficients[j - 1], coefficients[j] + subs r6, #2 + smlatt r8, r10, r5, r8 @ sum2 += coefficients[j] * data_out[i - j + 1]; + smlatb r7, r10, r5, r7 @ sum1 += coefficients[j] * data_out[i - j]; + smlabt r7, r10, r5, r7 @ coefficients[j - 1] * data_out[i - j + 1]; + ldr r5, [r9], #4 @ data_out[i - j + 2], data_out[i - j + 3] + smlabb r8, r10, r5, r8 @ coefficients[j - 1] * data_out[i - j + 2]; + bgt LOOP_A_LENGTH + blt POST_LOOP_A_LENGTH + +ODD_A_LENGTH: + ldrsh r10, [r12, #2] @ Filter coefficients coefficients[2] + sub r12, #2 @ &coefficients[0] + smlabb r7, r10, r5, r7 @ sum1 += coefficients[2] * data_out[i - 2]; + smlabt r8, r10, r5, r8 @ sum2 += coefficients[2] * data_out[i - 1]; + ldr r5, [r9, #-2] @ data_out[i - 1], data_out[i] + +POST_LOOP_A_LENGTH: + ldr r10, [r12] @ coefficients[0], coefficients[1] + smlatb r7, r10, r5, r7 @ sum1 += coefficients[1] * data_out[i - 1]; + + ldr r9, [r0], #4 @ data_in[i], data_in[i + 1] + smulbb r6, r10, r9 @ output1 = coefficients[0] * data_in[i]; + sub r6, r7 @ output1 -= sum1; + + sbfx r11, r6, #12, #16 + ssat r7, #16, r6, asr #12 + cmp r7, r11 + addeq r6, r6, #2048 + ssat r6, #16, r6, asr #12 + strh r6, [r1], #2 @ Store data_out[i] + + smlatb r8, r10, r6, r8 @ sum2 += coefficients[1] * data_out[i]; + smulbt r6, r10, r9 @ output2 = coefficients[0] * data_in[i + 1]; + sub r6, r8 @ output1 -= sum1; + + sbfx r11, r6, #12, #16 + ssat r7, #16, r6, asr #12 + cmp r7, r11 + addeq r6, r6, #2048 + ssat r6, #16, r6, asr #12 + strh r6, [r1], #2 @ Store data_out[i + 1] + + subs r4, #2 + bgt LOOP_LENGTH + blt END @ For even data_length, it's done. Jump to END. + +@ Process i = data_length -1, for the case of an odd length. +ODD_LENGTH: + add r12, r2, r3, lsl #1 + sub r12, #4 @ &coefficients[coefficients_length - 2] + sub r9, r1, r3, lsl #1 + add r9, #2 @ &data_out[i - coefficients_length + 1] + mov r7, #0 @ sum1 + mov r8, #0 @ sum1 + subs r6, r3, #2 @ inner loop counter + beq EVEN_A_LENGTH @ branch if coefficients_length == 2 + +LOOP2_A_LENGTH: + ldr r10, [r12], #-4 @ coefficients[j - 1], coefficients[j] + ldr r5, [r9], #4 @ data_out[i - j], data_out[i - j + 1] + subs r6, #2 + smlatb r7, r10, r5, r7 @ sum1 += coefficients[j] * data_out[i - j]; + smlabt r8, r10, r5, r8 @ coefficients[j - 1] * data_out[i - j + 1]; + bgt LOOP2_A_LENGTH + addlt r12, #2 + blt POST_LOOP2_A_LENGTH + +EVEN_A_LENGTH: + ldrsh r10, [r12, #2] @ Filter coefficients coefficients[1] + ldrsh r5, [r9] @ data_out[i - 1] + smlabb r7, r10, r5, r7 @ sum1 += coefficients[1] * data_out[i - 1]; + +POST_LOOP2_A_LENGTH: + ldrsh r10, [r12] @ Filter coefficients coefficients[0] + ldrsh r9, [r0] @ data_in[i] + smulbb r6, r10, r9 @ output1 = coefficients[0] * data_in[i]; + sub r6, r7 @ output1 -= sum1; + sub r6, r8 @ output1 -= sum1; + sbfx r8, r6, #12, #16 + ssat r7, #16, r6, asr #12 + cmp r7, r8 + addeq r6, r6, #2048 + ssat r6, #16, r6, asr #12 + strh r6, [r1] @ Store the data_out[i] + +END: + pop {r4-r11} + bx lr + +@Reference C code: +@ +@void WebRtcSpl_FilterARFastQ12(int16_t* data_in, +@ int16_t* data_out, +@ int16_t* __restrict coefficients, +@ size_t coefficients_length, +@ size_t data_length) { +@ size_t i = 0; +@ size_t j = 0; +@ +@ assert(data_length > 0); +@ assert(coefficients_length > 1); +@ +@ for (i = 0; i < data_length - 1; i += 2) { +@ int32_t output1 = 0; +@ int32_t sum1 = 0; +@ int32_t output2 = 0; +@ int32_t sum2 = 0; +@ +@ for (j = coefficients_length - 1; j > 2; j -= 2) { +@ sum1 += coefficients[j] * data_out[i - j]; +@ sum1 += coefficients[j - 1] * data_out[i - j + 1]; +@ sum2 += coefficients[j] * data_out[i - j + 1]; +@ sum2 += coefficients[j - 1] * data_out[i - j + 2]; +@ } +@ +@ if (j == 2) { +@ sum1 += coefficients[2] * data_out[i - 2]; +@ sum2 += coefficients[2] * data_out[i - 1]; +@ } +@ +@ sum1 += coefficients[1] * data_out[i - 1]; +@ output1 = coefficients[0] * data_in[i]; +@ output1 -= sum1; +@ // Saturate and store the output. +@ output1 = WEBRTC_SPL_SAT(134215679, output1, -134217728); +@ data_out[i] = (int16_t)((output1 + 2048) >> 12); +@ +@ sum2 += coefficients[1] * data_out[i]; +@ output2 = coefficients[0] * data_in[i + 1]; +@ output2 -= sum2; +@ // Saturate and store the output. +@ output2 = WEBRTC_SPL_SAT(134215679, output2, -134217728); +@ data_out[i + 1] = (int16_t)((output2 + 2048) >> 12); +@ } +@ +@ if (i == data_length - 1) { +@ int32_t output1 = 0; +@ int32_t sum1 = 0; +@ +@ for (j = coefficients_length - 1; j > 1; j -= 2) { +@ sum1 += coefficients[j] * data_out[i - j]; +@ sum1 += coefficients[j - 1] * data_out[i - j + 1]; +@ } +@ +@ if (j == 1) { +@ sum1 += coefficients[1] * data_out[i - 1]; +@ } +@ +@ output1 = coefficients[0] * data_in[i]; +@ output1 -= sum1; +@ // Saturate and store the output. +@ output1 = WEBRTC_SPL_SAT(134215679, output1, -134217728); +@ data_out[i] = (int16_t)((output1 + 2048) >> 12); +@ } +@} diff --git a/third_party/libwebrtc/common_audio/signal_processing/filter_ar_fast_q12_mips.c b/third_party/libwebrtc/common_audio/signal_processing/filter_ar_fast_q12_mips.c new file mode 100644 index 0000000000..b9ad30f006 --- /dev/null +++ b/third_party/libwebrtc/common_audio/signal_processing/filter_ar_fast_q12_mips.c @@ -0,0 +1,140 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "rtc_base/checks.h" +#include "common_audio/signal_processing/include/signal_processing_library.h" + +void WebRtcSpl_FilterARFastQ12(const int16_t* data_in, + int16_t* data_out, + const int16_t* __restrict coefficients, + size_t coefficients_length, + size_t data_length) { + int r0, r1, r2, r3; + int coef0, offset; + int i, j, k; + int coefptr, outptr, tmpout, inptr; +#if !defined(MIPS_DSP_R1_LE) + int max16 = 0x7FFF; + int min16 = 0xFFFF8000; +#endif // #if !defined(MIPS_DSP_R1_LE) + + RTC_DCHECK_GT(data_length, 0); + RTC_DCHECK_GT(coefficients_length, 1); + + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "addiu %[i], %[data_length], 0 \n\t" + "lh %[coef0], 0(%[coefficients]) \n\t" + "addiu %[j], %[coefficients_length], -1 \n\t" + "andi %[k], %[j], 1 \n\t" + "sll %[offset], %[j], 1 \n\t" + "subu %[outptr], %[data_out], %[offset] \n\t" + "addiu %[inptr], %[data_in], 0 \n\t" + "bgtz %[k], 3f \n\t" + " addu %[coefptr], %[coefficients], %[offset] \n\t" + "1: \n\t" + "lh %[r0], 0(%[inptr]) \n\t" + "addiu %[i], %[i], -1 \n\t" + "addiu %[tmpout], %[outptr], 0 \n\t" + "mult %[r0], %[coef0] \n\t" + "2: \n\t" + "lh %[r0], 0(%[tmpout]) \n\t" + "lh %[r1], 0(%[coefptr]) \n\t" + "lh %[r2], 2(%[tmpout]) \n\t" + "lh %[r3], -2(%[coefptr]) \n\t" + "addiu %[tmpout], %[tmpout], 4 \n\t" + "msub %[r0], %[r1] \n\t" + "msub %[r2], %[r3] \n\t" + "addiu %[j], %[j], -2 \n\t" + "bgtz %[j], 2b \n\t" + " addiu %[coefptr], %[coefptr], -4 \n\t" +#if defined(MIPS_DSP_R1_LE) + "extr_r.w %[r0], $ac0, 12 \n\t" +#else // #if defined(MIPS_DSP_R1_LE) + "mflo %[r0] \n\t" +#endif // #if defined(MIPS_DSP_R1_LE) + "addu %[coefptr], %[coefficients], %[offset] \n\t" + "addiu %[inptr], %[inptr], 2 \n\t" + "addiu %[j], %[coefficients_length], -1 \n\t" +#if defined(MIPS_DSP_R1_LE) + "shll_s.w %[r0], %[r0], 16 \n\t" + "sra %[r0], %[r0], 16 \n\t" +#else // #if defined(MIPS_DSP_R1_LE) + "addiu %[r0], %[r0], 2048 \n\t" + "sra %[r0], %[r0], 12 \n\t" + "slt %[r1], %[max16], %[r0] \n\t" + "movn %[r0], %[max16], %[r1] \n\t" + "slt %[r1], %[r0], %[min16] \n\t" + "movn %[r0], %[min16], %[r1] \n\t" +#endif // #if defined(MIPS_DSP_R1_LE) + "sh %[r0], 0(%[tmpout]) \n\t" + "bgtz %[i], 1b \n\t" + " addiu %[outptr], %[outptr], 2 \n\t" + "b 5f \n\t" + " nop \n\t" + "3: \n\t" + "lh %[r0], 0(%[inptr]) \n\t" + "addiu %[i], %[i], -1 \n\t" + "addiu %[tmpout], %[outptr], 0 \n\t" + "mult %[r0], %[coef0] \n\t" + "4: \n\t" + "lh %[r0], 0(%[tmpout]) \n\t" + "lh %[r1], 0(%[coefptr]) \n\t" + "lh %[r2], 2(%[tmpout]) \n\t" + "lh %[r3], -2(%[coefptr]) \n\t" + "addiu %[tmpout], %[tmpout], 4 \n\t" + "msub %[r0], %[r1] \n\t" + "msub %[r2], %[r3] \n\t" + "addiu %[j], %[j], -2 \n\t" + "bgtz %[j], 4b \n\t" + " addiu %[coefptr], %[coefptr], -4 \n\t" + "lh %[r0], 0(%[tmpout]) \n\t" + "lh %[r1], 0(%[coefptr]) \n\t" + "msub %[r0], %[r1] \n\t" +#if defined(MIPS_DSP_R1_LE) + "extr_r.w %[r0], $ac0, 12 \n\t" +#else // #if defined(MIPS_DSP_R1_LE) + "mflo %[r0] \n\t" +#endif // #if defined(MIPS_DSP_R1_LE) + "addu %[coefptr], %[coefficients], %[offset] \n\t" + "addiu %[inptr], %[inptr], 2 \n\t" + "addiu %[j], %[coefficients_length], -1 \n\t" +#if defined(MIPS_DSP_R1_LE) + "shll_s.w %[r0], %[r0], 16 \n\t" + "sra %[r0], %[r0], 16 \n\t" +#else // #if defined(MIPS_DSP_R1_LE) + "addiu %[r0], %[r0], 2048 \n\t" + "sra %[r0], %[r0], 12 \n\t" + "slt %[r1], %[max16], %[r0] \n\t" + "movn %[r0], %[max16], %[r1] \n\t" + "slt %[r1], %[r0], %[min16] \n\t" + "movn %[r0], %[min16], %[r1] \n\t" +#endif // #if defined(MIPS_DSP_R1_LE) + "sh %[r0], 2(%[tmpout]) \n\t" + "bgtz %[i], 3b \n\t" + " addiu %[outptr], %[outptr], 2 \n\t" + "5: \n\t" + ".set pop \n\t" + : [i] "=&r" (i), [j] "=&r" (j), [k] "=&r" (k), [r0] "=&r" (r0), + [r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3), + [coef0] "=&r" (coef0), [offset] "=&r" (offset), + [outptr] "=&r" (outptr), [inptr] "=&r" (inptr), + [coefptr] "=&r" (coefptr), [tmpout] "=&r" (tmpout) + : [coefficients] "r" (coefficients), [data_length] "r" (data_length), + [coefficients_length] "r" (coefficients_length), +#if !defined(MIPS_DSP_R1_LE) + [max16] "r" (max16), [min16] "r" (min16), +#endif + [data_out] "r" (data_out), [data_in] "r" (data_in) + : "hi", "lo", "memory" + ); +} + diff --git a/third_party/libwebrtc/common_audio/signal_processing/filter_ma_fast_q12.c b/third_party/libwebrtc/common_audio/signal_processing/filter_ma_fast_q12.c new file mode 100644 index 0000000000..329d47e14f --- /dev/null +++ b/third_party/libwebrtc/common_audio/signal_processing/filter_ma_fast_q12.c @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +/* + * This file contains the function WebRtcSpl_FilterMAFastQ12(). + * The description header can be found in signal_processing_library.h + * + */ + +#include "common_audio/signal_processing/include/signal_processing_library.h" + +#include "rtc_base/sanitizer.h" + +void WebRtcSpl_FilterMAFastQ12(const int16_t* in_ptr, + int16_t* out_ptr, + const int16_t* B, + size_t B_length, + size_t length) +{ + size_t i, j; + + rtc_MsanCheckInitialized(B, sizeof(B[0]), B_length); + rtc_MsanCheckInitialized(in_ptr - B_length + 1, sizeof(in_ptr[0]), + B_length + length - 1); + + for (i = 0; i < length; i++) + { + int32_t o = 0; + + for (j = 0; j < B_length; j++) + { + // Negative overflow is permitted here, because this is + // auto-regressive filters, and the state for each batch run is + // stored in the "negative" positions of the output vector. + o += B[j] * in_ptr[(ptrdiff_t) i - (ptrdiff_t) j]; + } + + // If output is higher than 32768, saturate it. Same with negative side + // 2^27 = 134217728, which corresponds to 32768 in Q12 + + // Saturate the output + o = WEBRTC_SPL_SAT((int32_t)134215679, o, (int32_t)-134217728); + + *out_ptr++ = (int16_t)((o + (int32_t)2048) >> 12); + } + return; +} diff --git a/third_party/libwebrtc/common_audio/signal_processing/get_hanning_window.c b/third_party/libwebrtc/common_audio/signal_processing/get_hanning_window.c new file mode 100644 index 0000000000..8f29da8d9b --- /dev/null +++ b/third_party/libwebrtc/common_audio/signal_processing/get_hanning_window.c @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +/* + * This file contains the function WebRtcSpl_GetHanningWindow(). + * The description header can be found in signal_processing_library.h + * + */ + +#include "common_audio/signal_processing/include/signal_processing_library.h" + +// Hanning table with 256 entries +static const int16_t kHanningTable[] = { + 1, 2, 6, 10, 15, 22, 30, 39, + 50, 62, 75, 89, 104, 121, 138, 157, + 178, 199, 222, 246, 271, 297, 324, 353, + 383, 413, 446, 479, 513, 549, 586, 624, + 663, 703, 744, 787, 830, 875, 920, 967, + 1015, 1064, 1114, 1165, 1218, 1271, 1325, 1381, + 1437, 1494, 1553, 1612, 1673, 1734, 1796, 1859, + 1924, 1989, 2055, 2122, 2190, 2259, 2329, 2399, + 2471, 2543, 2617, 2691, 2765, 2841, 2918, 2995, + 3073, 3152, 3232, 3312, 3393, 3475, 3558, 3641, + 3725, 3809, 3895, 3980, 4067, 4154, 4242, 4330, + 4419, 4509, 4599, 4689, 4781, 4872, 4964, 5057, + 5150, 5244, 5338, 5432, 5527, 5622, 5718, 5814, + 5910, 6007, 6104, 6202, 6299, 6397, 6495, 6594, + 6693, 6791, 6891, 6990, 7090, 7189, 7289, 7389, + 7489, 7589, 7690, 7790, 7890, 7991, 8091, 8192, + 8293, 8393, 8494, 8594, 8694, 8795, 8895, 8995, + 9095, 9195, 9294, 9394, 9493, 9593, 9691, 9790, + 9889, 9987, 10085, 10182, 10280, 10377, 10474, 10570, +10666, 10762, 10857, 10952, 11046, 11140, 11234, 11327, +11420, 11512, 11603, 11695, 11785, 11875, 11965, 12054, +12142, 12230, 12317, 12404, 12489, 12575, 12659, 12743, +12826, 12909, 12991, 13072, 13152, 13232, 13311, 13389, +13466, 13543, 13619, 13693, 13767, 13841, 13913, 13985, +14055, 14125, 14194, 14262, 14329, 14395, 14460, 14525, +14588, 14650, 14711, 14772, 14831, 14890, 14947, 15003, +15059, 15113, 15166, 15219, 15270, 15320, 15369, 15417, +15464, 15509, 15554, 15597, 15640, 15681, 15721, 15760, +15798, 15835, 15871, 15905, 15938, 15971, 16001, 16031, +16060, 16087, 16113, 16138, 16162, 16185, 16206, 16227, +16246, 16263, 16280, 16295, 16309, 16322, 16334, 16345, +16354, 16362, 16369, 16374, 16378, 16382, 16383, 16384 +}; + +void WebRtcSpl_GetHanningWindow(int16_t *v, size_t size) +{ + size_t jj; + int16_t *vptr1; + + int32_t index; + int32_t factor = ((int32_t)0x40000000); + + factor = WebRtcSpl_DivW32W16(factor, (int16_t)size); + if (size < 513) + index = (int32_t)-0x200000; + else + index = (int32_t)-0x100000; + vptr1 = v; + + for (jj = 0; jj < size; jj++) + { + index += factor; + (*vptr1++) = kHanningTable[index >> 22]; + } + +} diff --git a/third_party/libwebrtc/common_audio/signal_processing/get_scaling_square.c b/third_party/libwebrtc/common_audio/signal_processing/get_scaling_square.c new file mode 100644 index 0000000000..4eb126941e --- /dev/null +++ b/third_party/libwebrtc/common_audio/signal_processing/get_scaling_square.c @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +/* + * This file contains the function WebRtcSpl_GetScalingSquare(). + * The description header can be found in signal_processing_library.h + * + */ + +#include "common_audio/signal_processing/include/signal_processing_library.h" + +int16_t WebRtcSpl_GetScalingSquare(int16_t* in_vector, + size_t in_vector_length, + size_t times) +{ + int16_t nbits = WebRtcSpl_GetSizeInBits((uint32_t)times); + size_t i; + int16_t smax = -1; + int16_t sabs; + int16_t *sptr = in_vector; + int16_t t; + size_t looptimes = in_vector_length; + + for (i = looptimes; i > 0; i--) + { + sabs = (*sptr > 0 ? *sptr++ : -*sptr++); + smax = (sabs > smax ? sabs : smax); + } + t = WebRtcSpl_NormW32(WEBRTC_SPL_MUL(smax, smax)); + + if (smax == 0) + { + return 0; // Since norm(0) returns 0 + } else + { + return (t > nbits) ? 0 : nbits - t; + } +} diff --git a/third_party/libwebrtc/common_audio/signal_processing/ilbc_specific_functions.c b/third_party/libwebrtc/common_audio/signal_processing/ilbc_specific_functions.c new file mode 100644 index 0000000000..cbdd3dcbcd --- /dev/null +++ b/third_party/libwebrtc/common_audio/signal_processing/ilbc_specific_functions.c @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +/* + * This file contains implementations of the iLBC specific functions + * WebRtcSpl_ReverseOrderMultArrayElements() + * WebRtcSpl_ElementwiseVectorMult() + * WebRtcSpl_AddVectorsAndShift() + * WebRtcSpl_AddAffineVectorToVector() + * WebRtcSpl_AffineTransformVector() + * + */ + +#include "common_audio/signal_processing/include/signal_processing_library.h" + +void WebRtcSpl_ReverseOrderMultArrayElements(int16_t *out, const int16_t *in, + const int16_t *win, + size_t vector_length, + int16_t right_shifts) +{ + size_t i; + int16_t *outptr = out; + const int16_t *inptr = in; + const int16_t *winptr = win; + for (i = 0; i < vector_length; i++) + { + *outptr++ = (int16_t)((*inptr++ * *winptr--) >> right_shifts); + } +} + +void WebRtcSpl_ElementwiseVectorMult(int16_t *out, const int16_t *in, + const int16_t *win, size_t vector_length, + int16_t right_shifts) +{ + size_t i; + int16_t *outptr = out; + const int16_t *inptr = in; + const int16_t *winptr = win; + for (i = 0; i < vector_length; i++) + { + *outptr++ = (int16_t)((*inptr++ * *winptr++) >> right_shifts); + } +} + +void WebRtcSpl_AddVectorsAndShift(int16_t *out, const int16_t *in1, + const int16_t *in2, size_t vector_length, + int16_t right_shifts) +{ + size_t i; + int16_t *outptr = out; + const int16_t *in1ptr = in1; + const int16_t *in2ptr = in2; + for (i = vector_length; i > 0; i--) + { + (*outptr++) = (int16_t)(((*in1ptr++) + (*in2ptr++)) >> right_shifts); + } +} + +void WebRtcSpl_AddAffineVectorToVector(int16_t *out, const int16_t *in, + int16_t gain, int32_t add_constant, + int16_t right_shifts, + size_t vector_length) +{ + size_t i; + + for (i = 0; i < vector_length; i++) + { + out[i] += (int16_t)((in[i] * gain + add_constant) >> right_shifts); + } +} + +void WebRtcSpl_AffineTransformVector(int16_t *out, const int16_t *in, + int16_t gain, int32_t add_constant, + int16_t right_shifts, size_t vector_length) +{ + size_t i; + + for (i = 0; i < vector_length; i++) + { + out[i] = (int16_t)((in[i] * gain + add_constant) >> right_shifts); + } +} diff --git a/third_party/libwebrtc/common_audio/signal_processing/include/real_fft.h b/third_party/libwebrtc/common_audio/signal_processing/include/real_fft.h new file mode 100644 index 0000000000..a0da5096c1 --- /dev/null +++ b/third_party/libwebrtc/common_audio/signal_processing/include/real_fft.h @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_REAL_FFT_H_ +#define COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_REAL_FFT_H_ + +#include <stdint.h> + +// For ComplexFFT(), the maximum fft order is 10; +// WebRTC APM uses orders of only 7 and 8. +enum { kMaxFFTOrder = 10 }; + +struct RealFFT; + +#ifdef __cplusplus +extern "C" { +#endif + +struct RealFFT* WebRtcSpl_CreateRealFFT(int order); +void WebRtcSpl_FreeRealFFT(struct RealFFT* self); + +// Compute an FFT for a real-valued signal of length of 2^order, +// where 1 < order <= MAX_FFT_ORDER. Transform length is determined by the +// specification structure, which must be initialized prior to calling the FFT +// function with WebRtcSpl_CreateRealFFT(). +// The relationship between the input and output sequences can +// be expressed in terms of the DFT, i.e.: +// x[n] = (2^(-scalefactor)/N) . SUM[k=0,...,N-1] X[k].e^(jnk.2.pi/N) +// n=0,1,2,...N-1 +// N=2^order. +// The conjugate-symmetric output sequence is represented using a CCS vector, +// which is of length N+2, and is organized as follows: +// Index: 0 1 2 3 4 5 . . . N-2 N-1 N N+1 +// Component: R0 0 R1 I1 R2 I2 . . . R[N/2-1] I[N/2-1] R[N/2] 0 +// where R[n] and I[n], respectively, denote the real and imaginary components +// for FFT bin 'n'. Bins are numbered from 0 to N/2, where N is the FFT length. +// Bin index 0 corresponds to the DC component, and bin index N/2 corresponds to +// the foldover frequency. +// +// Input Arguments: +// self - pointer to preallocated and initialized FFT specification structure. +// real_data_in - the input signal. For an ARM Neon platform, it must be +// aligned on a 32-byte boundary. +// +// Output Arguments: +// complex_data_out - the output complex signal with (2^order + 2) 16-bit +// elements. For an ARM Neon platform, it must be different +// from real_data_in, and aligned on a 32-byte boundary. +// +// Return Value: +// 0 - FFT calculation is successful. +// -1 - Error with bad arguments (null pointers). +int WebRtcSpl_RealForwardFFT(struct RealFFT* self, + const int16_t* real_data_in, + int16_t* complex_data_out); + +// Compute the inverse FFT for a conjugate-symmetric input sequence of length of +// 2^order, where 1 < order <= MAX_FFT_ORDER. Transform length is determined by +// the specification structure, which must be initialized prior to calling the +// FFT function with WebRtcSpl_CreateRealFFT(). +// For a transform of length M, the input sequence is represented using a packed +// CCS vector of length M+2, which is explained in the comments for +// WebRtcSpl_RealForwardFFTC above. +// +// Input Arguments: +// self - pointer to preallocated and initialized FFT specification structure. +// complex_data_in - the input complex signal with (2^order + 2) 16-bit +// elements. For an ARM Neon platform, it must be aligned on +// a 32-byte boundary. +// +// Output Arguments: +// real_data_out - the output real signal. For an ARM Neon platform, it must +// be different to complex_data_in, and aligned on a 32-byte +// boundary. +// +// Return Value: +// 0 or a positive number - a value that the elements in the `real_data_out` +// should be shifted left with in order to get +// correct physical values. +// -1 - Error with bad arguments (null pointers). +int WebRtcSpl_RealInverseFFT(struct RealFFT* self, + const int16_t* complex_data_in, + int16_t* real_data_out); + +#ifdef __cplusplus +} +#endif + +#endif // COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_REAL_FFT_H_ diff --git a/third_party/libwebrtc/common_audio/signal_processing/include/signal_processing_library.h b/third_party/libwebrtc/common_audio/signal_processing/include/signal_processing_library.h new file mode 100644 index 0000000000..48c9b309b4 --- /dev/null +++ b/third_party/libwebrtc/common_audio/signal_processing/include/signal_processing_library.h @@ -0,0 +1,1635 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * This header file includes all of the fix point signal processing library + * (SPL) function descriptions and declarations. For specific function calls, + * see bottom of file. + */ + +#ifndef COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SIGNAL_PROCESSING_LIBRARY_H_ +#define COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SIGNAL_PROCESSING_LIBRARY_H_ + +#include <string.h> + +#include "common_audio/signal_processing/dot_product_with_scale.h" + +// Macros specific for the fixed point implementation +#define WEBRTC_SPL_WORD16_MAX 32767 +#define WEBRTC_SPL_WORD16_MIN -32768 +#define WEBRTC_SPL_WORD32_MAX (int32_t)0x7fffffff +#define WEBRTC_SPL_WORD32_MIN (int32_t)0x80000000 +#define WEBRTC_SPL_MAX_LPC_ORDER 14 +#define WEBRTC_SPL_MIN(A, B) (A < B ? A : B) // Get min value +#define WEBRTC_SPL_MAX(A, B) (A > B ? A : B) // Get max value +// TODO(kma/bjorn): For the next two macros, investigate how to correct the code +// for inputs of a = WEBRTC_SPL_WORD16_MIN or WEBRTC_SPL_WORD32_MIN. +#define WEBRTC_SPL_ABS_W16(a) (((int16_t)a >= 0) ? ((int16_t)a) : -((int16_t)a)) +#define WEBRTC_SPL_ABS_W32(a) (((int32_t)a >= 0) ? ((int32_t)a) : -((int32_t)a)) + +#define WEBRTC_SPL_MUL(a, b) ((int32_t)((int32_t)(a) * (int32_t)(b))) +#define WEBRTC_SPL_UMUL(a, b) ((uint32_t)((uint32_t)(a) * (uint32_t)(b))) +#define WEBRTC_SPL_UMUL_32_16(a, b) ((uint32_t)((uint32_t)(a) * (uint16_t)(b))) +#define WEBRTC_SPL_MUL_16_U16(a, b) ((int32_t)(int16_t)(a) * (uint16_t)(b)) + +// clang-format off +// clang-format would choose some identation +// leading to presubmit error (cpplint.py) +#ifndef WEBRTC_ARCH_ARM_V7 +// For ARMv7 platforms, these are inline functions in spl_inl_armv7.h +#ifndef MIPS32_LE +// For MIPS platforms, these are inline functions in spl_inl_mips.h +#define WEBRTC_SPL_MUL_16_16(a, b) ((int32_t)(((int16_t)(a)) * ((int16_t)(b)))) +#define WEBRTC_SPL_MUL_16_32_RSFT16(a, b) \ + (WEBRTC_SPL_MUL_16_16(a, b >> 16) + \ + ((WEBRTC_SPL_MUL_16_16(a, (b & 0xffff) >> 1) + 0x4000) >> 15)) +#endif +#endif + +#define WEBRTC_SPL_MUL_16_32_RSFT11(a, b) \ + (WEBRTC_SPL_MUL_16_16(a, (b) >> 16) * (1 << 5) + \ + (((WEBRTC_SPL_MUL_16_U16(a, (uint16_t)(b)) >> 1) + 0x0200) >> 10)) +#define WEBRTC_SPL_MUL_16_32_RSFT14(a, b) \ + (WEBRTC_SPL_MUL_16_16(a, (b) >> 16) * (1 << 2) + \ + (((WEBRTC_SPL_MUL_16_U16(a, (uint16_t)(b)) >> 1) + 0x1000) >> 13)) +#define WEBRTC_SPL_MUL_16_32_RSFT15(a, b) \ + ((WEBRTC_SPL_MUL_16_16(a, (b) >> 16) * (1 << 1)) + \ + (((WEBRTC_SPL_MUL_16_U16(a, (uint16_t)(b)) >> 1) + 0x2000) >> 14)) +// clang-format on + +#define WEBRTC_SPL_MUL_16_16_RSFT(a, b, c) (WEBRTC_SPL_MUL_16_16(a, b) >> (c)) + +#define WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(a, b, c) \ + ((WEBRTC_SPL_MUL_16_16(a, b) + ((int32_t)(((int32_t)1) << ((c)-1)))) >> (c)) + +// C + the 32 most significant bits of A * B +#define WEBRTC_SPL_SCALEDIFF32(A, B, C) \ + (C + (B >> 16) * A + (((uint32_t)(B & 0x0000FFFF) * A) >> 16)) + +#define WEBRTC_SPL_SAT(a, b, c) (b > a ? a : b < c ? c : b) + +// Shifting with negative numbers allowed +// Positive means left shift +#define WEBRTC_SPL_SHIFT_W32(x, c) ((c) >= 0 ? (x) * (1 << (c)) : (x) >> -(c)) + +// Shifting with negative numbers not allowed +// We cannot do casting here due to signed/unsigned problem +#define WEBRTC_SPL_LSHIFT_W32(x, c) ((x) << (c)) + +#define WEBRTC_SPL_RSHIFT_U32(x, c) ((uint32_t)(x) >> (c)) + +#define WEBRTC_SPL_RAND(a) ((int16_t)((((int16_t)a * 18816) >> 7) & 0x00007fff)) + +#ifdef __cplusplus +extern "C" { +#endif + +#define WEBRTC_SPL_MEMCPY_W16(v1, v2, length) \ + memcpy(v1, v2, (length) * sizeof(int16_t)) + +// inline functions: +#include "common_audio/signal_processing/include/spl_inl.h" + +// third party math functions +#include "common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.h" + +int16_t WebRtcSpl_GetScalingSquare(int16_t* in_vector, + size_t in_vector_length, + size_t times); + +// Copy and set operations. Implementation in copy_set_operations.c. +// Descriptions at bottom of file. +void WebRtcSpl_MemSetW16(int16_t* vector, + int16_t set_value, + size_t vector_length); +void WebRtcSpl_MemSetW32(int32_t* vector, + int32_t set_value, + size_t vector_length); +void WebRtcSpl_MemCpyReversedOrder(int16_t* out_vector, + int16_t* in_vector, + size_t vector_length); +void WebRtcSpl_CopyFromEndW16(const int16_t* in_vector, + size_t in_vector_length, + size_t samples, + int16_t* out_vector); +void WebRtcSpl_ZerosArrayW16(int16_t* vector, size_t vector_length); +void WebRtcSpl_ZerosArrayW32(int32_t* vector, size_t vector_length); +// End: Copy and set operations. + +// Minimum and maximum operation functions and their pointers. +// Implementation in min_max_operations.c. + +// Returns the largest absolute value in a signed 16-bit vector. +// +// Input: +// - vector : 16-bit input vector. +// - length : Number of samples in vector. +// +// Return value : Maximum absolute value in vector. +typedef int16_t (*MaxAbsValueW16)(const int16_t* vector, size_t length); +extern const MaxAbsValueW16 WebRtcSpl_MaxAbsValueW16; +int16_t WebRtcSpl_MaxAbsValueW16C(const int16_t* vector, size_t length); +#if defined(WEBRTC_HAS_NEON) +int16_t WebRtcSpl_MaxAbsValueW16Neon(const int16_t* vector, size_t length); +#endif +#if defined(MIPS32_LE) +int16_t WebRtcSpl_MaxAbsValueW16_mips(const int16_t* vector, size_t length); +#endif + +// Returns the largest absolute value in a signed 32-bit vector. +// +// Input: +// - vector : 32-bit input vector. +// - length : Number of samples in vector. +// +// Return value : Maximum absolute value in vector. +typedef int32_t (*MaxAbsValueW32)(const int32_t* vector, size_t length); +extern const MaxAbsValueW32 WebRtcSpl_MaxAbsValueW32; +int32_t WebRtcSpl_MaxAbsValueW32C(const int32_t* vector, size_t length); +#if defined(WEBRTC_HAS_NEON) +int32_t WebRtcSpl_MaxAbsValueW32Neon(const int32_t* vector, size_t length); +#endif +#if defined(MIPS_DSP_R1_LE) +int32_t WebRtcSpl_MaxAbsValueW32_mips(const int32_t* vector, size_t length); +#endif + +// Returns the maximum value of a 16-bit vector. +// +// Input: +// - vector : 16-bit input vector. +// - length : Number of samples in vector. +// +// Return value : Maximum sample value in `vector`. +typedef int16_t (*MaxValueW16)(const int16_t* vector, size_t length); +extern const MaxValueW16 WebRtcSpl_MaxValueW16; +int16_t WebRtcSpl_MaxValueW16C(const int16_t* vector, size_t length); +#if defined(WEBRTC_HAS_NEON) +int16_t WebRtcSpl_MaxValueW16Neon(const int16_t* vector, size_t length); +#endif +#if defined(MIPS32_LE) +int16_t WebRtcSpl_MaxValueW16_mips(const int16_t* vector, size_t length); +#endif + +// Returns the maximum value of a 32-bit vector. +// +// Input: +// - vector : 32-bit input vector. +// - length : Number of samples in vector. +// +// Return value : Maximum sample value in `vector`. +typedef int32_t (*MaxValueW32)(const int32_t* vector, size_t length); +extern const MaxValueW32 WebRtcSpl_MaxValueW32; +int32_t WebRtcSpl_MaxValueW32C(const int32_t* vector, size_t length); +#if defined(WEBRTC_HAS_NEON) +int32_t WebRtcSpl_MaxValueW32Neon(const int32_t* vector, size_t length); +#endif +#if defined(MIPS32_LE) +int32_t WebRtcSpl_MaxValueW32_mips(const int32_t* vector, size_t length); +#endif + +// Returns the minimum value of a 16-bit vector. +// +// Input: +// - vector : 16-bit input vector. +// - length : Number of samples in vector. +// +// Return value : Minimum sample value in `vector`. +typedef int16_t (*MinValueW16)(const int16_t* vector, size_t length); +extern const MinValueW16 WebRtcSpl_MinValueW16; +int16_t WebRtcSpl_MinValueW16C(const int16_t* vector, size_t length); +#if defined(WEBRTC_HAS_NEON) +int16_t WebRtcSpl_MinValueW16Neon(const int16_t* vector, size_t length); +#endif +#if defined(MIPS32_LE) +int16_t WebRtcSpl_MinValueW16_mips(const int16_t* vector, size_t length); +#endif + +// Returns the minimum value of a 32-bit vector. +// +// Input: +// - vector : 32-bit input vector. +// - length : Number of samples in vector. +// +// Return value : Minimum sample value in `vector`. +typedef int32_t (*MinValueW32)(const int32_t* vector, size_t length); +extern const MinValueW32 WebRtcSpl_MinValueW32; +int32_t WebRtcSpl_MinValueW32C(const int32_t* vector, size_t length); +#if defined(WEBRTC_HAS_NEON) +int32_t WebRtcSpl_MinValueW32Neon(const int32_t* vector, size_t length); +#endif +#if defined(MIPS32_LE) +int32_t WebRtcSpl_MinValueW32_mips(const int32_t* vector, size_t length); +#endif + +// Returns both the minimum and maximum values of a 16-bit vector. +// +// Input: +// - vector : 16-bit input vector. +// - length : Number of samples in vector. +// Ouput: +// - max_val : Maximum sample value in `vector`. +// - min_val : Minimum sample value in `vector`. +void WebRtcSpl_MinMaxW16(const int16_t* vector, + size_t length, + int16_t* min_val, + int16_t* max_val); +#if defined(WEBRTC_HAS_NEON) +void WebRtcSpl_MinMaxW16Neon(const int16_t* vector, + size_t length, + int16_t* min_val, + int16_t* max_val); +#endif + +// Returns the vector index to the largest absolute value of a 16-bit vector. +// +// Input: +// - vector : 16-bit input vector. +// - length : Number of samples in vector. +// +// Return value : Index to the maximum absolute value in vector. +// If there are multiple equal maxima, return the index of the +// first. -32768 will always have precedence over 32767 (despite +// -32768 presenting an int16 absolute value of 32767). +size_t WebRtcSpl_MaxAbsIndexW16(const int16_t* vector, size_t length); + +// Returns the element with the largest absolute value of a 16-bit vector. Note +// that this function can return a negative value. +// +// Input: +// - vector : 16-bit input vector. +// - length : Number of samples in vector. +// +// Return value : The element with the largest absolute value. Note that this +// may be a negative value. +int16_t WebRtcSpl_MaxAbsElementW16(const int16_t* vector, size_t length); + +// Returns the vector index to the maximum sample value of a 16-bit vector. +// +// Input: +// - vector : 16-bit input vector. +// - length : Number of samples in vector. +// +// Return value : Index to the maximum value in vector (if multiple +// indexes have the maximum, return the first). +size_t WebRtcSpl_MaxIndexW16(const int16_t* vector, size_t length); + +// Returns the vector index to the maximum sample value of a 32-bit vector. +// +// Input: +// - vector : 32-bit input vector. +// - length : Number of samples in vector. +// +// Return value : Index to the maximum value in vector (if multiple +// indexes have the maximum, return the first). +size_t WebRtcSpl_MaxIndexW32(const int32_t* vector, size_t length); + +// Returns the vector index to the minimum sample value of a 16-bit vector. +// +// Input: +// - vector : 16-bit input vector. +// - length : Number of samples in vector. +// +// Return value : Index to the mimimum value in vector (if multiple +// indexes have the minimum, return the first). +size_t WebRtcSpl_MinIndexW16(const int16_t* vector, size_t length); + +// Returns the vector index to the minimum sample value of a 32-bit vector. +// +// Input: +// - vector : 32-bit input vector. +// - length : Number of samples in vector. +// +// Return value : Index to the mimimum value in vector (if multiple +// indexes have the minimum, return the first). +size_t WebRtcSpl_MinIndexW32(const int32_t* vector, size_t length); + +// End: Minimum and maximum operations. + +// Vector scaling operations. Implementation in vector_scaling_operations.c. +// Description at bottom of file. +void WebRtcSpl_VectorBitShiftW16(int16_t* out_vector, + size_t vector_length, + const int16_t* in_vector, + int16_t right_shifts); +void WebRtcSpl_VectorBitShiftW32(int32_t* out_vector, + size_t vector_length, + const int32_t* in_vector, + int16_t right_shifts); +void WebRtcSpl_VectorBitShiftW32ToW16(int16_t* out_vector, + size_t vector_length, + const int32_t* in_vector, + int right_shifts); +void WebRtcSpl_ScaleVector(const int16_t* in_vector, + int16_t* out_vector, + int16_t gain, + size_t vector_length, + int16_t right_shifts); +void WebRtcSpl_ScaleVectorWithSat(const int16_t* in_vector, + int16_t* out_vector, + int16_t gain, + size_t vector_length, + int16_t right_shifts); +void WebRtcSpl_ScaleAndAddVectors(const int16_t* in_vector1, + int16_t gain1, + int right_shifts1, + const int16_t* in_vector2, + int16_t gain2, + int right_shifts2, + int16_t* out_vector, + size_t vector_length); + +// The functions (with related pointer) perform the vector operation: +// out_vector[k] = ((scale1 * in_vector1[k]) + (scale2 * in_vector2[k]) +// + round_value) >> right_shifts, +// where round_value = (1 << right_shifts) >> 1. +// +// Input: +// - in_vector1 : Input vector 1 +// - in_vector1_scale : Gain to be used for vector 1 +// - in_vector2 : Input vector 2 +// - in_vector2_scale : Gain to be used for vector 2 +// - right_shifts : Number of right bit shifts to be applied +// - length : Number of elements in the input vectors +// +// Output: +// - out_vector : Output vector +// Return value : 0 if OK, -1 if (in_vector1 == null +// || in_vector2 == null || out_vector == null +// || length <= 0 || right_shift < 0). +typedef int (*ScaleAndAddVectorsWithRound)(const int16_t* in_vector1, + int16_t in_vector1_scale, + const int16_t* in_vector2, + int16_t in_vector2_scale, + int right_shifts, + int16_t* out_vector, + size_t length); +extern const ScaleAndAddVectorsWithRound WebRtcSpl_ScaleAndAddVectorsWithRound; +int WebRtcSpl_ScaleAndAddVectorsWithRoundC(const int16_t* in_vector1, + int16_t in_vector1_scale, + const int16_t* in_vector2, + int16_t in_vector2_scale, + int right_shifts, + int16_t* out_vector, + size_t length); +#if defined(MIPS_DSP_R1_LE) +int WebRtcSpl_ScaleAndAddVectorsWithRound_mips(const int16_t* in_vector1, + int16_t in_vector1_scale, + const int16_t* in_vector2, + int16_t in_vector2_scale, + int right_shifts, + int16_t* out_vector, + size_t length); +#endif +// End: Vector scaling operations. + +// iLBC specific functions. Implementations in ilbc_specific_functions.c. +// Description at bottom of file. +void WebRtcSpl_ReverseOrderMultArrayElements(int16_t* out_vector, + const int16_t* in_vector, + const int16_t* window, + size_t vector_length, + int16_t right_shifts); +void WebRtcSpl_ElementwiseVectorMult(int16_t* out_vector, + const int16_t* in_vector, + const int16_t* window, + size_t vector_length, + int16_t right_shifts); +void WebRtcSpl_AddVectorsAndShift(int16_t* out_vector, + const int16_t* in_vector1, + const int16_t* in_vector2, + size_t vector_length, + int16_t right_shifts); +void WebRtcSpl_AddAffineVectorToVector(int16_t* out_vector, + const int16_t* in_vector, + int16_t gain, + int32_t add_constant, + int16_t right_shifts, + size_t vector_length); +void WebRtcSpl_AffineTransformVector(int16_t* out_vector, + const int16_t* in_vector, + int16_t gain, + int32_t add_constant, + int16_t right_shifts, + size_t vector_length); +// End: iLBC specific functions. + +// Signal processing operations. + +// A 32-bit fix-point implementation of auto-correlation computation +// +// Input: +// - in_vector : Vector to calculate autocorrelation upon +// - in_vector_length : Length (in samples) of `vector` +// - order : The order up to which the autocorrelation should be +// calculated +// +// Output: +// - result : auto-correlation values (values should be seen +// relative to each other since the absolute values +// might have been down shifted to avoid overflow) +// +// - scale : The number of left shifts required to obtain the +// auto-correlation in Q0 +// +// Return value : Number of samples in `result`, i.e. (order+1) +size_t WebRtcSpl_AutoCorrelation(const int16_t* in_vector, + size_t in_vector_length, + size_t order, + int32_t* result, + int* scale); + +// A 32-bit fix-point implementation of the Levinson-Durbin algorithm that +// does NOT use the 64 bit class +// +// Input: +// - auto_corr : Vector with autocorrelation values of length >= `order`+1 +// - order : The LPC filter order (support up to order 20) +// +// Output: +// - lpc_coef : lpc_coef[0..order] LPC coefficients in Q12 +// - refl_coef : refl_coef[0...order-1]| Reflection coefficients in Q15 +// +// Return value : 1 for stable 0 for unstable +int16_t WebRtcSpl_LevinsonDurbin(const int32_t* auto_corr, + int16_t* lpc_coef, + int16_t* refl_coef, + size_t order); + +// Converts reflection coefficients `refl_coef` to LPC coefficients `lpc_coef`. +// This version is a 16 bit operation. +// +// NOTE: The 16 bit refl_coef -> lpc_coef conversion might result in a +// "slightly unstable" filter (i.e., a pole just outside the unit circle) in +// "rare" cases even if the reflection coefficients are stable. +// +// Input: +// - refl_coef : Reflection coefficients in Q15 that should be converted +// to LPC coefficients +// - use_order : Number of coefficients in `refl_coef` +// +// Output: +// - lpc_coef : LPC coefficients in Q12 +void WebRtcSpl_ReflCoefToLpc(const int16_t* refl_coef, + int use_order, + int16_t* lpc_coef); + +// Converts LPC coefficients `lpc_coef` to reflection coefficients `refl_coef`. +// This version is a 16 bit operation. +// The conversion is implemented by the step-down algorithm. +// +// Input: +// - lpc_coef : LPC coefficients in Q12, that should be converted to +// reflection coefficients +// - use_order : Number of coefficients in `lpc_coef` +// +// Output: +// - refl_coef : Reflection coefficients in Q15. +void WebRtcSpl_LpcToReflCoef(int16_t* lpc_coef, + int use_order, + int16_t* refl_coef); + +// Calculates reflection coefficients (16 bit) from auto-correlation values +// +// Input: +// - auto_corr : Auto-correlation values +// - use_order : Number of coefficients wanted be calculated +// +// Output: +// - refl_coef : Reflection coefficients in Q15. +void WebRtcSpl_AutoCorrToReflCoef(const int32_t* auto_corr, + int use_order, + int16_t* refl_coef); + +// The functions (with related pointer) calculate the cross-correlation between +// two sequences `seq1` and `seq2`. +// `seq1` is fixed and `seq2` slides as the pointer is increased with the +// amount `step_seq2`. Note the arguments should obey the relationship: +// `dim_seq` - 1 + `step_seq2` * (`dim_cross_correlation` - 1) < +// buffer size of `seq2` +// +// Input: +// - seq1 : First sequence (fixed throughout the correlation) +// - seq2 : Second sequence (slides `step_vector2` for each +// new correlation) +// - dim_seq : Number of samples to use in the cross-correlation +// - dim_cross_correlation : Number of cross-correlations to calculate (the +// start position for `vector2` is updated for each +// new one) +// - right_shifts : Number of right bit shifts to use. This will +// become the output Q-domain. +// - step_seq2 : How many (positive or negative) steps the +// `vector2` pointer should be updated for each new +// cross-correlation value. +// +// Output: +// - cross_correlation : The cross-correlation in Q(-right_shifts) +typedef void (*CrossCorrelation)(int32_t* cross_correlation, + const int16_t* seq1, + const int16_t* seq2, + size_t dim_seq, + size_t dim_cross_correlation, + int right_shifts, + int step_seq2); +extern const CrossCorrelation WebRtcSpl_CrossCorrelation; +void WebRtcSpl_CrossCorrelationC(int32_t* cross_correlation, + const int16_t* seq1, + const int16_t* seq2, + size_t dim_seq, + size_t dim_cross_correlation, + int right_shifts, + int step_seq2); +#if defined(WEBRTC_HAS_NEON) +void WebRtcSpl_CrossCorrelationNeon(int32_t* cross_correlation, + const int16_t* seq1, + const int16_t* seq2, + size_t dim_seq, + size_t dim_cross_correlation, + int right_shifts, + int step_seq2); +#endif +#if defined(MIPS32_LE) +void WebRtcSpl_CrossCorrelation_mips(int32_t* cross_correlation, + const int16_t* seq1, + const int16_t* seq2, + size_t dim_seq, + size_t dim_cross_correlation, + int right_shifts, + int step_seq2); +#endif + +// Creates (the first half of) a Hanning window. Size must be at least 1 and +// at most 512. +// +// Input: +// - size : Length of the requested Hanning window (1 to 512) +// +// Output: +// - window : Hanning vector in Q14. +void WebRtcSpl_GetHanningWindow(int16_t* window, size_t size); + +// Calculates y[k] = sqrt(1 - x[k]^2) for each element of the input vector +// `in_vector`. Input and output values are in Q15. +// +// Inputs: +// - in_vector : Values to calculate sqrt(1 - x^2) of +// - vector_length : Length of vector `in_vector` +// +// Output: +// - out_vector : Output values in Q15 +void WebRtcSpl_SqrtOfOneMinusXSquared(int16_t* in_vector, + size_t vector_length, + int16_t* out_vector); +// End: Signal processing operations. + +// Randomization functions. Implementations collected in +// randomization_functions.c and descriptions at bottom of this file. +int16_t WebRtcSpl_RandU(uint32_t* seed); +int16_t WebRtcSpl_RandN(uint32_t* seed); +int16_t WebRtcSpl_RandUArray(int16_t* vector, + int16_t vector_length, + uint32_t* seed); +// End: Randomization functions. + +// Math functions +int32_t WebRtcSpl_Sqrt(int32_t value); + +// Divisions. Implementations collected in division_operations.c and +// descriptions at bottom of this file. +uint32_t WebRtcSpl_DivU32U16(uint32_t num, uint16_t den); +int32_t WebRtcSpl_DivW32W16(int32_t num, int16_t den); +int16_t WebRtcSpl_DivW32W16ResW16(int32_t num, int16_t den); +int32_t WebRtcSpl_DivResultInQ31(int32_t num, int32_t den); +int32_t WebRtcSpl_DivW32HiLow(int32_t num, int16_t den_hi, int16_t den_low); +// End: Divisions. + +int32_t WebRtcSpl_Energy(int16_t* vector, + size_t vector_length, + int* scale_factor); + +// Filter operations. +size_t WebRtcSpl_FilterAR(const int16_t* ar_coef, + size_t ar_coef_length, + const int16_t* in_vector, + size_t in_vector_length, + int16_t* filter_state, + size_t filter_state_length, + int16_t* filter_state_low, + size_t filter_state_low_length, + int16_t* out_vector, + int16_t* out_vector_low, + size_t out_vector_low_length); + +// WebRtcSpl_FilterMAFastQ12(...) +// +// Performs a MA filtering on a vector in Q12 +// +// Input: +// - in_vector : Input samples (state in positions +// in_vector[-order] .. in_vector[-1]) +// - ma_coef : Filter coefficients (in Q12) +// - ma_coef_length : Number of B coefficients (order+1) +// - vector_length : Number of samples to be filtered +// +// Output: +// - out_vector : Filtered samples +// +void WebRtcSpl_FilterMAFastQ12(const int16_t* in_vector, + int16_t* out_vector, + const int16_t* ma_coef, + size_t ma_coef_length, + size_t vector_length); + +// Performs a AR filtering on a vector in Q12 +// Input: +// - data_in : Input samples +// - data_out : State information in positions +// data_out[-order] .. data_out[-1] +// - coefficients : Filter coefficients (in Q12) +// - coefficients_length: Number of coefficients (order+1) +// - data_length : Number of samples to be filtered +// Output: +// - data_out : Filtered samples +void WebRtcSpl_FilterARFastQ12(const int16_t* data_in, + int16_t* data_out, + const int16_t* __restrict coefficients, + size_t coefficients_length, + size_t data_length); + +// The functions (with related pointer) perform a MA down sampling filter +// on a vector. +// Input: +// - data_in : Input samples (state in positions +// data_in[-order] .. data_in[-1]) +// - data_in_length : Number of samples in `data_in` to be filtered. +// This must be at least +// `delay` + `factor`*(`out_vector_length`-1) + 1) +// - data_out_length : Number of down sampled samples desired +// - coefficients : Filter coefficients (in Q12) +// - coefficients_length: Number of coefficients (order+1) +// - factor : Decimation factor +// - delay : Delay of filter (compensated for in out_vector) +// Output: +// - data_out : Filtered samples +// Return value : 0 if OK, -1 if `in_vector` is too short +typedef int (*DownsampleFast)(const int16_t* data_in, + size_t data_in_length, + int16_t* data_out, + size_t data_out_length, + const int16_t* __restrict coefficients, + size_t coefficients_length, + int factor, + size_t delay); +extern const DownsampleFast WebRtcSpl_DownsampleFast; +int WebRtcSpl_DownsampleFastC(const int16_t* data_in, + size_t data_in_length, + int16_t* data_out, + size_t data_out_length, + const int16_t* __restrict coefficients, + size_t coefficients_length, + int factor, + size_t delay); +#if defined(WEBRTC_HAS_NEON) +int WebRtcSpl_DownsampleFastNeon(const int16_t* data_in, + size_t data_in_length, + int16_t* data_out, + size_t data_out_length, + const int16_t* __restrict coefficients, + size_t coefficients_length, + int factor, + size_t delay); +#endif +#if defined(MIPS32_LE) +int WebRtcSpl_DownsampleFast_mips(const int16_t* data_in, + size_t data_in_length, + int16_t* data_out, + size_t data_out_length, + const int16_t* __restrict coefficients, + size_t coefficients_length, + int factor, + size_t delay); +#endif + +// End: Filter operations. + +// FFT operations + +int WebRtcSpl_ComplexFFT(int16_t vector[], int stages, int mode); +int WebRtcSpl_ComplexIFFT(int16_t vector[], int stages, int mode); + +// Treat a 16-bit complex data buffer `complex_data` as an array of 32-bit +// values, and swap elements whose indexes are bit-reverses of each other. +// +// Input: +// - complex_data : Complex data buffer containing 2^`stages` real +// elements interleaved with 2^`stages` imaginary +// elements: [Re Im Re Im Re Im....] +// - stages : Number of FFT stages. Must be at least 3 and at most +// 10, since the table WebRtcSpl_kSinTable1024[] is 1024 +// elements long. +// +// Output: +// - complex_data : The complex data buffer. + +void WebRtcSpl_ComplexBitReverse(int16_t* __restrict complex_data, int stages); + +// End: FFT operations + +/************************************************************ + * + * RESAMPLING FUNCTIONS AND THEIR STRUCTS ARE DEFINED BELOW + * + ************************************************************/ + +/******************************************************************* + * resample.c + * + * Includes the following resampling combinations + * 22 kHz -> 16 kHz + * 16 kHz -> 22 kHz + * 22 kHz -> 8 kHz + * 8 kHz -> 22 kHz + * + ******************************************************************/ + +// state structure for 22 -> 16 resampler +typedef struct { + int32_t S_22_44[8]; + int32_t S_44_32[8]; + int32_t S_32_16[8]; +} WebRtcSpl_State22khzTo16khz; + +void WebRtcSpl_Resample22khzTo16khz(const int16_t* in, + int16_t* out, + WebRtcSpl_State22khzTo16khz* state, + int32_t* tmpmem); + +void WebRtcSpl_ResetResample22khzTo16khz(WebRtcSpl_State22khzTo16khz* state); + +// state structure for 16 -> 22 resampler +typedef struct { + int32_t S_16_32[8]; + int32_t S_32_22[8]; +} WebRtcSpl_State16khzTo22khz; + +void WebRtcSpl_Resample16khzTo22khz(const int16_t* in, + int16_t* out, + WebRtcSpl_State16khzTo22khz* state, + int32_t* tmpmem); + +void WebRtcSpl_ResetResample16khzTo22khz(WebRtcSpl_State16khzTo22khz* state); + +// state structure for 22 -> 8 resampler +typedef struct { + int32_t S_22_22[16]; + int32_t S_22_16[8]; + int32_t S_16_8[8]; +} WebRtcSpl_State22khzTo8khz; + +void WebRtcSpl_Resample22khzTo8khz(const int16_t* in, + int16_t* out, + WebRtcSpl_State22khzTo8khz* state, + int32_t* tmpmem); + +void WebRtcSpl_ResetResample22khzTo8khz(WebRtcSpl_State22khzTo8khz* state); + +// state structure for 8 -> 22 resampler +typedef struct { + int32_t S_8_16[8]; + int32_t S_16_11[8]; + int32_t S_11_22[8]; +} WebRtcSpl_State8khzTo22khz; + +void WebRtcSpl_Resample8khzTo22khz(const int16_t* in, + int16_t* out, + WebRtcSpl_State8khzTo22khz* state, + int32_t* tmpmem); + +void WebRtcSpl_ResetResample8khzTo22khz(WebRtcSpl_State8khzTo22khz* state); + +/******************************************************************* + * resample_fractional.c + * Functions for internal use in the other resample functions + * + * Includes the following resampling combinations + * 48 kHz -> 32 kHz + * 32 kHz -> 24 kHz + * 44 kHz -> 32 kHz + * + ******************************************************************/ + +void WebRtcSpl_Resample48khzTo32khz(const int32_t* In, int32_t* Out, size_t K); + +void WebRtcSpl_Resample32khzTo24khz(const int32_t* In, int32_t* Out, size_t K); + +void WebRtcSpl_Resample44khzTo32khz(const int32_t* In, int32_t* Out, size_t K); + +/******************************************************************* + * resample_48khz.c + * + * Includes the following resampling combinations + * 48 kHz -> 16 kHz + * 16 kHz -> 48 kHz + * 48 kHz -> 8 kHz + * 8 kHz -> 48 kHz + * + ******************************************************************/ + +typedef struct { + int32_t S_48_48[16]; + int32_t S_48_32[8]; + int32_t S_32_16[8]; +} WebRtcSpl_State48khzTo16khz; + +void WebRtcSpl_Resample48khzTo16khz(const int16_t* in, + int16_t* out, + WebRtcSpl_State48khzTo16khz* state, + int32_t* tmpmem); + +void WebRtcSpl_ResetResample48khzTo16khz(WebRtcSpl_State48khzTo16khz* state); + +typedef struct { + int32_t S_16_32[8]; + int32_t S_32_24[8]; + int32_t S_24_48[8]; +} WebRtcSpl_State16khzTo48khz; + +void WebRtcSpl_Resample16khzTo48khz(const int16_t* in, + int16_t* out, + WebRtcSpl_State16khzTo48khz* state, + int32_t* tmpmem); + +void WebRtcSpl_ResetResample16khzTo48khz(WebRtcSpl_State16khzTo48khz* state); + +typedef struct { + int32_t S_48_24[8]; + int32_t S_24_24[16]; + int32_t S_24_16[8]; + int32_t S_16_8[8]; +} WebRtcSpl_State48khzTo8khz; + +void WebRtcSpl_Resample48khzTo8khz(const int16_t* in, + int16_t* out, + WebRtcSpl_State48khzTo8khz* state, + int32_t* tmpmem); + +void WebRtcSpl_ResetResample48khzTo8khz(WebRtcSpl_State48khzTo8khz* state); + +typedef struct { + int32_t S_8_16[8]; + int32_t S_16_12[8]; + int32_t S_12_24[8]; + int32_t S_24_48[8]; +} WebRtcSpl_State8khzTo48khz; + +void WebRtcSpl_Resample8khzTo48khz(const int16_t* in, + int16_t* out, + WebRtcSpl_State8khzTo48khz* state, + int32_t* tmpmem); + +void WebRtcSpl_ResetResample8khzTo48khz(WebRtcSpl_State8khzTo48khz* state); + +/******************************************************************* + * resample_by_2.c + * + * Includes down and up sampling by a factor of two. + * + ******************************************************************/ + +void WebRtcSpl_DownsampleBy2(const int16_t* in, + size_t len, + int16_t* out, + int32_t* filtState); + +void WebRtcSpl_UpsampleBy2(const int16_t* in, + size_t len, + int16_t* out, + int32_t* filtState); + +/************************************************************ + * END OF RESAMPLING FUNCTIONS + ************************************************************/ +void WebRtcSpl_AnalysisQMF(const int16_t* in_data, + size_t in_data_length, + int16_t* low_band, + int16_t* high_band, + int32_t* filter_state1, + int32_t* filter_state2); +void WebRtcSpl_SynthesisQMF(const int16_t* low_band, + const int16_t* high_band, + size_t band_length, + int16_t* out_data, + int32_t* filter_state1, + int32_t* filter_state2); + +#ifdef __cplusplus +} +#endif // __cplusplus +#endif // COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SIGNAL_PROCESSING_LIBRARY_H_ + +// +// WebRtcSpl_AddSatW16(...) +// WebRtcSpl_AddSatW32(...) +// +// Returns the result of a saturated 16-bit, respectively 32-bit, addition of +// the numbers specified by the `var1` and `var2` parameters. +// +// Input: +// - var1 : Input variable 1 +// - var2 : Input variable 2 +// +// Return value : Added and saturated value +// + +// +// WebRtcSpl_SubSatW16(...) +// WebRtcSpl_SubSatW32(...) +// +// Returns the result of a saturated 16-bit, respectively 32-bit, subtraction +// of the numbers specified by the `var1` and `var2` parameters. +// +// Input: +// - var1 : Input variable 1 +// - var2 : Input variable 2 +// +// Returned value : Subtracted and saturated value +// + +// +// WebRtcSpl_GetSizeInBits(...) +// +// Returns the # of bits that are needed at the most to represent the number +// specified by the `value` parameter. +// +// Input: +// - value : Input value +// +// Return value : Number of bits needed to represent `value` +// + +// +// WebRtcSpl_NormW32(...) +// +// Norm returns the # of left shifts required to 32-bit normalize the 32-bit +// signed number specified by the `value` parameter. +// +// Input: +// - value : Input value +// +// Return value : Number of bit shifts needed to 32-bit normalize `value` +// + +// +// WebRtcSpl_NormW16(...) +// +// Norm returns the # of left shifts required to 16-bit normalize the 16-bit +// signed number specified by the `value` parameter. +// +// Input: +// - value : Input value +// +// Return value : Number of bit shifts needed to 32-bit normalize `value` +// + +// +// WebRtcSpl_NormU32(...) +// +// Norm returns the # of left shifts required to 32-bit normalize the unsigned +// 32-bit number specified by the `value` parameter. +// +// Input: +// - value : Input value +// +// Return value : Number of bit shifts needed to 32-bit normalize `value` +// + +// +// WebRtcSpl_GetScalingSquare(...) +// +// Returns the # of bits required to scale the samples specified in the +// `in_vector` parameter so that, if the squares of the samples are added the +// # of times specified by the `times` parameter, the 32-bit addition will not +// overflow (result in int32_t). +// +// Input: +// - in_vector : Input vector to check scaling on +// - in_vector_length : Samples in `in_vector` +// - times : Number of additions to be performed +// +// Return value : Number of right bit shifts needed to avoid +// overflow in the addition calculation +// + +// +// WebRtcSpl_MemSetW16(...) +// +// Sets all the values in the int16_t vector `vector` of length +// `vector_length` to the specified value `set_value` +// +// Input: +// - vector : Pointer to the int16_t vector +// - set_value : Value specified +// - vector_length : Length of vector +// + +// +// WebRtcSpl_MemSetW32(...) +// +// Sets all the values in the int32_t vector `vector` of length +// `vector_length` to the specified value `set_value` +// +// Input: +// - vector : Pointer to the int16_t vector +// - set_value : Value specified +// - vector_length : Length of vector +// + +// +// WebRtcSpl_MemCpyReversedOrder(...) +// +// Copies all the values from the source int16_t vector `in_vector` to a +// destination int16_t vector `out_vector`. It is done in reversed order, +// meaning that the first sample of `in_vector` is copied to the last sample of +// the `out_vector`. The procedure continues until the last sample of +// `in_vector` has been copied to the first sample of `out_vector`. This +// creates a reversed vector. Used in e.g. prediction in iLBC. +// +// Input: +// - in_vector : Pointer to the first sample in a int16_t vector +// of length `length` +// - vector_length : Number of elements to copy +// +// Output: +// - out_vector : Pointer to the last sample in a int16_t vector +// of length `length` +// + +// +// WebRtcSpl_CopyFromEndW16(...) +// +// Copies the rightmost `samples` of `in_vector` (of length `in_vector_length`) +// to the vector `out_vector`. +// +// Input: +// - in_vector : Input vector +// - in_vector_length : Number of samples in `in_vector` +// - samples : Number of samples to extract (from right side) +// from `in_vector` +// +// Output: +// - out_vector : Vector with the requested samples +// + +// +// WebRtcSpl_ZerosArrayW16(...) +// WebRtcSpl_ZerosArrayW32(...) +// +// Inserts the value "zero" in all positions of a w16 and a w32 vector +// respectively. +// +// Input: +// - vector_length : Number of samples in vector +// +// Output: +// - vector : Vector containing all zeros +// + +// +// WebRtcSpl_VectorBitShiftW16(...) +// WebRtcSpl_VectorBitShiftW32(...) +// +// Bit shifts all the values in a vector up or downwards. Different calls for +// int16_t and int32_t vectors respectively. +// +// Input: +// - vector_length : Length of vector +// - in_vector : Pointer to the vector that should be bit shifted +// - right_shifts : Number of right bit shifts (negative value gives left +// shifts) +// +// Output: +// - out_vector : Pointer to the result vector (can be the same as +// `in_vector`) +// + +// +// WebRtcSpl_VectorBitShiftW32ToW16(...) +// +// Bit shifts all the values in a int32_t vector up or downwards and +// stores the result as an int16_t vector. The function will saturate the +// signal if needed, before storing in the output vector. +// +// Input: +// - vector_length : Length of vector +// - in_vector : Pointer to the vector that should be bit shifted +// - right_shifts : Number of right bit shifts (negative value gives left +// shifts) +// +// Output: +// - out_vector : Pointer to the result vector (can be the same as +// `in_vector`) +// + +// +// WebRtcSpl_ScaleVector(...) +// +// Performs the vector operation: +// out_vector[k] = (gain*in_vector[k])>>right_shifts +// +// Input: +// - in_vector : Input vector +// - gain : Scaling gain +// - vector_length : Elements in the `in_vector` +// - right_shifts : Number of right bit shifts applied +// +// Output: +// - out_vector : Output vector (can be the same as `in_vector`) +// + +// +// WebRtcSpl_ScaleVectorWithSat(...) +// +// Performs the vector operation: +// out_vector[k] = SATURATE( (gain*in_vector[k])>>right_shifts ) +// +// Input: +// - in_vector : Input vector +// - gain : Scaling gain +// - vector_length : Elements in the `in_vector` +// - right_shifts : Number of right bit shifts applied +// +// Output: +// - out_vector : Output vector (can be the same as `in_vector`) +// + +// +// WebRtcSpl_ScaleAndAddVectors(...) +// +// Performs the vector operation: +// out_vector[k] = (gain1*in_vector1[k])>>right_shifts1 +// + (gain2*in_vector2[k])>>right_shifts2 +// +// Input: +// - in_vector1 : Input vector 1 +// - gain1 : Gain to be used for vector 1 +// - right_shifts1 : Right bit shift to be used for vector 1 +// - in_vector2 : Input vector 2 +// - gain2 : Gain to be used for vector 2 +// - right_shifts2 : Right bit shift to be used for vector 2 +// - vector_length : Elements in the input vectors +// +// Output: +// - out_vector : Output vector +// + +// +// WebRtcSpl_ReverseOrderMultArrayElements(...) +// +// Performs the vector operation: +// out_vector[n] = (in_vector[n]*window[-n])>>right_shifts +// +// Input: +// - in_vector : Input vector +// - window : Window vector (should be reversed). The pointer +// should be set to the last value in the vector +// - right_shifts : Number of right bit shift to be applied after the +// multiplication +// - vector_length : Number of elements in `in_vector` +// +// Output: +// - out_vector : Output vector (can be same as `in_vector`) +// + +// +// WebRtcSpl_ElementwiseVectorMult(...) +// +// Performs the vector operation: +// out_vector[n] = (in_vector[n]*window[n])>>right_shifts +// +// Input: +// - in_vector : Input vector +// - window : Window vector. +// - right_shifts : Number of right bit shift to be applied after the +// multiplication +// - vector_length : Number of elements in `in_vector` +// +// Output: +// - out_vector : Output vector (can be same as `in_vector`) +// + +// +// WebRtcSpl_AddVectorsAndShift(...) +// +// Performs the vector operation: +// out_vector[k] = (in_vector1[k] + in_vector2[k])>>right_shifts +// +// Input: +// - in_vector1 : Input vector 1 +// - in_vector2 : Input vector 2 +// - right_shifts : Number of right bit shift to be applied after the +// multiplication +// - vector_length : Number of elements in `in_vector1` and `in_vector2` +// +// Output: +// - out_vector : Output vector (can be same as `in_vector1`) +// + +// +// WebRtcSpl_AddAffineVectorToVector(...) +// +// Adds an affine transformed vector to another vector `out_vector`, i.e, +// performs +// out_vector[k] += (in_vector[k]*gain+add_constant)>>right_shifts +// +// Input: +// - in_vector : Input vector +// - gain : Gain value, used to multiply the in vector with +// - add_constant : Constant value to add (usually 1<<(right_shifts-1), +// but others can be used as well +// - right_shifts : Number of right bit shifts (0-16) +// - vector_length : Number of samples in `in_vector` and `out_vector` +// +// Output: +// - out_vector : Vector with the output +// + +// +// WebRtcSpl_AffineTransformVector(...) +// +// Affine transforms a vector, i.e, performs +// out_vector[k] = (in_vector[k]*gain+add_constant)>>right_shifts +// +// Input: +// - in_vector : Input vector +// - gain : Gain value, used to multiply the in vector with +// - add_constant : Constant value to add (usually 1<<(right_shifts-1), +// but others can be used as well +// - right_shifts : Number of right bit shifts (0-16) +// - vector_length : Number of samples in `in_vector` and `out_vector` +// +// Output: +// - out_vector : Vector with the output +// + +// +// WebRtcSpl_IncreaseSeed(...) +// +// Increases the seed (and returns the new value) +// +// Input: +// - seed : Seed for random calculation +// +// Output: +// - seed : Updated seed value +// +// Return value : The new seed value +// + +// +// WebRtcSpl_RandU(...) +// +// Produces a uniformly distributed value in the int16_t range +// +// Input: +// - seed : Seed for random calculation +// +// Output: +// - seed : Updated seed value +// +// Return value : Uniformly distributed value in the range +// [Word16_MIN...Word16_MAX] +// + +// +// WebRtcSpl_RandN(...) +// +// Produces a normal distributed value in the int16_t range +// +// Input: +// - seed : Seed for random calculation +// +// Output: +// - seed : Updated seed value +// +// Return value : N(0,1) value in the Q13 domain +// + +// +// WebRtcSpl_RandUArray(...) +// +// Produces a uniformly distributed vector with elements in the int16_t +// range +// +// Input: +// - vector_length : Samples wanted in the vector +// - seed : Seed for random calculation +// +// Output: +// - vector : Vector with the uniform values +// - seed : Updated seed value +// +// Return value : Number of samples in vector, i.e., `vector_length` +// + +// +// WebRtcSpl_Sqrt(...) +// +// Returns the square root of the input value `value`. The precision of this +// function is integer precision, i.e., sqrt(8) gives 2 as answer. +// If `value` is a negative number then 0 is returned. +// +// Algorithm: +// +// A sixth order Taylor Series expansion is used here to compute the square +// root of a number y^0.5 = (1+x)^0.5 +// where +// x = y-1 +// = 1+(x/2)-0.5*((x/2)^2+0.5*((x/2)^3-0.625*((x/2)^4+0.875*((x/2)^5) +// 0.5 <= x < 1 +// +// Input: +// - value : Value to calculate sqrt of +// +// Return value : Result of the sqrt calculation +// + +// +// WebRtcSpl_DivU32U16(...) +// +// Divides a uint32_t `num` by a uint16_t `den`. +// +// If `den`==0, (uint32_t)0xFFFFFFFF is returned. +// +// Input: +// - num : Numerator +// - den : Denominator +// +// Return value : Result of the division (as a uint32_t), i.e., the +// integer part of num/den. +// + +// +// WebRtcSpl_DivW32W16(...) +// +// Divides a int32_t `num` by a int16_t `den`. +// +// If `den`==0, (int32_t)0x7FFFFFFF is returned. +// +// Input: +// - num : Numerator +// - den : Denominator +// +// Return value : Result of the division (as a int32_t), i.e., the +// integer part of num/den. +// + +// +// WebRtcSpl_DivW32W16ResW16(...) +// +// Divides a int32_t `num` by a int16_t `den`, assuming that the +// result is less than 32768, otherwise an unpredictable result will occur. +// +// If `den`==0, (int16_t)0x7FFF is returned. +// +// Input: +// - num : Numerator +// - den : Denominator +// +// Return value : Result of the division (as a int16_t), i.e., the +// integer part of num/den. +// + +// +// WebRtcSpl_DivResultInQ31(...) +// +// Divides a int32_t `num` by a int16_t `den`, assuming that the +// absolute value of the denominator is larger than the numerator, otherwise +// an unpredictable result will occur. +// +// Input: +// - num : Numerator +// - den : Denominator +// +// Return value : Result of the division in Q31. +// + +// +// WebRtcSpl_DivW32HiLow(...) +// +// Divides a int32_t `num` by a denominator in hi, low format. The +// absolute value of the denominator has to be larger (or equal to) the +// numerator. +// +// Input: +// - num : Numerator +// - den_hi : High part of denominator +// - den_low : Low part of denominator +// +// Return value : Divided value in Q31 +// + +// +// WebRtcSpl_Energy(...) +// +// Calculates the energy of a vector +// +// Input: +// - vector : Vector which the energy should be calculated on +// - vector_length : Number of samples in vector +// +// Output: +// - scale_factor : Number of left bit shifts needed to get the physical +// energy value, i.e, to get the Q0 value +// +// Return value : Energy value in Q(-`scale_factor`) +// + +// +// WebRtcSpl_FilterAR(...) +// +// Performs a 32-bit AR filtering on a vector in Q12 +// +// Input: +// - ar_coef : AR-coefficient vector (values in Q12), +// ar_coef[0] must be 4096. +// - ar_coef_length : Number of coefficients in `ar_coef`. +// - in_vector : Vector to be filtered. +// - in_vector_length : Number of samples in `in_vector`. +// - filter_state : Current state (higher part) of the filter. +// - filter_state_length : Length (in samples) of `filter_state`. +// - filter_state_low : Current state (lower part) of the filter. +// - filter_state_low_length : Length (in samples) of `filter_state_low`. +// - out_vector_low_length : Maximum length (in samples) of +// `out_vector_low`. +// +// Output: +// - filter_state : Updated state (upper part) vector. +// - filter_state_low : Updated state (lower part) vector. +// - out_vector : Vector containing the upper part of the +// filtered values. +// - out_vector_low : Vector containing the lower part of the +// filtered values. +// +// Return value : Number of samples in the `out_vector`. +// + +// +// WebRtcSpl_ComplexIFFT(...) +// +// Complex Inverse FFT +// +// Computes an inverse complex 2^`stages`-point FFT on the input vector, which +// is in bit-reversed order. The original content of the vector is destroyed in +// the process, since the input is overwritten by the output, normal-ordered, +// FFT vector. With X as the input complex vector, y as the output complex +// vector and with M = 2^`stages`, the following is computed: +// +// M-1 +// y(k) = sum[X(i)*[cos(2*pi*i*k/M) + j*sin(2*pi*i*k/M)]] +// i=0 +// +// The implementations are optimized for speed, not for code size. It uses the +// decimation-in-time algorithm with radix-2 butterfly technique. +// +// Input: +// - vector : In pointer to complex vector containing 2^`stages` +// real elements interleaved with 2^`stages` imaginary +// elements. +// [ReImReImReIm....] +// The elements are in Q(-scale) domain, see more on Return +// Value below. +// +// - stages : Number of FFT stages. Must be at least 3 and at most 10, +// since the table WebRtcSpl_kSinTable1024[] is 1024 +// elements long. +// +// - mode : This parameter gives the user to choose how the FFT +// should work. +// mode==0: Low-complexity and Low-accuracy mode +// mode==1: High-complexity and High-accuracy mode +// +// Output: +// - vector : Out pointer to the FFT vector (the same as input). +// +// Return Value : The scale value that tells the number of left bit shifts +// that the elements in the `vector` should be shifted with +// in order to get Q0 values, i.e. the physically correct +// values. The scale parameter is always 0 or positive, +// except if N>1024 (`stages`>10), which returns a scale +// value of -1, indicating error. +// + +// +// WebRtcSpl_ComplexFFT(...) +// +// Complex FFT +// +// Computes a complex 2^`stages`-point FFT on the input vector, which is in +// bit-reversed order. The original content of the vector is destroyed in +// the process, since the input is overwritten by the output, normal-ordered, +// FFT vector. With x as the input complex vector, Y as the output complex +// vector and with M = 2^`stages`, the following is computed: +// +// M-1 +// Y(k) = 1/M * sum[x(i)*[cos(2*pi*i*k/M) + j*sin(2*pi*i*k/M)]] +// i=0 +// +// The implementations are optimized for speed, not for code size. It uses the +// decimation-in-time algorithm with radix-2 butterfly technique. +// +// This routine prevents overflow by scaling by 2 before each FFT stage. This is +// a fixed scaling, for proper normalization - there will be log2(n) passes, so +// this results in an overall factor of 1/n, distributed to maximize arithmetic +// accuracy. +// +// Input: +// - vector : In pointer to complex vector containing 2^`stages` real +// elements interleaved with 2^`stages` imaginary elements. +// [ReImReImReIm....] +// The output is in the Q0 domain. +// +// - stages : Number of FFT stages. Must be at least 3 and at most 10, +// since the table WebRtcSpl_kSinTable1024[] is 1024 +// elements long. +// +// - mode : This parameter gives the user to choose how the FFT +// should work. +// mode==0: Low-complexity and Low-accuracy mode +// mode==1: High-complexity and High-accuracy mode +// +// Output: +// - vector : The output FFT vector is in the Q0 domain. +// +// Return value : The scale parameter is always 0, except if N>1024, +// which returns a scale value of -1, indicating error. +// + +// +// WebRtcSpl_AnalysisQMF(...) +// +// Splits a 0-2*F Hz signal into two sub bands: 0-F Hz and F-2*F Hz. The +// current version has F = 8000, therefore, a super-wideband audio signal is +// split to lower-band 0-8 kHz and upper-band 8-16 kHz. +// +// Input: +// - in_data : Wide band speech signal, 320 samples (10 ms) +// +// Input & Output: +// - filter_state1 : Filter state for first All-pass filter +// - filter_state2 : Filter state for second All-pass filter +// +// Output: +// - low_band : Lower-band signal 0-8 kHz band, 160 samples (10 ms) +// - high_band : Upper-band signal 8-16 kHz band (flipped in frequency +// domain), 160 samples (10 ms) +// + +// +// WebRtcSpl_SynthesisQMF(...) +// +// Combines the two sub bands (0-F and F-2*F Hz) into a signal of 0-2*F +// Hz, (current version has F = 8000 Hz). So the filter combines lower-band +// (0-8 kHz) and upper-band (8-16 kHz) channels to obtain super-wideband 0-16 +// kHz audio. +// +// Input: +// - low_band : The signal with the 0-8 kHz band, 160 samples (10 ms) +// - high_band : The signal with the 8-16 kHz band, 160 samples (10 ms) +// +// Input & Output: +// - filter_state1 : Filter state for first All-pass filter +// - filter_state2 : Filter state for second All-pass filter +// +// Output: +// - out_data : Super-wideband speech signal, 0-16 kHz +// + +// int16_t WebRtcSpl_SatW32ToW16(...) +// +// This function saturates a 32-bit word into a 16-bit word. +// +// Input: +// - value32 : The value of a 32-bit word. +// +// Output: +// - out16 : the saturated 16-bit word. +// + +// int32_t WebRtc_MulAccumW16(...) +// +// This function multiply a 16-bit word by a 16-bit word, and accumulate this +// value to a 32-bit integer. +// +// Input: +// - a : The value of the first 16-bit word. +// - b : The value of the second 16-bit word. +// - c : The value of an 32-bit integer. +// +// Return Value: The value of a * b + c. +// diff --git a/third_party/libwebrtc/common_audio/signal_processing/include/spl_inl.h b/third_party/libwebrtc/common_audio/signal_processing/include/spl_inl.h new file mode 100644 index 0000000000..2b0995886a --- /dev/null +++ b/third_party/libwebrtc/common_audio/signal_processing/include/spl_inl.h @@ -0,0 +1,155 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// This header file includes the inline functions in +// the fix point signal processing library. + +#ifndef COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SPL_INL_H_ +#define COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SPL_INL_H_ + +#include <stdint.h> + +#include "rtc_base/compile_assert_c.h" + +extern const int8_t kWebRtcSpl_CountLeadingZeros32_Table[64]; + +// Don't call this directly except in tests! +static __inline int WebRtcSpl_CountLeadingZeros32_NotBuiltin(uint32_t n) { + // Normalize n by rounding up to the nearest number that is a sequence of 0 + // bits followed by a sequence of 1 bits. This number has the same number of + // leading zeros as the original n. There are exactly 33 such values. + n |= n >> 1; + n |= n >> 2; + n |= n >> 4; + n |= n >> 8; + n |= n >> 16; + + // Multiply the modified n with a constant selected (by exhaustive search) + // such that each of the 33 possible values of n give a product whose 6 most + // significant bits are unique. Then look up the answer in the table. + return kWebRtcSpl_CountLeadingZeros32_Table[(n * 0x8c0b2891) >> 26]; +} + +// Don't call this directly except in tests! +static __inline int WebRtcSpl_CountLeadingZeros64_NotBuiltin(uint64_t n) { + const int leading_zeros = n >> 32 == 0 ? 32 : 0; + return leading_zeros + WebRtcSpl_CountLeadingZeros32_NotBuiltin( + (uint32_t)(n >> (32 - leading_zeros))); +} + +// Returns the number of leading zero bits in the argument. +static __inline int WebRtcSpl_CountLeadingZeros32(uint32_t n) { +#ifdef __GNUC__ + RTC_COMPILE_ASSERT(sizeof(unsigned int) == sizeof(uint32_t)); + return n == 0 ? 32 : __builtin_clz(n); +#else + return WebRtcSpl_CountLeadingZeros32_NotBuiltin(n); +#endif +} + +// Returns the number of leading zero bits in the argument. +static __inline int WebRtcSpl_CountLeadingZeros64(uint64_t n) { +#ifdef __GNUC__ + RTC_COMPILE_ASSERT(sizeof(unsigned long long) == sizeof(uint64_t)); // NOLINT + return n == 0 ? 64 : __builtin_clzll(n); +#else + return WebRtcSpl_CountLeadingZeros64_NotBuiltin(n); +#endif +} + +#ifdef WEBRTC_ARCH_ARM_V7 +#include "common_audio/signal_processing/include/spl_inl_armv7.h" +#else + +#if defined(MIPS32_LE) +#include "common_audio/signal_processing/include/spl_inl_mips.h" +#endif + +#if !defined(MIPS_DSP_R1_LE) +static __inline int16_t WebRtcSpl_SatW32ToW16(int32_t value32) { + int16_t out16 = (int16_t)value32; + + if (value32 > 32767) + out16 = 32767; + else if (value32 < -32768) + out16 = -32768; + + return out16; +} + +static __inline int32_t WebRtcSpl_AddSatW32(int32_t a, int32_t b) { + // Do the addition in unsigned numbers, since signed overflow is undefined + // behavior. + const int32_t sum = (int32_t)((uint32_t)a + (uint32_t)b); + + // a + b can't overflow if a and b have different signs. If they have the + // same sign, a + b also has the same sign iff it didn't overflow. + if ((a < 0) == (b < 0) && (a < 0) != (sum < 0)) { + // The direction of the overflow is obvious from the sign of a + b. + return sum < 0 ? INT32_MAX : INT32_MIN; + } + return sum; +} + +static __inline int32_t WebRtcSpl_SubSatW32(int32_t a, int32_t b) { + // Do the subtraction in unsigned numbers, since signed overflow is undefined + // behavior. + const int32_t diff = (int32_t)((uint32_t)a - (uint32_t)b); + + // a - b can't overflow if a and b have the same sign. If they have different + // signs, a - b has the same sign as a iff it didn't overflow. + if ((a < 0) != (b < 0) && (a < 0) != (diff < 0)) { + // The direction of the overflow is obvious from the sign of a - b. + return diff < 0 ? INT32_MAX : INT32_MIN; + } + return diff; +} + +static __inline int16_t WebRtcSpl_AddSatW16(int16_t a, int16_t b) { + return WebRtcSpl_SatW32ToW16((int32_t)a + (int32_t)b); +} + +static __inline int16_t WebRtcSpl_SubSatW16(int16_t var1, int16_t var2) { + return WebRtcSpl_SatW32ToW16((int32_t)var1 - (int32_t)var2); +} +#endif // #if !defined(MIPS_DSP_R1_LE) + +#if !defined(MIPS32_LE) +static __inline int16_t WebRtcSpl_GetSizeInBits(uint32_t n) { + return 32 - WebRtcSpl_CountLeadingZeros32(n); +} + +// Return the number of steps a can be left-shifted without overflow, +// or 0 if a == 0. +static __inline int16_t WebRtcSpl_NormW32(int32_t a) { + return a == 0 ? 0 : WebRtcSpl_CountLeadingZeros32(a < 0 ? ~a : a) - 1; +} + +// Return the number of steps a can be left-shifted without overflow, +// or 0 if a == 0. +static __inline int16_t WebRtcSpl_NormU32(uint32_t a) { + return a == 0 ? 0 : WebRtcSpl_CountLeadingZeros32(a); +} + +// Return the number of steps a can be left-shifted without overflow, +// or 0 if a == 0. +static __inline int16_t WebRtcSpl_NormW16(int16_t a) { + const int32_t a32 = a; + return a == 0 ? 0 : WebRtcSpl_CountLeadingZeros32(a < 0 ? ~a32 : a32) - 17; +} + +static __inline int32_t WebRtc_MulAccumW16(int16_t a, int16_t b, int32_t c) { + return (a * b + c); +} +#endif // #if !defined(MIPS32_LE) + +#endif // WEBRTC_ARCH_ARM_V7 + +#endif // COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SPL_INL_H_ diff --git a/third_party/libwebrtc/common_audio/signal_processing/include/spl_inl_armv7.h b/third_party/libwebrtc/common_audio/signal_processing/include/spl_inl_armv7.h new file mode 100644 index 0000000000..6fc3e7c1b8 --- /dev/null +++ b/third_party/libwebrtc/common_audio/signal_processing/include/spl_inl_armv7.h @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* This header file includes the inline functions for ARM processors in + * the fix point signal processing library. + */ + +#ifndef COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SPL_INL_ARMV7_H_ +#define COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SPL_INL_ARMV7_H_ + +#include <stdint.h> + +/* TODO(kma): Replace some assembly code with GCC intrinsics + * (e.g. __builtin_clz). + */ + +/* This function produces result that is not bit exact with that by the generic + * C version in some cases, although the former is at least as accurate as the + * later. + */ +static __inline int32_t WEBRTC_SPL_MUL_16_32_RSFT16(int16_t a, int32_t b) { + int32_t tmp = 0; + __asm __volatile("smulwb %0, %1, %2" : "=r"(tmp) : "r"(b), "r"(a)); + return tmp; +} + +static __inline int32_t WEBRTC_SPL_MUL_16_16(int16_t a, int16_t b) { + int32_t tmp = 0; + __asm __volatile("smulbb %0, %1, %2" : "=r"(tmp) : "r"(a), "r"(b)); + return tmp; +} + +// TODO(kma): add unit test. +static __inline int32_t WebRtc_MulAccumW16(int16_t a, int16_t b, int32_t c) { + int32_t tmp = 0; + __asm __volatile("smlabb %0, %1, %2, %3" + : "=r"(tmp) + : "r"(a), "r"(b), "r"(c)); + return tmp; +} + +static __inline int16_t WebRtcSpl_AddSatW16(int16_t a, int16_t b) { + int32_t s_sum = 0; + + __asm __volatile("qadd16 %0, %1, %2" : "=r"(s_sum) : "r"(a), "r"(b)); + + return (int16_t)s_sum; +} + +static __inline int32_t WebRtcSpl_AddSatW32(int32_t l_var1, int32_t l_var2) { + int32_t l_sum = 0; + + __asm __volatile("qadd %0, %1, %2" : "=r"(l_sum) : "r"(l_var1), "r"(l_var2)); + + return l_sum; +} + +static __inline int32_t WebRtcSpl_SubSatW32(int32_t l_var1, int32_t l_var2) { + int32_t l_sub = 0; + + __asm __volatile("qsub %0, %1, %2" : "=r"(l_sub) : "r"(l_var1), "r"(l_var2)); + + return l_sub; +} + +static __inline int16_t WebRtcSpl_SubSatW16(int16_t var1, int16_t var2) { + int32_t s_sub = 0; + + __asm __volatile("qsub16 %0, %1, %2" : "=r"(s_sub) : "r"(var1), "r"(var2)); + + return (int16_t)s_sub; +} + +static __inline int16_t WebRtcSpl_GetSizeInBits(uint32_t n) { + int32_t tmp = 0; + + __asm __volatile("clz %0, %1" : "=r"(tmp) : "r"(n)); + + return (int16_t)(32 - tmp); +} + +static __inline int16_t WebRtcSpl_NormW32(int32_t a) { + int32_t tmp = 0; + + if (a == 0) { + return 0; + } else if (a < 0) { + a ^= 0xFFFFFFFF; + } + + __asm __volatile("clz %0, %1" : "=r"(tmp) : "r"(a)); + + return (int16_t)(tmp - 1); +} + +static __inline int16_t WebRtcSpl_NormU32(uint32_t a) { + int tmp = 0; + + if (a == 0) + return 0; + + __asm __volatile("clz %0, %1" : "=r"(tmp) : "r"(a)); + + return (int16_t)tmp; +} + +static __inline int16_t WebRtcSpl_NormW16(int16_t a) { + int32_t tmp = 0; + int32_t a_32 = a; + + if (a_32 == 0) { + return 0; + } else if (a_32 < 0) { + a_32 ^= 0xFFFFFFFF; + } + + __asm __volatile("clz %0, %1" : "=r"(tmp) : "r"(a_32)); + + return (int16_t)(tmp - 17); +} + +// TODO(kma): add unit test. +static __inline int16_t WebRtcSpl_SatW32ToW16(int32_t value32) { + int32_t out = 0; + + __asm __volatile("ssat %0, #16, %1" : "=r"(out) : "r"(value32)); + + return (int16_t)out; +} + +#endif // COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SPL_INL_ARMV7_H_ diff --git a/third_party/libwebrtc/common_audio/signal_processing/include/spl_inl_mips.h b/third_party/libwebrtc/common_audio/signal_processing/include/spl_inl_mips.h new file mode 100644 index 0000000000..1db95e8254 --- /dev/null +++ b/third_party/libwebrtc/common_audio/signal_processing/include/spl_inl_mips.h @@ -0,0 +1,204 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// This header file includes the inline functions in +// the fix point signal processing library. + +#ifndef COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SPL_INL_MIPS_H_ +#define COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SPL_INL_MIPS_H_ + +static __inline int32_t WEBRTC_SPL_MUL_16_16(int32_t a, int32_t b) { + int32_t value32 = 0; + int32_t a1 = 0, b1 = 0; + + __asm __volatile( +#if defined(MIPS32_R2_LE) + "seh %[a1], %[a] \n\t" + "seh %[b1], %[b] \n\t" +#else + "sll %[a1], %[a], 16 \n\t" + "sll %[b1], %[b], 16 \n\t" + "sra %[a1], %[a1], 16 \n\t" + "sra %[b1], %[b1], 16 \n\t" +#endif + "mul %[value32], %[a1], %[b1] \n\t" + : [value32] "=r"(value32), [a1] "=&r"(a1), [b1] "=&r"(b1) + : [a] "r"(a), [b] "r"(b) + : "hi", "lo"); + return value32; +} + +static __inline int32_t WEBRTC_SPL_MUL_16_32_RSFT16(int16_t a, int32_t b) { + int32_t value32 = 0, b1 = 0, b2 = 0; + int32_t a1 = 0; + + __asm __volatile( +#if defined(MIPS32_R2_LE) + "seh %[a1], %[a] \n\t" +#else + "sll %[a1], %[a], 16 \n\t" + "sra %[a1], %[a1], 16 \n\t" +#endif + "andi %[b2], %[b], 0xFFFF \n\t" + "sra %[b1], %[b], 16 \n\t" + "sra %[b2], %[b2], 1 \n\t" + "mul %[value32], %[a1], %[b1] \n\t" + "mul %[b2], %[a1], %[b2] \n\t" + "addiu %[b2], %[b2], 0x4000 \n\t" + "sra %[b2], %[b2], 15 \n\t" + "addu %[value32], %[value32], %[b2] \n\t" + : [value32] "=&r"(value32), [b1] "=&r"(b1), [b2] "=&r"(b2), [a1] "=&r"(a1) + : [a] "r"(a), [b] "r"(b) + : "hi", "lo"); + return value32; +} + +#if defined(MIPS_DSP_R1_LE) +static __inline int16_t WebRtcSpl_SatW32ToW16(int32_t value32) { + __asm __volatile( + "shll_s.w %[value32], %[value32], 16 \n\t" + "sra %[value32], %[value32], 16 \n\t" + : [value32] "+r"(value32) + :); + int16_t out16 = (int16_t)value32; + return out16; +} + +static __inline int16_t WebRtcSpl_AddSatW16(int16_t a, int16_t b) { + int32_t value32 = 0; + + __asm __volatile("addq_s.ph %[value32], %[a], %[b] \n\t" + : [value32] "=r"(value32) + : [a] "r"(a), [b] "r"(b)); + return (int16_t)value32; +} + +static __inline int32_t WebRtcSpl_AddSatW32(int32_t l_var1, int32_t l_var2) { + int32_t l_sum; + + __asm __volatile( + "addq_s.w %[l_sum], %[l_var1], %[l_var2] \n\t" + : [l_sum] "=r"(l_sum) + : [l_var1] "r"(l_var1), [l_var2] "r"(l_var2)); + + return l_sum; +} + +static __inline int16_t WebRtcSpl_SubSatW16(int16_t var1, int16_t var2) { + int32_t value32; + + __asm __volatile("subq_s.ph %[value32], %[var1], %[var2] \n\t" + : [value32] "=r"(value32) + : [var1] "r"(var1), [var2] "r"(var2)); + + return (int16_t)value32; +} + +static __inline int32_t WebRtcSpl_SubSatW32(int32_t l_var1, int32_t l_var2) { + int32_t l_diff; + + __asm __volatile( + "subq_s.w %[l_diff], %[l_var1], %[l_var2] \n\t" + : [l_diff] "=r"(l_diff) + : [l_var1] "r"(l_var1), [l_var2] "r"(l_var2)); + + return l_diff; +} +#endif + +static __inline int16_t WebRtcSpl_GetSizeInBits(uint32_t n) { + int bits = 0; + int i32 = 32; + + __asm __volatile( + "clz %[bits], %[n] \n\t" + "subu %[bits], %[i32], %[bits] \n\t" + : [bits] "=&r"(bits) + : [n] "r"(n), [i32] "r"(i32)); + + return (int16_t)bits; +} + +static __inline int16_t WebRtcSpl_NormW32(int32_t a) { + int zeros = 0; + + __asm __volatile( + ".set push \n\t" + ".set noreorder \n\t" + "bnez %[a], 1f \n\t" + " sra %[zeros], %[a], 31 \n\t" + "b 2f \n\t" + " move %[zeros], $zero \n\t" + "1: \n\t" + "xor %[zeros], %[a], %[zeros] \n\t" + "clz %[zeros], %[zeros] \n\t" + "addiu %[zeros], %[zeros], -1 \n\t" + "2: \n\t" + ".set pop \n\t" + : [zeros] "=&r"(zeros) + : [a] "r"(a)); + + return (int16_t)zeros; +} + +static __inline int16_t WebRtcSpl_NormU32(uint32_t a) { + int zeros = 0; + + __asm __volatile("clz %[zeros], %[a] \n\t" + : [zeros] "=r"(zeros) + : [a] "r"(a)); + + return (int16_t)(zeros & 0x1f); +} + +static __inline int16_t WebRtcSpl_NormW16(int16_t a) { + int zeros = 0; + int a0 = a << 16; + + __asm __volatile( + ".set push \n\t" + ".set noreorder \n\t" + "bnez %[a0], 1f \n\t" + " sra %[zeros], %[a0], 31 \n\t" + "b 2f \n\t" + " move %[zeros], $zero \n\t" + "1: \n\t" + "xor %[zeros], %[a0], %[zeros] \n\t" + "clz %[zeros], %[zeros] \n\t" + "addiu %[zeros], %[zeros], -1 \n\t" + "2: \n\t" + ".set pop \n\t" + : [zeros] "=&r"(zeros) + : [a0] "r"(a0)); + + return (int16_t)zeros; +} + +static __inline int32_t WebRtc_MulAccumW16(int16_t a, int16_t b, int32_t c) { + int32_t res = 0, c1 = 0; + __asm __volatile( +#if defined(MIPS32_R2_LE) + "seh %[a], %[a] \n\t" + "seh %[b], %[b] \n\t" +#else + "sll %[a], %[a], 16 \n\t" + "sll %[b], %[b], 16 \n\t" + "sra %[a], %[a], 16 \n\t" + "sra %[b], %[b], 16 \n\t" +#endif + "mul %[res], %[a], %[b] \n\t" + "addu %[c1], %[c], %[res] \n\t" + : [c1] "=r"(c1), [res] "=&r"(res) + : [a] "r"(a), [b] "r"(b), [c] "r"(c) + : "hi", "lo"); + return (c1); +} + +#endif // COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SPL_INL_MIPS_H_ diff --git a/third_party/libwebrtc/common_audio/signal_processing/levinson_durbin.c b/third_party/libwebrtc/common_audio/signal_processing/levinson_durbin.c new file mode 100644 index 0000000000..2c5cbaeeaa --- /dev/null +++ b/third_party/libwebrtc/common_audio/signal_processing/levinson_durbin.c @@ -0,0 +1,249 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +/* + * This file contains the function WebRtcSpl_LevinsonDurbin(). + * The description header can be found in signal_processing_library.h + * + */ + +#include "common_audio/signal_processing/include/signal_processing_library.h" +#include "rtc_base/sanitizer.h" + +#define SPL_LEVINSON_MAXORDER 20 + +int16_t RTC_NO_SANITIZE("signed-integer-overflow") // bugs.webrtc.org/5486 +WebRtcSpl_LevinsonDurbin(const int32_t* R, int16_t* A, int16_t* K, + size_t order) +{ + size_t i, j; + // Auto-correlation coefficients in high precision + int16_t R_hi[SPL_LEVINSON_MAXORDER + 1], R_low[SPL_LEVINSON_MAXORDER + 1]; + // LPC coefficients in high precision + int16_t A_hi[SPL_LEVINSON_MAXORDER + 1], A_low[SPL_LEVINSON_MAXORDER + 1]; + // LPC coefficients for next iteration + int16_t A_upd_hi[SPL_LEVINSON_MAXORDER + 1], A_upd_low[SPL_LEVINSON_MAXORDER + 1]; + // Reflection coefficient in high precision + int16_t K_hi, K_low; + // Prediction gain Alpha in high precision and with scale factor + int16_t Alpha_hi, Alpha_low, Alpha_exp; + int16_t tmp_hi, tmp_low; + int32_t temp1W32, temp2W32, temp3W32; + int16_t norm; + + // Normalize the autocorrelation R[0]...R[order+1] + + norm = WebRtcSpl_NormW32(R[0]); + + for (i = 0; i <= order; ++i) + { + temp1W32 = R[i] * (1 << norm); + // UBSan: 12 * 268435456 cannot be represented in type 'int' + + // Put R in hi and low format + R_hi[i] = (int16_t)(temp1W32 >> 16); + R_low[i] = (int16_t)((temp1W32 - ((int32_t)R_hi[i] * 65536)) >> 1); + } + + // K = A[1] = -R[1] / R[0] + + temp2W32 = R[1] * (1 << norm); // R[1] in Q31 + temp3W32 = WEBRTC_SPL_ABS_W32(temp2W32); // abs R[1] + temp1W32 = WebRtcSpl_DivW32HiLow(temp3W32, R_hi[0], R_low[0]); // abs(R[1])/R[0] in Q31 + // Put back the sign on R[1] + if (temp2W32 > 0) + { + temp1W32 = -temp1W32; + } + + // Put K in hi and low format + K_hi = (int16_t)(temp1W32 >> 16); + K_low = (int16_t)((temp1W32 - ((int32_t)K_hi * 65536)) >> 1); + + // Store first reflection coefficient + K[0] = K_hi; + + temp1W32 >>= 4; // A[1] in Q27. + + // Put A[1] in hi and low format + A_hi[1] = (int16_t)(temp1W32 >> 16); + A_low[1] = (int16_t)((temp1W32 - ((int32_t)A_hi[1] * 65536)) >> 1); + + // Alpha = R[0] * (1-K^2) + + temp1W32 = ((K_hi * K_low >> 14) + K_hi * K_hi) * 2; // = k^2 in Q31 + + temp1W32 = WEBRTC_SPL_ABS_W32(temp1W32); // Guard against <0 + temp1W32 = (int32_t)0x7fffffffL - temp1W32; // temp1W32 = (1 - K[0]*K[0]) in Q31 + + // Store temp1W32 = 1 - K[0]*K[0] on hi and low format + tmp_hi = (int16_t)(temp1W32 >> 16); + tmp_low = (int16_t)((temp1W32 - ((int32_t)tmp_hi << 16)) >> 1); + + // Calculate Alpha in Q31 + temp1W32 = (R_hi[0] * tmp_hi + (R_hi[0] * tmp_low >> 15) + + (R_low[0] * tmp_hi >> 15)) << 1; + + // Normalize Alpha and put it in hi and low format + + Alpha_exp = WebRtcSpl_NormW32(temp1W32); + temp1W32 = WEBRTC_SPL_LSHIFT_W32(temp1W32, Alpha_exp); + Alpha_hi = (int16_t)(temp1W32 >> 16); + Alpha_low = (int16_t)((temp1W32 - ((int32_t)Alpha_hi << 16)) >> 1); + + // Perform the iterative calculations in the Levinson-Durbin algorithm + + for (i = 2; i <= order; i++) + { + /* ---- + temp1W32 = R[i] + > R[j]*A[i-j] + / + ---- + j=1..i-1 + */ + + temp1W32 = 0; + + for (j = 1; j < i; j++) + { + // temp1W32 is in Q31 + temp1W32 += (R_hi[j] * A_hi[i - j] * 2) + + (((R_hi[j] * A_low[i - j] >> 15) + + (R_low[j] * A_hi[i - j] >> 15)) * 2); + } + + temp1W32 = temp1W32 * 16; + temp1W32 += ((int32_t)R_hi[i] * 65536) + + WEBRTC_SPL_LSHIFT_W32((int32_t)R_low[i], 1); + + // K = -temp1W32 / Alpha + temp2W32 = WEBRTC_SPL_ABS_W32(temp1W32); // abs(temp1W32) + temp3W32 = WebRtcSpl_DivW32HiLow(temp2W32, Alpha_hi, Alpha_low); // abs(temp1W32)/Alpha + + // Put the sign of temp1W32 back again + if (temp1W32 > 0) + { + temp3W32 = -temp3W32; + } + + // Use the Alpha shifts from earlier to de-normalize + norm = WebRtcSpl_NormW32(temp3W32); + if ((Alpha_exp <= norm) || (temp3W32 == 0)) + { + temp3W32 = temp3W32 * (1 << Alpha_exp); + } else + { + if (temp3W32 > 0) + { + temp3W32 = (int32_t)0x7fffffffL; + } else + { + temp3W32 = (int32_t)0x80000000L; + } + } + + // Put K on hi and low format + K_hi = (int16_t)(temp3W32 >> 16); + K_low = (int16_t)((temp3W32 - ((int32_t)K_hi * 65536)) >> 1); + + // Store Reflection coefficient in Q15 + K[i - 1] = K_hi; + + // Test for unstable filter. + // If unstable return 0 and let the user decide what to do in that case + + if ((int32_t)WEBRTC_SPL_ABS_W16(K_hi) > (int32_t)32750) + { + return 0; // Unstable filter + } + + /* + Compute updated LPC coefficient: Anew[i] + Anew[j]= A[j] + K*A[i-j] for j=1..i-1 + Anew[i]= K + */ + + for (j = 1; j < i; j++) + { + // temp1W32 = A[j] in Q27 + temp1W32 = (int32_t)A_hi[j] * 65536 + + WEBRTC_SPL_LSHIFT_W32((int32_t)A_low[j],1); + + // temp1W32 += K*A[i-j] in Q27 + temp1W32 += (K_hi * A_hi[i - j] + (K_hi * A_low[i - j] >> 15) + + (K_low * A_hi[i - j] >> 15)) * 2; + + // Put Anew in hi and low format + A_upd_hi[j] = (int16_t)(temp1W32 >> 16); + A_upd_low[j] = (int16_t)( + (temp1W32 - ((int32_t)A_upd_hi[j] * 65536)) >> 1); + } + + // temp3W32 = K in Q27 (Convert from Q31 to Q27) + temp3W32 >>= 4; + + // Store Anew in hi and low format + A_upd_hi[i] = (int16_t)(temp3W32 >> 16); + A_upd_low[i] = (int16_t)( + (temp3W32 - ((int32_t)A_upd_hi[i] * 65536)) >> 1); + + // Alpha = Alpha * (1-K^2) + + temp1W32 = ((K_hi * K_low >> 14) + K_hi * K_hi) * 2; // K*K in Q31 + + temp1W32 = WEBRTC_SPL_ABS_W32(temp1W32); // Guard against <0 + temp1W32 = (int32_t)0x7fffffffL - temp1W32; // 1 - K*K in Q31 + + // Convert 1- K^2 in hi and low format + tmp_hi = (int16_t)(temp1W32 >> 16); + tmp_low = (int16_t)((temp1W32 - ((int32_t)tmp_hi << 16)) >> 1); + + // Calculate Alpha = Alpha * (1-K^2) in Q31 + temp1W32 = (Alpha_hi * tmp_hi + (Alpha_hi * tmp_low >> 15) + + (Alpha_low * tmp_hi >> 15)) << 1; + + // Normalize Alpha and store it on hi and low format + + norm = WebRtcSpl_NormW32(temp1W32); + temp1W32 = WEBRTC_SPL_LSHIFT_W32(temp1W32, norm); + + Alpha_hi = (int16_t)(temp1W32 >> 16); + Alpha_low = (int16_t)((temp1W32 - ((int32_t)Alpha_hi << 16)) >> 1); + + // Update the total normalization of Alpha + Alpha_exp = Alpha_exp + norm; + + // Update A[] + + for (j = 1; j <= i; j++) + { + A_hi[j] = A_upd_hi[j]; + A_low[j] = A_upd_low[j]; + } + } + + /* + Set A[0] to 1.0 and store the A[i] i=1...order in Q12 + (Convert from Q27 and use rounding) + */ + + A[0] = 4096; + + for (i = 1; i <= order; i++) + { + // temp1W32 in Q27 + temp1W32 = (int32_t)A_hi[i] * 65536 + + WEBRTC_SPL_LSHIFT_W32((int32_t)A_low[i], 1); + // Round and store upper word + A[i] = (int16_t)(((temp1W32 * 2) + 32768) >> 16); + } + return 1; // Stable filters +} diff --git a/third_party/libwebrtc/common_audio/signal_processing/lpc_to_refl_coef.c b/third_party/libwebrtc/common_audio/signal_processing/lpc_to_refl_coef.c new file mode 100644 index 0000000000..7a5e25191b --- /dev/null +++ b/third_party/libwebrtc/common_audio/signal_processing/lpc_to_refl_coef.c @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +/* + * This file contains the function WebRtcSpl_LpcToReflCoef(). + * The description header can be found in signal_processing_library.h + * + */ + +#include "common_audio/signal_processing/include/signal_processing_library.h" + +#define SPL_LPC_TO_REFL_COEF_MAX_AR_MODEL_ORDER 50 + +void WebRtcSpl_LpcToReflCoef(int16_t* a16, int use_order, int16_t* k16) +{ + int m, k; + int32_t tmp32[SPL_LPC_TO_REFL_COEF_MAX_AR_MODEL_ORDER]; + int32_t tmp_inv_denom32; + int16_t tmp_inv_denom16; + + k16[use_order - 1] = a16[use_order] << 3; // Q12<<3 => Q15 + for (m = use_order - 1; m > 0; m--) + { + // (1 - k^2) in Q30 + tmp_inv_denom32 = 1073741823 - k16[m] * k16[m]; + // (1 - k^2) in Q15 + tmp_inv_denom16 = (int16_t)(tmp_inv_denom32 >> 15); + + for (k = 1; k <= m; k++) + { + // tmp[k] = (a[k] - RC[m] * a[m-k+1]) / (1.0 - RC[m]*RC[m]); + + // [Q12<<16 - (Q15*Q12)<<1] = [Q28 - Q28] = Q28 + tmp32[k] = (a16[k] << 16) - (k16[m] * a16[m - k + 1] << 1); + + tmp32[k] = WebRtcSpl_DivW32W16(tmp32[k], tmp_inv_denom16); //Q28/Q15 = Q13 + } + + for (k = 1; k < m; k++) + { + a16[k] = (int16_t)(tmp32[k] >> 1); // Q13>>1 => Q12 + } + + tmp32[m] = WEBRTC_SPL_SAT(8191, tmp32[m], -8191); + k16[m - 1] = (int16_t)WEBRTC_SPL_LSHIFT_W32(tmp32[m], 2); //Q13<<2 => Q15 + } + return; +} diff --git a/third_party/libwebrtc/common_audio/signal_processing/min_max_operations.c b/third_party/libwebrtc/common_audio/signal_processing/min_max_operations.c new file mode 100644 index 0000000000..1b9542e7ef --- /dev/null +++ b/third_party/libwebrtc/common_audio/signal_processing/min_max_operations.c @@ -0,0 +1,256 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * This file contains the implementation of functions + * WebRtcSpl_MaxAbsValueW16C() + * WebRtcSpl_MaxAbsValueW32C() + * WebRtcSpl_MaxValueW16C() + * WebRtcSpl_MaxValueW32C() + * WebRtcSpl_MinValueW16C() + * WebRtcSpl_MinValueW32C() + * WebRtcSpl_MaxAbsIndexW16() + * WebRtcSpl_MaxIndexW16() + * WebRtcSpl_MaxIndexW32() + * WebRtcSpl_MinIndexW16() + * WebRtcSpl_MinIndexW32() + * + */ + +#include <stdlib.h> + +#include "rtc_base/checks.h" +#include "common_audio/signal_processing/include/signal_processing_library.h" + +// TODO(bjorn/kma): Consolidate function pairs (e.g. combine +// WebRtcSpl_MaxAbsValueW16C and WebRtcSpl_MaxAbsIndexW16 into a single one.) +// TODO(kma): Move the next six functions into min_max_operations_c.c. + +// Maximum absolute value of word16 vector. C version for generic platforms. +int16_t WebRtcSpl_MaxAbsValueW16C(const int16_t* vector, size_t length) { + size_t i = 0; + int absolute = 0, maximum = 0; + + RTC_DCHECK_GT(length, 0); + + for (i = 0; i < length; i++) { + absolute = abs((int)vector[i]); + + if (absolute > maximum) { + maximum = absolute; + } + } + + // Guard the case for abs(-32768). + if (maximum > WEBRTC_SPL_WORD16_MAX) { + maximum = WEBRTC_SPL_WORD16_MAX; + } + + return (int16_t)maximum; +} + +// Maximum absolute value of word32 vector. C version for generic platforms. +int32_t WebRtcSpl_MaxAbsValueW32C(const int32_t* vector, size_t length) { + // Use uint32_t for the local variables, to accommodate the return value + // of abs(0x80000000), which is 0x80000000. + + uint32_t absolute = 0, maximum = 0; + size_t i = 0; + + RTC_DCHECK_GT(length, 0); + + for (i = 0; i < length; i++) { + absolute = abs((int)vector[i]); + if (absolute > maximum) { + maximum = absolute; + } + } + + maximum = WEBRTC_SPL_MIN(maximum, WEBRTC_SPL_WORD32_MAX); + + return (int32_t)maximum; +} + +// Maximum value of word16 vector. C version for generic platforms. +int16_t WebRtcSpl_MaxValueW16C(const int16_t* vector, size_t length) { + int16_t maximum = WEBRTC_SPL_WORD16_MIN; + size_t i = 0; + + RTC_DCHECK_GT(length, 0); + + for (i = 0; i < length; i++) { + if (vector[i] > maximum) + maximum = vector[i]; + } + return maximum; +} + +// Maximum value of word32 vector. C version for generic platforms. +int32_t WebRtcSpl_MaxValueW32C(const int32_t* vector, size_t length) { + int32_t maximum = WEBRTC_SPL_WORD32_MIN; + size_t i = 0; + + RTC_DCHECK_GT(length, 0); + + for (i = 0; i < length; i++) { + if (vector[i] > maximum) + maximum = vector[i]; + } + return maximum; +} + +// Minimum value of word16 vector. C version for generic platforms. +int16_t WebRtcSpl_MinValueW16C(const int16_t* vector, size_t length) { + int16_t minimum = WEBRTC_SPL_WORD16_MAX; + size_t i = 0; + + RTC_DCHECK_GT(length, 0); + + for (i = 0; i < length; i++) { + if (vector[i] < minimum) + minimum = vector[i]; + } + return minimum; +} + +// Minimum value of word32 vector. C version for generic platforms. +int32_t WebRtcSpl_MinValueW32C(const int32_t* vector, size_t length) { + int32_t minimum = WEBRTC_SPL_WORD32_MAX; + size_t i = 0; + + RTC_DCHECK_GT(length, 0); + + for (i = 0; i < length; i++) { + if (vector[i] < minimum) + minimum = vector[i]; + } + return minimum; +} + +// Index of maximum absolute value in a word16 vector. +size_t WebRtcSpl_MaxAbsIndexW16(const int16_t* vector, size_t length) { + // Use type int for local variables, to accomodate the value of abs(-32768). + + size_t i = 0, index = 0; + int absolute = 0, maximum = 0; + + RTC_DCHECK_GT(length, 0); + + for (i = 0; i < length; i++) { + absolute = abs((int)vector[i]); + + if (absolute > maximum) { + maximum = absolute; + index = i; + } + } + + return index; +} + +int16_t WebRtcSpl_MaxAbsElementW16(const int16_t* vector, size_t length) { + int16_t min_val, max_val; + WebRtcSpl_MinMaxW16(vector, length, &min_val, &max_val); + if (min_val == max_val || min_val < -max_val) { + return min_val; + } + return max_val; +} + +// Index of maximum value in a word16 vector. +size_t WebRtcSpl_MaxIndexW16(const int16_t* vector, size_t length) { + size_t i = 0, index = 0; + int16_t maximum = WEBRTC_SPL_WORD16_MIN; + + RTC_DCHECK_GT(length, 0); + + for (i = 0; i < length; i++) { + if (vector[i] > maximum) { + maximum = vector[i]; + index = i; + } + } + + return index; +} + +// Index of maximum value in a word32 vector. +size_t WebRtcSpl_MaxIndexW32(const int32_t* vector, size_t length) { + size_t i = 0, index = 0; + int32_t maximum = WEBRTC_SPL_WORD32_MIN; + + RTC_DCHECK_GT(length, 0); + + for (i = 0; i < length; i++) { + if (vector[i] > maximum) { + maximum = vector[i]; + index = i; + } + } + + return index; +} + +// Index of minimum value in a word16 vector. +size_t WebRtcSpl_MinIndexW16(const int16_t* vector, size_t length) { + size_t i = 0, index = 0; + int16_t minimum = WEBRTC_SPL_WORD16_MAX; + + RTC_DCHECK_GT(length, 0); + + for (i = 0; i < length; i++) { + if (vector[i] < minimum) { + minimum = vector[i]; + index = i; + } + } + + return index; +} + +// Index of minimum value in a word32 vector. +size_t WebRtcSpl_MinIndexW32(const int32_t* vector, size_t length) { + size_t i = 0, index = 0; + int32_t minimum = WEBRTC_SPL_WORD32_MAX; + + RTC_DCHECK_GT(length, 0); + + for (i = 0; i < length; i++) { + if (vector[i] < minimum) { + minimum = vector[i]; + index = i; + } + } + + return index; +} + +// Finds both the minimum and maximum elements in an array of 16-bit integers. +void WebRtcSpl_MinMaxW16(const int16_t* vector, size_t length, + int16_t* min_val, int16_t* max_val) { +#if defined(WEBRTC_HAS_NEON) + return WebRtcSpl_MinMaxW16Neon(vector, length, min_val, max_val); +#else + int16_t minimum = WEBRTC_SPL_WORD16_MAX; + int16_t maximum = WEBRTC_SPL_WORD16_MIN; + size_t i = 0; + + RTC_DCHECK_GT(length, 0); + + for (i = 0; i < length; i++) { + if (vector[i] < minimum) + minimum = vector[i]; + if (vector[i] > maximum) + maximum = vector[i]; + } + *min_val = minimum; + *max_val = maximum; +#endif +} diff --git a/third_party/libwebrtc/common_audio/signal_processing/min_max_operations_mips.c b/third_party/libwebrtc/common_audio/signal_processing/min_max_operations_mips.c new file mode 100644 index 0000000000..8a7fc65c42 --- /dev/null +++ b/third_party/libwebrtc/common_audio/signal_processing/min_max_operations_mips.c @@ -0,0 +1,375 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * This file contains the implementation of function + * WebRtcSpl_MaxAbsValueW16() + * + * The description header can be found in signal_processing_library.h. + * + */ + +#include "rtc_base/checks.h" +#include "common_audio/signal_processing/include/signal_processing_library.h" + +// Maximum absolute value of word16 vector. +int16_t WebRtcSpl_MaxAbsValueW16_mips(const int16_t* vector, size_t length) { + int32_t totMax = 0; + int32_t tmp32_0, tmp32_1, tmp32_2, tmp32_3; + size_t i, loop_size; + + RTC_DCHECK_GT(length, 0); + +#if defined(MIPS_DSP_R1) + const int32_t* tmpvec32 = (int32_t*)vector; + loop_size = length >> 4; + + for (i = 0; i < loop_size; i++) { + __asm__ volatile ( + "lw %[tmp32_0], 0(%[tmpvec32]) \n\t" + "lw %[tmp32_1], 4(%[tmpvec32]) \n\t" + "lw %[tmp32_2], 8(%[tmpvec32]) \n\t" + "lw %[tmp32_3], 12(%[tmpvec32]) \n\t" + + "absq_s.ph %[tmp32_0], %[tmp32_0] \n\t" + "absq_s.ph %[tmp32_1], %[tmp32_1] \n\t" + "cmp.lt.ph %[totMax], %[tmp32_0] \n\t" + "pick.ph %[totMax], %[tmp32_0], %[totMax] \n\t" + + "lw %[tmp32_0], 16(%[tmpvec32]) \n\t" + "absq_s.ph %[tmp32_2], %[tmp32_2] \n\t" + "cmp.lt.ph %[totMax], %[tmp32_1] \n\t" + "pick.ph %[totMax], %[tmp32_1], %[totMax] \n\t" + + "lw %[tmp32_1], 20(%[tmpvec32]) \n\t" + "absq_s.ph %[tmp32_3], %[tmp32_3] \n\t" + "cmp.lt.ph %[totMax], %[tmp32_2] \n\t" + "pick.ph %[totMax], %[tmp32_2], %[totMax] \n\t" + + "lw %[tmp32_2], 24(%[tmpvec32]) \n\t" + "cmp.lt.ph %[totMax], %[tmp32_3] \n\t" + "pick.ph %[totMax], %[tmp32_3], %[totMax] \n\t" + + "lw %[tmp32_3], 28(%[tmpvec32]) \n\t" + "absq_s.ph %[tmp32_0], %[tmp32_0] \n\t" + "absq_s.ph %[tmp32_1], %[tmp32_1] \n\t" + "cmp.lt.ph %[totMax], %[tmp32_0] \n\t" + "pick.ph %[totMax], %[tmp32_0], %[totMax] \n\t" + + "absq_s.ph %[tmp32_2], %[tmp32_2] \n\t" + "cmp.lt.ph %[totMax], %[tmp32_1] \n\t" + "pick.ph %[totMax], %[tmp32_1], %[totMax] \n\t" + "absq_s.ph %[tmp32_3], %[tmp32_3] \n\t" + "cmp.lt.ph %[totMax], %[tmp32_2] \n\t" + "pick.ph %[totMax], %[tmp32_2], %[totMax] \n\t" + + "cmp.lt.ph %[totMax], %[tmp32_3] \n\t" + "pick.ph %[totMax], %[tmp32_3], %[totMax] \n\t" + + "addiu %[tmpvec32], %[tmpvec32], 32 \n\t" + : [tmp32_0] "=&r" (tmp32_0), [tmp32_1] "=&r" (tmp32_1), + [tmp32_2] "=&r" (tmp32_2), [tmp32_3] "=&r" (tmp32_3), + [totMax] "+r" (totMax), [tmpvec32] "+r" (tmpvec32) + : + : "memory" + ); + } + __asm__ volatile ( + "rotr %[tmp32_0], %[totMax], 16 \n\t" + "cmp.lt.ph %[totMax], %[tmp32_0] \n\t" + "pick.ph %[totMax], %[tmp32_0], %[totMax] \n\t" + "packrl.ph %[totMax], $0, %[totMax] \n\t" + : [tmp32_0] "=&r" (tmp32_0), [totMax] "+r" (totMax) + : + ); + loop_size = length & 0xf; + for (i = 0; i < loop_size; i++) { + __asm__ volatile ( + "lh %[tmp32_0], 0(%[tmpvec32]) \n\t" + "addiu %[tmpvec32], %[tmpvec32], 2 \n\t" + "absq_s.w %[tmp32_0], %[tmp32_0] \n\t" + "slt %[tmp32_1], %[totMax], %[tmp32_0] \n\t" + "movn %[totMax], %[tmp32_0], %[tmp32_1] \n\t" + : [tmp32_0] "=&r" (tmp32_0), [tmp32_1] "=&r" (tmp32_1), + [tmpvec32] "+r" (tmpvec32), [totMax] "+r" (totMax) + : + : "memory" + ); + } +#else // #if defined(MIPS_DSP_R1) + int32_t v16MaxMax = WEBRTC_SPL_WORD16_MAX; + int32_t r, r1, r2, r3; + const int16_t* tmpvector = vector; + loop_size = length >> 4; + for (i = 0; i < loop_size; i++) { + __asm__ volatile ( + "lh %[tmp32_0], 0(%[tmpvector]) \n\t" + "lh %[tmp32_1], 2(%[tmpvector]) \n\t" + "lh %[tmp32_2], 4(%[tmpvector]) \n\t" + "lh %[tmp32_3], 6(%[tmpvector]) \n\t" + + "abs %[tmp32_0], %[tmp32_0] \n\t" + "abs %[tmp32_1], %[tmp32_1] \n\t" + "abs %[tmp32_2], %[tmp32_2] \n\t" + "abs %[tmp32_3], %[tmp32_3] \n\t" + + "slt %[r], %[totMax], %[tmp32_0] \n\t" + "movn %[totMax], %[tmp32_0], %[r] \n\t" + "slt %[r1], %[totMax], %[tmp32_1] \n\t" + "movn %[totMax], %[tmp32_1], %[r1] \n\t" + "slt %[r2], %[totMax], %[tmp32_2] \n\t" + "movn %[totMax], %[tmp32_2], %[r2] \n\t" + "slt %[r3], %[totMax], %[tmp32_3] \n\t" + "movn %[totMax], %[tmp32_3], %[r3] \n\t" + + "lh %[tmp32_0], 8(%[tmpvector]) \n\t" + "lh %[tmp32_1], 10(%[tmpvector]) \n\t" + "lh %[tmp32_2], 12(%[tmpvector]) \n\t" + "lh %[tmp32_3], 14(%[tmpvector]) \n\t" + + "abs %[tmp32_0], %[tmp32_0] \n\t" + "abs %[tmp32_1], %[tmp32_1] \n\t" + "abs %[tmp32_2], %[tmp32_2] \n\t" + "abs %[tmp32_3], %[tmp32_3] \n\t" + + "slt %[r], %[totMax], %[tmp32_0] \n\t" + "movn %[totMax], %[tmp32_0], %[r] \n\t" + "slt %[r1], %[totMax], %[tmp32_1] \n\t" + "movn %[totMax], %[tmp32_1], %[r1] \n\t" + "slt %[r2], %[totMax], %[tmp32_2] \n\t" + "movn %[totMax], %[tmp32_2], %[r2] \n\t" + "slt %[r3], %[totMax], %[tmp32_3] \n\t" + "movn %[totMax], %[tmp32_3], %[r3] \n\t" + + "lh %[tmp32_0], 16(%[tmpvector]) \n\t" + "lh %[tmp32_1], 18(%[tmpvector]) \n\t" + "lh %[tmp32_2], 20(%[tmpvector]) \n\t" + "lh %[tmp32_3], 22(%[tmpvector]) \n\t" + + "abs %[tmp32_0], %[tmp32_0] \n\t" + "abs %[tmp32_1], %[tmp32_1] \n\t" + "abs %[tmp32_2], %[tmp32_2] \n\t" + "abs %[tmp32_3], %[tmp32_3] \n\t" + + "slt %[r], %[totMax], %[tmp32_0] \n\t" + "movn %[totMax], %[tmp32_0], %[r] \n\t" + "slt %[r1], %[totMax], %[tmp32_1] \n\t" + "movn %[totMax], %[tmp32_1], %[r1] \n\t" + "slt %[r2], %[totMax], %[tmp32_2] \n\t" + "movn %[totMax], %[tmp32_2], %[r2] \n\t" + "slt %[r3], %[totMax], %[tmp32_3] \n\t" + "movn %[totMax], %[tmp32_3], %[r3] \n\t" + + "lh %[tmp32_0], 24(%[tmpvector]) \n\t" + "lh %[tmp32_1], 26(%[tmpvector]) \n\t" + "lh %[tmp32_2], 28(%[tmpvector]) \n\t" + "lh %[tmp32_3], 30(%[tmpvector]) \n\t" + + "abs %[tmp32_0], %[tmp32_0] \n\t" + "abs %[tmp32_1], %[tmp32_1] \n\t" + "abs %[tmp32_2], %[tmp32_2] \n\t" + "abs %[tmp32_3], %[tmp32_3] \n\t" + + "slt %[r], %[totMax], %[tmp32_0] \n\t" + "movn %[totMax], %[tmp32_0], %[r] \n\t" + "slt %[r1], %[totMax], %[tmp32_1] \n\t" + "movn %[totMax], %[tmp32_1], %[r1] \n\t" + "slt %[r2], %[totMax], %[tmp32_2] \n\t" + "movn %[totMax], %[tmp32_2], %[r2] \n\t" + "slt %[r3], %[totMax], %[tmp32_3] \n\t" + "movn %[totMax], %[tmp32_3], %[r3] \n\t" + + "addiu %[tmpvector], %[tmpvector], 32 \n\t" + : [tmp32_0] "=&r" (tmp32_0), [tmp32_1] "=&r" (tmp32_1), + [tmp32_2] "=&r" (tmp32_2), [tmp32_3] "=&r" (tmp32_3), + [totMax] "+r" (totMax), [r] "=&r" (r), [tmpvector] "+r" (tmpvector), + [r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3) + : + : "memory" + ); + } + loop_size = length & 0xf; + for (i = 0; i < loop_size; i++) { + __asm__ volatile ( + "lh %[tmp32_0], 0(%[tmpvector]) \n\t" + "addiu %[tmpvector], %[tmpvector], 2 \n\t" + "abs %[tmp32_0], %[tmp32_0] \n\t" + "slt %[tmp32_1], %[totMax], %[tmp32_0] \n\t" + "movn %[totMax], %[tmp32_0], %[tmp32_1] \n\t" + : [tmp32_0] "=&r" (tmp32_0), [tmp32_1] "=&r" (tmp32_1), + [tmpvector] "+r" (tmpvector), [totMax] "+r" (totMax) + : + : "memory" + ); + } + + __asm__ volatile ( + "slt %[r], %[v16MaxMax], %[totMax] \n\t" + "movn %[totMax], %[v16MaxMax], %[r] \n\t" + : [totMax] "+r" (totMax), [r] "=&r" (r) + : [v16MaxMax] "r" (v16MaxMax) + ); +#endif // #if defined(MIPS_DSP_R1) + return (int16_t)totMax; +} + +#if defined(MIPS_DSP_R1_LE) +// Maximum absolute value of word32 vector. Version for MIPS platform. +int32_t WebRtcSpl_MaxAbsValueW32_mips(const int32_t* vector, size_t length) { + // Use uint32_t for the local variables, to accommodate the return value + // of abs(0x80000000), which is 0x80000000. + + uint32_t absolute = 0, maximum = 0; + int tmp1 = 0, max_value = 0x7fffffff; + + RTC_DCHECK_GT(length, 0); + + __asm__ volatile ( + ".set push \n\t" + ".set noreorder \n\t" + + "1: \n\t" + "lw %[absolute], 0(%[vector]) \n\t" + "absq_s.w %[absolute], %[absolute] \n\t" + "addiu %[length], %[length], -1 \n\t" + "slt %[tmp1], %[maximum], %[absolute] \n\t" + "movn %[maximum], %[absolute], %[tmp1] \n\t" + "bgtz %[length], 1b \n\t" + " addiu %[vector], %[vector], 4 \n\t" + "slt %[tmp1], %[max_value], %[maximum] \n\t" + "movn %[maximum], %[max_value], %[tmp1] \n\t" + + ".set pop \n\t" + + : [tmp1] "=&r" (tmp1), [maximum] "+r" (maximum), [absolute] "+r" (absolute) + : [vector] "r" (vector), [length] "r" (length), [max_value] "r" (max_value) + : "memory" + ); + + return (int32_t)maximum; +} +#endif // #if defined(MIPS_DSP_R1_LE) + +// Maximum value of word16 vector. Version for MIPS platform. +int16_t WebRtcSpl_MaxValueW16_mips(const int16_t* vector, size_t length) { + int16_t maximum = WEBRTC_SPL_WORD16_MIN; + int tmp1; + int16_t value; + + RTC_DCHECK_GT(length, 0); + + __asm__ volatile ( + ".set push \n\t" + ".set noreorder \n\t" + + "1: \n\t" + "lh %[value], 0(%[vector]) \n\t" + "addiu %[length], %[length], -1 \n\t" + "slt %[tmp1], %[maximum], %[value] \n\t" + "movn %[maximum], %[value], %[tmp1] \n\t" + "bgtz %[length], 1b \n\t" + " addiu %[vector], %[vector], 2 \n\t" + ".set pop \n\t" + + : [tmp1] "=&r" (tmp1), [maximum] "+r" (maximum), [value] "=&r" (value) + : [vector] "r" (vector), [length] "r" (length) + : "memory" + ); + + return maximum; +} + +// Maximum value of word32 vector. Version for MIPS platform. +int32_t WebRtcSpl_MaxValueW32_mips(const int32_t* vector, size_t length) { + int32_t maximum = WEBRTC_SPL_WORD32_MIN; + int tmp1, value; + + RTC_DCHECK_GT(length, 0); + + __asm__ volatile ( + ".set push \n\t" + ".set noreorder \n\t" + + "1: \n\t" + "lw %[value], 0(%[vector]) \n\t" + "addiu %[length], %[length], -1 \n\t" + "slt %[tmp1], %[maximum], %[value] \n\t" + "movn %[maximum], %[value], %[tmp1] \n\t" + "bgtz %[length], 1b \n\t" + " addiu %[vector], %[vector], 4 \n\t" + + ".set pop \n\t" + + : [tmp1] "=&r" (tmp1), [maximum] "+r" (maximum), [value] "=&r" (value) + : [vector] "r" (vector), [length] "r" (length) + : "memory" + ); + + return maximum; +} + +// Minimum value of word16 vector. Version for MIPS platform. +int16_t WebRtcSpl_MinValueW16_mips(const int16_t* vector, size_t length) { + int16_t minimum = WEBRTC_SPL_WORD16_MAX; + int tmp1; + int16_t value; + + RTC_DCHECK_GT(length, 0); + + __asm__ volatile ( + ".set push \n\t" + ".set noreorder \n\t" + + "1: \n\t" + "lh %[value], 0(%[vector]) \n\t" + "addiu %[length], %[length], -1 \n\t" + "slt %[tmp1], %[value], %[minimum] \n\t" + "movn %[minimum], %[value], %[tmp1] \n\t" + "bgtz %[length], 1b \n\t" + " addiu %[vector], %[vector], 2 \n\t" + + ".set pop \n\t" + + : [tmp1] "=&r" (tmp1), [minimum] "+r" (minimum), [value] "=&r" (value) + : [vector] "r" (vector), [length] "r" (length) + : "memory" + ); + + return minimum; +} + +// Minimum value of word32 vector. Version for MIPS platform. +int32_t WebRtcSpl_MinValueW32_mips(const int32_t* vector, size_t length) { + int32_t minimum = WEBRTC_SPL_WORD32_MAX; + int tmp1, value; + + RTC_DCHECK_GT(length, 0); + + __asm__ volatile ( + ".set push \n\t" + ".set noreorder \n\t" + + "1: \n\t" + "lw %[value], 0(%[vector]) \n\t" + "addiu %[length], %[length], -1 \n\t" + "slt %[tmp1], %[value], %[minimum] \n\t" + "movn %[minimum], %[value], %[tmp1] \n\t" + "bgtz %[length], 1b \n\t" + " addiu %[vector], %[vector], 4 \n\t" + + ".set pop \n\t" + + : [tmp1] "=&r" (tmp1), [minimum] "+r" (minimum), [value] "=&r" (value) + : [vector] "r" (vector), [length] "r" (length) + : "memory" + ); + + return minimum; +} diff --git a/third_party/libwebrtc/common_audio/signal_processing/min_max_operations_neon.c b/third_party/libwebrtc/common_audio/signal_processing/min_max_operations_neon.c new file mode 100644 index 0000000000..e5b4b7c71b --- /dev/null +++ b/third_party/libwebrtc/common_audio/signal_processing/min_max_operations_neon.c @@ -0,0 +1,333 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <arm_neon.h> +#include <stdlib.h> + +#include "rtc_base/checks.h" +#include "common_audio/signal_processing/include/signal_processing_library.h" + +// Maximum absolute value of word16 vector. C version for generic platforms. +int16_t WebRtcSpl_MaxAbsValueW16Neon(const int16_t* vector, size_t length) { + int absolute = 0, maximum = 0; + + RTC_DCHECK_GT(length, 0); + + const int16_t* p_start = vector; + size_t rest = length & 7; + const int16_t* p_end = vector + length - rest; + + int16x8_t v; + uint16x8_t max_qv; + max_qv = vdupq_n_u16(0); + + while (p_start < p_end) { + v = vld1q_s16(p_start); + // Note vabs doesn't change the value of -32768. + v = vabsq_s16(v); + // Use u16 so we don't lose the value -32768. + max_qv = vmaxq_u16(max_qv, vreinterpretq_u16_s16(v)); + p_start += 8; + } + +#ifdef WEBRTC_ARCH_ARM64 + maximum = (int)vmaxvq_u16(max_qv); +#else + uint16x4_t max_dv; + max_dv = vmax_u16(vget_low_u16(max_qv), vget_high_u16(max_qv)); + max_dv = vpmax_u16(max_dv, max_dv); + max_dv = vpmax_u16(max_dv, max_dv); + + maximum = (int)vget_lane_u16(max_dv, 0); +#endif + + p_end = vector + length; + while (p_start < p_end) { + absolute = abs((int)(*p_start)); + + if (absolute > maximum) { + maximum = absolute; + } + p_start++; + } + + // Guard the case for abs(-32768). + if (maximum > WEBRTC_SPL_WORD16_MAX) { + maximum = WEBRTC_SPL_WORD16_MAX; + } + + return (int16_t)maximum; +} + +// Maximum absolute value of word32 vector. NEON intrinsics version for +// ARM 32-bit/64-bit platforms. +int32_t WebRtcSpl_MaxAbsValueW32Neon(const int32_t* vector, size_t length) { + // Use uint32_t for the local variables, to accommodate the return value + // of abs(0x80000000), which is 0x80000000. + + uint32_t absolute = 0, maximum = 0; + size_t i = 0; + size_t residual = length & 0x7; + + RTC_DCHECK_GT(length, 0); + + const int32_t* p_start = vector; + uint32x4_t max32x4_0 = vdupq_n_u32(0); + uint32x4_t max32x4_1 = vdupq_n_u32(0); + + // First part, unroll the loop 8 times. + for (i = 0; i < length - residual; i += 8) { + int32x4_t in32x4_0 = vld1q_s32(p_start); + p_start += 4; + int32x4_t in32x4_1 = vld1q_s32(p_start); + p_start += 4; + in32x4_0 = vabsq_s32(in32x4_0); + in32x4_1 = vabsq_s32(in32x4_1); + // vabs doesn't change the value of 0x80000000. + // Use u32 so we don't lose the value 0x80000000. + max32x4_0 = vmaxq_u32(max32x4_0, vreinterpretq_u32_s32(in32x4_0)); + max32x4_1 = vmaxq_u32(max32x4_1, vreinterpretq_u32_s32(in32x4_1)); + } + + uint32x4_t max32x4 = vmaxq_u32(max32x4_0, max32x4_1); +#if defined(WEBRTC_ARCH_ARM64) + maximum = vmaxvq_u32(max32x4); +#else + uint32x2_t max32x2 = vmax_u32(vget_low_u32(max32x4), vget_high_u32(max32x4)); + max32x2 = vpmax_u32(max32x2, max32x2); + + maximum = vget_lane_u32(max32x2, 0); +#endif + + // Second part, do the remaining iterations (if any). + for (i = residual; i > 0; i--) { + absolute = abs((int)(*p_start)); + if (absolute > maximum) { + maximum = absolute; + } + p_start++; + } + + // Guard against the case for 0x80000000. + maximum = WEBRTC_SPL_MIN(maximum, WEBRTC_SPL_WORD32_MAX); + + return (int32_t)maximum; +} + +// Maximum value of word16 vector. NEON intrinsics version for +// ARM 32-bit/64-bit platforms. +int16_t WebRtcSpl_MaxValueW16Neon(const int16_t* vector, size_t length) { + int16_t maximum = WEBRTC_SPL_WORD16_MIN; + size_t i = 0; + size_t residual = length & 0x7; + + RTC_DCHECK_GT(length, 0); + + const int16_t* p_start = vector; + int16x8_t max16x8 = vdupq_n_s16(WEBRTC_SPL_WORD16_MIN); + + // First part, unroll the loop 8 times. + for (i = 0; i < length - residual; i += 8) { + int16x8_t in16x8 = vld1q_s16(p_start); + max16x8 = vmaxq_s16(max16x8, in16x8); + p_start += 8; + } + +#if defined(WEBRTC_ARCH_ARM64) + maximum = vmaxvq_s16(max16x8); +#else + int16x4_t max16x4 = vmax_s16(vget_low_s16(max16x8), vget_high_s16(max16x8)); + max16x4 = vpmax_s16(max16x4, max16x4); + max16x4 = vpmax_s16(max16x4, max16x4); + + maximum = vget_lane_s16(max16x4, 0); +#endif + + // Second part, do the remaining iterations (if any). + for (i = residual; i > 0; i--) { + if (*p_start > maximum) + maximum = *p_start; + p_start++; + } + return maximum; +} + +// Maximum value of word32 vector. NEON intrinsics version for +// ARM 32-bit/64-bit platforms. +int32_t WebRtcSpl_MaxValueW32Neon(const int32_t* vector, size_t length) { + int32_t maximum = WEBRTC_SPL_WORD32_MIN; + size_t i = 0; + size_t residual = length & 0x7; + + RTC_DCHECK_GT(length, 0); + + const int32_t* p_start = vector; + int32x4_t max32x4_0 = vdupq_n_s32(WEBRTC_SPL_WORD32_MIN); + int32x4_t max32x4_1 = vdupq_n_s32(WEBRTC_SPL_WORD32_MIN); + + // First part, unroll the loop 8 times. + for (i = 0; i < length - residual; i += 8) { + int32x4_t in32x4_0 = vld1q_s32(p_start); + p_start += 4; + int32x4_t in32x4_1 = vld1q_s32(p_start); + p_start += 4; + max32x4_0 = vmaxq_s32(max32x4_0, in32x4_0); + max32x4_1 = vmaxq_s32(max32x4_1, in32x4_1); + } + + int32x4_t max32x4 = vmaxq_s32(max32x4_0, max32x4_1); +#if defined(WEBRTC_ARCH_ARM64) + maximum = vmaxvq_s32(max32x4); +#else + int32x2_t max32x2 = vmax_s32(vget_low_s32(max32x4), vget_high_s32(max32x4)); + max32x2 = vpmax_s32(max32x2, max32x2); + + maximum = vget_lane_s32(max32x2, 0); +#endif + + // Second part, do the remaining iterations (if any). + for (i = residual; i > 0; i--) { + if (*p_start > maximum) + maximum = *p_start; + p_start++; + } + return maximum; +} + +// Minimum value of word16 vector. NEON intrinsics version for +// ARM 32-bit/64-bit platforms. +int16_t WebRtcSpl_MinValueW16Neon(const int16_t* vector, size_t length) { + int16_t minimum = WEBRTC_SPL_WORD16_MAX; + size_t i = 0; + size_t residual = length & 0x7; + + RTC_DCHECK_GT(length, 0); + + const int16_t* p_start = vector; + int16x8_t min16x8 = vdupq_n_s16(WEBRTC_SPL_WORD16_MAX); + + // First part, unroll the loop 8 times. + for (i = 0; i < length - residual; i += 8) { + int16x8_t in16x8 = vld1q_s16(p_start); + min16x8 = vminq_s16(min16x8, in16x8); + p_start += 8; + } + +#if defined(WEBRTC_ARCH_ARM64) + minimum = vminvq_s16(min16x8); +#else + int16x4_t min16x4 = vmin_s16(vget_low_s16(min16x8), vget_high_s16(min16x8)); + min16x4 = vpmin_s16(min16x4, min16x4); + min16x4 = vpmin_s16(min16x4, min16x4); + + minimum = vget_lane_s16(min16x4, 0); +#endif + + // Second part, do the remaining iterations (if any). + for (i = residual; i > 0; i--) { + if (*p_start < minimum) + minimum = *p_start; + p_start++; + } + return minimum; +} + +// Minimum value of word32 vector. NEON intrinsics version for +// ARM 32-bit/64-bit platforms. +int32_t WebRtcSpl_MinValueW32Neon(const int32_t* vector, size_t length) { + int32_t minimum = WEBRTC_SPL_WORD32_MAX; + size_t i = 0; + size_t residual = length & 0x7; + + RTC_DCHECK_GT(length, 0); + + const int32_t* p_start = vector; + int32x4_t min32x4_0 = vdupq_n_s32(WEBRTC_SPL_WORD32_MAX); + int32x4_t min32x4_1 = vdupq_n_s32(WEBRTC_SPL_WORD32_MAX); + + // First part, unroll the loop 8 times. + for (i = 0; i < length - residual; i += 8) { + int32x4_t in32x4_0 = vld1q_s32(p_start); + p_start += 4; + int32x4_t in32x4_1 = vld1q_s32(p_start); + p_start += 4; + min32x4_0 = vminq_s32(min32x4_0, in32x4_0); + min32x4_1 = vminq_s32(min32x4_1, in32x4_1); + } + + int32x4_t min32x4 = vminq_s32(min32x4_0, min32x4_1); +#if defined(WEBRTC_ARCH_ARM64) + minimum = vminvq_s32(min32x4); +#else + int32x2_t min32x2 = vmin_s32(vget_low_s32(min32x4), vget_high_s32(min32x4)); + min32x2 = vpmin_s32(min32x2, min32x2); + + minimum = vget_lane_s32(min32x2, 0); +#endif + + // Second part, do the remaining iterations (if any). + for (i = residual; i > 0; i--) { + if (*p_start < minimum) + minimum = *p_start; + p_start++; + } + return minimum; +} + +// Finds both the minimum and maximum elements in an array of 16-bit integers. +void WebRtcSpl_MinMaxW16Neon(const int16_t* vector, size_t length, + int16_t* min_val, int16_t* max_val) { + int16_t minimum = WEBRTC_SPL_WORD16_MAX; + int16_t maximum = WEBRTC_SPL_WORD16_MIN; + size_t i = 0; + size_t residual = length & 0x7; + + RTC_DCHECK_GT(length, 0); + + const int16_t* p_start = vector; + int16x8_t min16x8 = vdupq_n_s16(WEBRTC_SPL_WORD16_MAX); + int16x8_t max16x8 = vdupq_n_s16(WEBRTC_SPL_WORD16_MIN); + + // First part, unroll the loop 8 times. + for (i = 0; i < length - residual; i += 8) { + int16x8_t in16x8 = vld1q_s16(p_start); + min16x8 = vminq_s16(min16x8, in16x8); + max16x8 = vmaxq_s16(max16x8, in16x8); + p_start += 8; + } + +#if defined(WEBRTC_ARCH_ARM64) + minimum = vminvq_s16(min16x8); + maximum = vmaxvq_s16(max16x8); +#else + int16x4_t min16x4 = vmin_s16(vget_low_s16(min16x8), vget_high_s16(min16x8)); + min16x4 = vpmin_s16(min16x4, min16x4); + min16x4 = vpmin_s16(min16x4, min16x4); + + minimum = vget_lane_s16(min16x4, 0); + + int16x4_t max16x4 = vmax_s16(vget_low_s16(max16x8), vget_high_s16(max16x8)); + max16x4 = vpmax_s16(max16x4, max16x4); + max16x4 = vpmax_s16(max16x4, max16x4); + + maximum = vget_lane_s16(max16x4, 0); +#endif + + // Second part, do the remaining iterations (if any). + for (i = residual; i > 0; i--) { + if (*p_start < minimum) + minimum = *p_start; + if (*p_start > maximum) + maximum = *p_start; + p_start++; + } + *min_val = minimum; + *max_val = maximum; +} diff --git a/third_party/libwebrtc/common_audio/signal_processing/randomization_functions.c b/third_party/libwebrtc/common_audio/signal_processing/randomization_functions.c new file mode 100644 index 0000000000..a445c572c7 --- /dev/null +++ b/third_party/libwebrtc/common_audio/signal_processing/randomization_functions.c @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +/* + * This file contains implementations of the randomization functions + * WebRtcSpl_RandU() + * WebRtcSpl_RandN() + * WebRtcSpl_RandUArray() + * + * The description header can be found in signal_processing_library.h + * + */ + +#include "common_audio/signal_processing/include/signal_processing_library.h" + +static const uint32_t kMaxSeedUsed = 0x80000000; + +static const int16_t kRandNTable[] = { + 9178, -7260, 40, 10189, 4894, -3531, -13779, 14764, + -4008, -8884, -8990, 1008, 7368, 5184, 3251, -5817, + -9786, 5963, 1770, 8066, -7135, 10772, -2298, 1361, + 6484, 2241, -8633, 792, 199, -3344, 6553, -10079, + -15040, 95, 11608, -12469, 14161, -4176, 2476, 6403, + 13685, -16005, 6646, 2239, 10916, -3004, -602, -3141, + 2142, 14144, -5829, 5305, 8209, 4713, 2697, -5112, + 16092, -1210, -2891, -6631, -5360, -11878, -6781, -2739, + -6392, 536, 10923, 10872, 5059, -4748, -7770, 5477, + 38, -1025, -2892, 1638, 6304, 14375, -11028, 1553, + -1565, 10762, -393, 4040, 5257, 12310, 6554, -4799, + 4899, -6354, 1603, -1048, -2220, 8247, -186, -8944, + -12004, 2332, 4801, -4933, 6371, 131, 8614, -5927, + -8287, -22760, 4033, -15162, 3385, 3246, 3153, -5250, + 3766, 784, 6494, -62, 3531, -1582, 15572, 662, + -3952, -330, -3196, 669, 7236, -2678, -6569, 23319, + -8645, -741, 14830, -15976, 4903, 315, -11342, 10311, + 1858, -7777, 2145, 5436, 5677, -113, -10033, 826, + -1353, 17210, 7768, 986, -1471, 8291, -4982, 8207, + -14911, -6255, -2449, -11881, -7059, -11703, -4338, 8025, + 7538, -2823, -12490, 9470, -1613, -2529, -10092, -7807, + 9480, 6970, -12844, 5123, 3532, 4816, 4803, -8455, + -5045, 14032, -4378, -1643, 5756, -11041, -2732, -16618, + -6430, -18375, -3320, 6098, 5131, -4269, -8840, 2482, + -7048, 1547, -21890, -6505, -7414, -424, -11722, 7955, + 1653, -17299, 1823, 473, -9232, 3337, 1111, 873, + 4018, -8982, 9889, 3531, -11763, -3799, 7373, -4539, + 3231, 7054, -8537, 7616, 6244, 16635, 447, -2915, + 13967, 705, -2669, -1520, -1771, -16188, 5956, 5117, + 6371, -9936, -1448, 2480, 5128, 7550, -8130, 5236, + 8213, -6443, 7707, -1950, -13811, 7218, 7031, -3883, + 67, 5731, -2874, 13480, -3743, 9298, -3280, 3552, + -4425, -18, -3785, -9988, -5357, 5477, -11794, 2117, + 1416, -9935, 3376, 802, -5079, -8243, 12652, 66, + 3653, -2368, 6781, -21895, -7227, 2487, 7839, -385, + 6646, -7016, -4658, 5531, -1705, 834, 129, 3694, + -1343, 2238, -22640, -6417, -11139, 11301, -2945, -3494, + -5626, 185, -3615, -2041, -7972, -3106, -60, -23497, + -1566, 17064, 3519, 2518, 304, -6805, -10269, 2105, + 1936, -426, -736, -8122, -1467, 4238, -6939, -13309, + 360, 7402, -7970, 12576, 3287, 12194, -6289, -16006, + 9171, 4042, -9193, 9123, -2512, 6388, -4734, -8739, + 1028, -5406, -1696, 5889, -666, -4736, 4971, 3565, + 9362, -6292, 3876, -3652, -19666, 7523, -4061, 391, + -11773, 7502, -3763, 4929, -9478, 13278, 2805, 4496, + 7814, 16419, 12455, -14773, 2127, -2746, 3763, 4847, + 3698, 6978, 4751, -6957, -3581, -45, 6252, 1513, + -4797, -7925, 11270, 16188, -2359, -5269, 9376, -10777, + 7262, 20031, -6515, -2208, -5353, 8085, -1341, -1303, + 7333, 5576, 3625, 5763, -7931, 9833, -3371, -10305, + 6534, -13539, -9971, 997, 8464, -4064, -1495, 1857, + 13624, 5458, 9490, -11086, -4524, 12022, -550, -198, + 408, -8455, -7068, 10289, 9712, -3366, 9028, -7621, + -5243, 2362, 6909, 4672, -4933, -1799, 4709, -4563, + -62, -566, 1624, -7010, 14730, -17791, -3697, -2344, + -1741, 7099, -9509, -6855, -1989, 3495, -2289, 2031, + 12784, 891, 14189, -3963, -5683, 421, -12575, 1724, + -12682, -5970, -8169, 3143, -1824, -5488, -5130, 8536, + 12799, 794, 5738, 3459, -11689, -258, -3738, -3775, + -8742, 2333, 8312, -9383, 10331, 13119, 8398, 10644, + -19433, -6446, -16277, -11793, 16284, 9345, 15222, 15834, + 2009, -7349, 130, -14547, 338, -5998, 3337, 21492, + 2406, 7703, -951, 11196, -564, 3406, 2217, 4806, + 2374, -5797, 11839, 8940, -11874, 18213, 2855, 10492 +}; + +static uint32_t IncreaseSeed(uint32_t* seed) { + seed[0] = (seed[0] * ((int32_t)69069) + 1) & (kMaxSeedUsed - 1); + return seed[0]; +} + +int16_t WebRtcSpl_RandU(uint32_t* seed) { + return (int16_t)(IncreaseSeed(seed) >> 16); +} + +int16_t WebRtcSpl_RandN(uint32_t* seed) { + return kRandNTable[IncreaseSeed(seed) >> 23]; +} + +// Creates an array of uniformly distributed variables. +int16_t WebRtcSpl_RandUArray(int16_t* vector, + int16_t vector_length, + uint32_t* seed) { + int i; + for (i = 0; i < vector_length; i++) { + vector[i] = WebRtcSpl_RandU(seed); + } + return vector_length; +} diff --git a/third_party/libwebrtc/common_audio/signal_processing/real_fft.c b/third_party/libwebrtc/common_audio/signal_processing/real_fft.c new file mode 100644 index 0000000000..780e517a15 --- /dev/null +++ b/third_party/libwebrtc/common_audio/signal_processing/real_fft.c @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "common_audio/signal_processing/include/real_fft.h" + +#include <stdlib.h> + +#include "common_audio/signal_processing/include/signal_processing_library.h" + +struct RealFFT { + int order; +}; + +struct RealFFT* WebRtcSpl_CreateRealFFT(int order) { + struct RealFFT* self = NULL; + + if (order > kMaxFFTOrder || order < 0) { + return NULL; + } + + self = malloc(sizeof(struct RealFFT)); + if (self == NULL) { + return NULL; + } + self->order = order; + + return self; +} + +void WebRtcSpl_FreeRealFFT(struct RealFFT* self) { + if (self != NULL) { + free(self); + } +} + +// The C version FFT functions (i.e. WebRtcSpl_RealForwardFFT and +// WebRtcSpl_RealInverseFFT) are real-valued FFT wrappers for complex-valued +// FFT implementation in SPL. + +int WebRtcSpl_RealForwardFFT(struct RealFFT* self, + const int16_t* real_data_in, + int16_t* complex_data_out) { + int i = 0; + int j = 0; + int result = 0; + int n = 1 << self->order; + // The complex-value FFT implementation needs a buffer to hold 2^order + // 16-bit COMPLEX numbers, for both time and frequency data. + int16_t complex_buffer[2 << kMaxFFTOrder]; + + // Insert zeros to the imaginary parts for complex forward FFT input. + for (i = 0, j = 0; i < n; i += 1, j += 2) { + complex_buffer[j] = real_data_in[i]; + complex_buffer[j + 1] = 0; + }; + + WebRtcSpl_ComplexBitReverse(complex_buffer, self->order); + result = WebRtcSpl_ComplexFFT(complex_buffer, self->order, 1); + + // For real FFT output, use only the first N + 2 elements from + // complex forward FFT. + memcpy(complex_data_out, complex_buffer, sizeof(int16_t) * (n + 2)); + + return result; +} + +int WebRtcSpl_RealInverseFFT(struct RealFFT* self, + const int16_t* complex_data_in, + int16_t* real_data_out) { + int i = 0; + int j = 0; + int result = 0; + int n = 1 << self->order; + // Create the buffer specific to complex-valued FFT implementation. + int16_t complex_buffer[2 << kMaxFFTOrder]; + + // For n-point FFT, first copy the first n + 2 elements into complex + // FFT, then construct the remaining n - 2 elements by real FFT's + // conjugate-symmetric properties. + memcpy(complex_buffer, complex_data_in, sizeof(int16_t) * (n + 2)); + for (i = n + 2; i < 2 * n; i += 2) { + complex_buffer[i] = complex_data_in[2 * n - i]; + complex_buffer[i + 1] = -complex_data_in[2 * n - i + 1]; + } + + WebRtcSpl_ComplexBitReverse(complex_buffer, self->order); + result = WebRtcSpl_ComplexIFFT(complex_buffer, self->order, 1); + + // Strip out the imaginary parts of the complex inverse FFT output. + for (i = 0, j = 0; i < n; i += 1, j += 2) { + real_data_out[i] = complex_buffer[j]; + } + + return result; +} diff --git a/third_party/libwebrtc/common_audio/signal_processing/real_fft_unittest.cc b/third_party/libwebrtc/common_audio/signal_processing/real_fft_unittest.cc new file mode 100644 index 0000000000..7cabe7d9fe --- /dev/null +++ b/third_party/libwebrtc/common_audio/signal_processing/real_fft_unittest.cc @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "common_audio/signal_processing/include/real_fft.h" + +#include "common_audio/signal_processing/include/signal_processing_library.h" +#include "test/gtest.h" + +namespace webrtc { +namespace { + +// FFT order. +const int kOrder = 5; +// Lengths for real FFT's time and frequency bufffers. +// For N-point FFT, the length requirements from API are N and N+2 respectively. +const int kTimeDataLength = 1 << kOrder; +const int kFreqDataLength = (1 << kOrder) + 2; +// For complex FFT's time and freq buffer. The implementation requires +// 2*N 16-bit words. +const int kComplexFftDataLength = 2 << kOrder; +// Reference data for time signal. +const int16_t kRefData[kTimeDataLength] = { + 11739, 6848, -8688, 31980, -30295, 25242, 27085, 19410, + -26299, 15607, -10791, 11778, -23819, 14498, -25772, 10076, + 1173, 6848, -8688, 31980, -30295, 2522, 27085, 19410, + -2629, 5607, -3, 1178, -23819, 1498, -25772, 10076}; + +TEST(RealFFTTest, CreateFailsOnBadInput) { + RealFFT* fft = WebRtcSpl_CreateRealFFT(11); + EXPECT_TRUE(fft == nullptr); + fft = WebRtcSpl_CreateRealFFT(-1); + EXPECT_TRUE(fft == nullptr); +} + +TEST(RealFFTTest, RealAndComplexMatch) { + int i = 0; + int j = 0; + int16_t real_fft_time[kTimeDataLength] = {0}; + int16_t real_fft_freq[kFreqDataLength] = {0}; + // One common buffer for complex FFT's time and frequency data. + int16_t complex_fft_buff[kComplexFftDataLength] = {0}; + + // Prepare the inputs to forward FFT's. + memcpy(real_fft_time, kRefData, sizeof(kRefData)); + for (i = 0, j = 0; i < kTimeDataLength; i += 1, j += 2) { + complex_fft_buff[j] = kRefData[i]; + complex_fft_buff[j + 1] = 0; // Insert zero's to imaginary parts. + } + + // Create and run real forward FFT. + RealFFT* fft = WebRtcSpl_CreateRealFFT(kOrder); + EXPECT_TRUE(fft != nullptr); + EXPECT_EQ(0, WebRtcSpl_RealForwardFFT(fft, real_fft_time, real_fft_freq)); + + // Run complex forward FFT. + WebRtcSpl_ComplexBitReverse(complex_fft_buff, kOrder); + EXPECT_EQ(0, WebRtcSpl_ComplexFFT(complex_fft_buff, kOrder, 1)); + + // Verify the results between complex and real forward FFT. + for (i = 0; i < kFreqDataLength; i++) { + EXPECT_EQ(real_fft_freq[i], complex_fft_buff[i]); + } + + // Prepare the inputs to inverse real FFT. + // We use whatever data in complex_fft_buff[] since we don't care + // about data contents. Only kFreqDataLength 16-bit words are copied + // from complex_fft_buff to real_fft_freq since remaining words (2nd half) + // are conjugate-symmetric to the first half in theory. + memcpy(real_fft_freq, complex_fft_buff, sizeof(real_fft_freq)); + + // Run real inverse FFT. + int real_scale = WebRtcSpl_RealInverseFFT(fft, real_fft_freq, real_fft_time); + EXPECT_GE(real_scale, 0); + + // Run complex inverse FFT. + WebRtcSpl_ComplexBitReverse(complex_fft_buff, kOrder); + int complex_scale = WebRtcSpl_ComplexIFFT(complex_fft_buff, kOrder, 1); + + // Verify the results between complex and real inverse FFT. + // They are not bit-exact, since complex IFFT doesn't produce + // exactly conjugate-symmetric data (between first and second half). + EXPECT_EQ(real_scale, complex_scale); + for (i = 0, j = 0; i < kTimeDataLength; i += 1, j += 2) { + EXPECT_LE(abs(real_fft_time[i] - complex_fft_buff[j]), 1); + } + + WebRtcSpl_FreeRealFFT(fft); +} + +} // namespace +} // namespace webrtc diff --git a/third_party/libwebrtc/common_audio/signal_processing/refl_coef_to_lpc.c b/third_party/libwebrtc/common_audio/signal_processing/refl_coef_to_lpc.c new file mode 100644 index 0000000000..b0858b2b0e --- /dev/null +++ b/third_party/libwebrtc/common_audio/signal_processing/refl_coef_to_lpc.c @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +/* + * This file contains the function WebRtcSpl_ReflCoefToLpc(). + * The description header can be found in signal_processing_library.h + * + */ + +#include "common_audio/signal_processing/include/signal_processing_library.h" + +void WebRtcSpl_ReflCoefToLpc(const int16_t *k, int use_order, int16_t *a) +{ + int16_t any[WEBRTC_SPL_MAX_LPC_ORDER + 1]; + int16_t *aptr, *aptr2, *anyptr; + const int16_t *kptr; + int m, i; + + kptr = k; + *a = 4096; // i.e., (Word16_MAX >> 3)+1. + *any = *a; + a[1] = *k >> 3; + + for (m = 1; m < use_order; m++) + { + kptr++; + aptr = a; + aptr++; + aptr2 = &a[m]; + anyptr = any; + anyptr++; + + any[m + 1] = *kptr >> 3; + for (i = 0; i < m; i++) + { + *anyptr = *aptr + (int16_t)((*aptr2 * *kptr) >> 15); + anyptr++; + aptr++; + aptr2--; + } + + aptr = a; + anyptr = any; + for (i = 0; i < (m + 2); i++) + { + *aptr = *anyptr; + aptr++; + anyptr++; + } + } +} diff --git a/third_party/libwebrtc/common_audio/signal_processing/resample.c b/third_party/libwebrtc/common_audio/signal_processing/resample.c new file mode 100644 index 0000000000..d4b2736476 --- /dev/null +++ b/third_party/libwebrtc/common_audio/signal_processing/resample.c @@ -0,0 +1,505 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +/* + * This file contains the resampling functions for 22 kHz. + * The description header can be found in signal_processing_library.h + * + */ + +#include "common_audio/signal_processing/include/signal_processing_library.h" +#include "common_audio/signal_processing/resample_by_2_internal.h" + +// Declaration of internally used functions +static void WebRtcSpl_32khzTo22khzIntToShort(const int32_t *In, int16_t *Out, + int32_t K); + +void WebRtcSpl_32khzTo22khzIntToInt(const int32_t *In, int32_t *Out, + int32_t K); + +// interpolation coefficients +static const int16_t kCoefficients32To22[5][9] = { + {127, -712, 2359, -6333, 23456, 16775, -3695, 945, -154}, + {-39, 230, -830, 2785, 32366, -2324, 760, -218, 38}, + {117, -663, 2222, -6133, 26634, 13070, -3174, 831, -137}, + {-77, 457, -1677, 5958, 31175, -4136, 1405, -408, 71}, + { 98, -560, 1900, -5406, 29240, 9423, -2480, 663, -110} +}; + +////////////////////// +// 22 kHz -> 16 kHz // +////////////////////// + +// number of subblocks; options: 1, 2, 4, 5, 10 +#define SUB_BLOCKS_22_16 5 + +// 22 -> 16 resampler +void WebRtcSpl_Resample22khzTo16khz(const int16_t* in, int16_t* out, + WebRtcSpl_State22khzTo16khz* state, int32_t* tmpmem) +{ + int k; + + // process two blocks of 10/SUB_BLOCKS_22_16 ms (to reduce temp buffer size) + for (k = 0; k < SUB_BLOCKS_22_16; k++) + { + ///// 22 --> 44 ///// + // int16_t in[220/SUB_BLOCKS_22_16] + // int32_t out[440/SUB_BLOCKS_22_16] + ///// + WebRtcSpl_UpBy2ShortToInt(in, 220 / SUB_BLOCKS_22_16, tmpmem + 16, state->S_22_44); + + ///// 44 --> 32 ///// + // int32_t in[440/SUB_BLOCKS_22_16] + // int32_t out[320/SUB_BLOCKS_22_16] + ///// + // copy state to and from input array + tmpmem[8] = state->S_44_32[0]; + tmpmem[9] = state->S_44_32[1]; + tmpmem[10] = state->S_44_32[2]; + tmpmem[11] = state->S_44_32[3]; + tmpmem[12] = state->S_44_32[4]; + tmpmem[13] = state->S_44_32[5]; + tmpmem[14] = state->S_44_32[6]; + tmpmem[15] = state->S_44_32[7]; + state->S_44_32[0] = tmpmem[440 / SUB_BLOCKS_22_16 + 8]; + state->S_44_32[1] = tmpmem[440 / SUB_BLOCKS_22_16 + 9]; + state->S_44_32[2] = tmpmem[440 / SUB_BLOCKS_22_16 + 10]; + state->S_44_32[3] = tmpmem[440 / SUB_BLOCKS_22_16 + 11]; + state->S_44_32[4] = tmpmem[440 / SUB_BLOCKS_22_16 + 12]; + state->S_44_32[5] = tmpmem[440 / SUB_BLOCKS_22_16 + 13]; + state->S_44_32[6] = tmpmem[440 / SUB_BLOCKS_22_16 + 14]; + state->S_44_32[7] = tmpmem[440 / SUB_BLOCKS_22_16 + 15]; + + WebRtcSpl_Resample44khzTo32khz(tmpmem + 8, tmpmem, 40 / SUB_BLOCKS_22_16); + + ///// 32 --> 16 ///// + // int32_t in[320/SUB_BLOCKS_22_16] + // int32_t out[160/SUB_BLOCKS_22_16] + ///// + WebRtcSpl_DownBy2IntToShort(tmpmem, 320 / SUB_BLOCKS_22_16, out, state->S_32_16); + + // move input/output pointers 10/SUB_BLOCKS_22_16 ms seconds ahead + in += 220 / SUB_BLOCKS_22_16; + out += 160 / SUB_BLOCKS_22_16; + } +} + +// initialize state of 22 -> 16 resampler +void WebRtcSpl_ResetResample22khzTo16khz(WebRtcSpl_State22khzTo16khz* state) +{ + int k; + for (k = 0; k < 8; k++) + { + state->S_22_44[k] = 0; + state->S_44_32[k] = 0; + state->S_32_16[k] = 0; + } +} + +////////////////////// +// 16 kHz -> 22 kHz // +////////////////////// + +// number of subblocks; options: 1, 2, 4, 5, 10 +#define SUB_BLOCKS_16_22 4 + +// 16 -> 22 resampler +void WebRtcSpl_Resample16khzTo22khz(const int16_t* in, int16_t* out, + WebRtcSpl_State16khzTo22khz* state, int32_t* tmpmem) +{ + int k; + + // process two blocks of 10/SUB_BLOCKS_16_22 ms (to reduce temp buffer size) + for (k = 0; k < SUB_BLOCKS_16_22; k++) + { + ///// 16 --> 32 ///// + // int16_t in[160/SUB_BLOCKS_16_22] + // int32_t out[320/SUB_BLOCKS_16_22] + ///// + WebRtcSpl_UpBy2ShortToInt(in, 160 / SUB_BLOCKS_16_22, tmpmem + 8, state->S_16_32); + + ///// 32 --> 22 ///// + // int32_t in[320/SUB_BLOCKS_16_22] + // int32_t out[220/SUB_BLOCKS_16_22] + ///// + // copy state to and from input array + tmpmem[0] = state->S_32_22[0]; + tmpmem[1] = state->S_32_22[1]; + tmpmem[2] = state->S_32_22[2]; + tmpmem[3] = state->S_32_22[3]; + tmpmem[4] = state->S_32_22[4]; + tmpmem[5] = state->S_32_22[5]; + tmpmem[6] = state->S_32_22[6]; + tmpmem[7] = state->S_32_22[7]; + state->S_32_22[0] = tmpmem[320 / SUB_BLOCKS_16_22]; + state->S_32_22[1] = tmpmem[320 / SUB_BLOCKS_16_22 + 1]; + state->S_32_22[2] = tmpmem[320 / SUB_BLOCKS_16_22 + 2]; + state->S_32_22[3] = tmpmem[320 / SUB_BLOCKS_16_22 + 3]; + state->S_32_22[4] = tmpmem[320 / SUB_BLOCKS_16_22 + 4]; + state->S_32_22[5] = tmpmem[320 / SUB_BLOCKS_16_22 + 5]; + state->S_32_22[6] = tmpmem[320 / SUB_BLOCKS_16_22 + 6]; + state->S_32_22[7] = tmpmem[320 / SUB_BLOCKS_16_22 + 7]; + + WebRtcSpl_32khzTo22khzIntToShort(tmpmem, out, 20 / SUB_BLOCKS_16_22); + + // move input/output pointers 10/SUB_BLOCKS_16_22 ms seconds ahead + in += 160 / SUB_BLOCKS_16_22; + out += 220 / SUB_BLOCKS_16_22; + } +} + +// initialize state of 16 -> 22 resampler +void WebRtcSpl_ResetResample16khzTo22khz(WebRtcSpl_State16khzTo22khz* state) +{ + int k; + for (k = 0; k < 8; k++) + { + state->S_16_32[k] = 0; + state->S_32_22[k] = 0; + } +} + +////////////////////// +// 22 kHz -> 8 kHz // +////////////////////// + +// number of subblocks; options: 1, 2, 5, 10 +#define SUB_BLOCKS_22_8 2 + +// 22 -> 8 resampler +void WebRtcSpl_Resample22khzTo8khz(const int16_t* in, int16_t* out, + WebRtcSpl_State22khzTo8khz* state, int32_t* tmpmem) +{ + int k; + + // process two blocks of 10/SUB_BLOCKS_22_8 ms (to reduce temp buffer size) + for (k = 0; k < SUB_BLOCKS_22_8; k++) + { + ///// 22 --> 22 lowpass ///// + // int16_t in[220/SUB_BLOCKS_22_8] + // int32_t out[220/SUB_BLOCKS_22_8] + ///// + WebRtcSpl_LPBy2ShortToInt(in, 220 / SUB_BLOCKS_22_8, tmpmem + 16, state->S_22_22); + + ///// 22 --> 16 ///// + // int32_t in[220/SUB_BLOCKS_22_8] + // int32_t out[160/SUB_BLOCKS_22_8] + ///// + // copy state to and from input array + tmpmem[8] = state->S_22_16[0]; + tmpmem[9] = state->S_22_16[1]; + tmpmem[10] = state->S_22_16[2]; + tmpmem[11] = state->S_22_16[3]; + tmpmem[12] = state->S_22_16[4]; + tmpmem[13] = state->S_22_16[5]; + tmpmem[14] = state->S_22_16[6]; + tmpmem[15] = state->S_22_16[7]; + state->S_22_16[0] = tmpmem[220 / SUB_BLOCKS_22_8 + 8]; + state->S_22_16[1] = tmpmem[220 / SUB_BLOCKS_22_8 + 9]; + state->S_22_16[2] = tmpmem[220 / SUB_BLOCKS_22_8 + 10]; + state->S_22_16[3] = tmpmem[220 / SUB_BLOCKS_22_8 + 11]; + state->S_22_16[4] = tmpmem[220 / SUB_BLOCKS_22_8 + 12]; + state->S_22_16[5] = tmpmem[220 / SUB_BLOCKS_22_8 + 13]; + state->S_22_16[6] = tmpmem[220 / SUB_BLOCKS_22_8 + 14]; + state->S_22_16[7] = tmpmem[220 / SUB_BLOCKS_22_8 + 15]; + + WebRtcSpl_Resample44khzTo32khz(tmpmem + 8, tmpmem, 20 / SUB_BLOCKS_22_8); + + ///// 16 --> 8 ///// + // int32_t in[160/SUB_BLOCKS_22_8] + // int32_t out[80/SUB_BLOCKS_22_8] + ///// + WebRtcSpl_DownBy2IntToShort(tmpmem, 160 / SUB_BLOCKS_22_8, out, state->S_16_8); + + // move input/output pointers 10/SUB_BLOCKS_22_8 ms seconds ahead + in += 220 / SUB_BLOCKS_22_8; + out += 80 / SUB_BLOCKS_22_8; + } +} + +// initialize state of 22 -> 8 resampler +void WebRtcSpl_ResetResample22khzTo8khz(WebRtcSpl_State22khzTo8khz* state) +{ + int k; + for (k = 0; k < 8; k++) + { + state->S_22_22[k] = 0; + state->S_22_22[k + 8] = 0; + state->S_22_16[k] = 0; + state->S_16_8[k] = 0; + } +} + +////////////////////// +// 8 kHz -> 22 kHz // +////////////////////// + +// number of subblocks; options: 1, 2, 5, 10 +#define SUB_BLOCKS_8_22 2 + +// 8 -> 22 resampler +void WebRtcSpl_Resample8khzTo22khz(const int16_t* in, int16_t* out, + WebRtcSpl_State8khzTo22khz* state, int32_t* tmpmem) +{ + int k; + + // process two blocks of 10/SUB_BLOCKS_8_22 ms (to reduce temp buffer size) + for (k = 0; k < SUB_BLOCKS_8_22; k++) + { + ///// 8 --> 16 ///// + // int16_t in[80/SUB_BLOCKS_8_22] + // int32_t out[160/SUB_BLOCKS_8_22] + ///// + WebRtcSpl_UpBy2ShortToInt(in, 80 / SUB_BLOCKS_8_22, tmpmem + 18, state->S_8_16); + + ///// 16 --> 11 ///// + // int32_t in[160/SUB_BLOCKS_8_22] + // int32_t out[110/SUB_BLOCKS_8_22] + ///// + // copy state to and from input array + tmpmem[10] = state->S_16_11[0]; + tmpmem[11] = state->S_16_11[1]; + tmpmem[12] = state->S_16_11[2]; + tmpmem[13] = state->S_16_11[3]; + tmpmem[14] = state->S_16_11[4]; + tmpmem[15] = state->S_16_11[5]; + tmpmem[16] = state->S_16_11[6]; + tmpmem[17] = state->S_16_11[7]; + state->S_16_11[0] = tmpmem[160 / SUB_BLOCKS_8_22 + 10]; + state->S_16_11[1] = tmpmem[160 / SUB_BLOCKS_8_22 + 11]; + state->S_16_11[2] = tmpmem[160 / SUB_BLOCKS_8_22 + 12]; + state->S_16_11[3] = tmpmem[160 / SUB_BLOCKS_8_22 + 13]; + state->S_16_11[4] = tmpmem[160 / SUB_BLOCKS_8_22 + 14]; + state->S_16_11[5] = tmpmem[160 / SUB_BLOCKS_8_22 + 15]; + state->S_16_11[6] = tmpmem[160 / SUB_BLOCKS_8_22 + 16]; + state->S_16_11[7] = tmpmem[160 / SUB_BLOCKS_8_22 + 17]; + + WebRtcSpl_32khzTo22khzIntToInt(tmpmem + 10, tmpmem, 10 / SUB_BLOCKS_8_22); + + ///// 11 --> 22 ///// + // int32_t in[110/SUB_BLOCKS_8_22] + // int16_t out[220/SUB_BLOCKS_8_22] + ///// + WebRtcSpl_UpBy2IntToShort(tmpmem, 110 / SUB_BLOCKS_8_22, out, state->S_11_22); + + // move input/output pointers 10/SUB_BLOCKS_8_22 ms seconds ahead + in += 80 / SUB_BLOCKS_8_22; + out += 220 / SUB_BLOCKS_8_22; + } +} + +// initialize state of 8 -> 22 resampler +void WebRtcSpl_ResetResample8khzTo22khz(WebRtcSpl_State8khzTo22khz* state) +{ + int k; + for (k = 0; k < 8; k++) + { + state->S_8_16[k] = 0; + state->S_16_11[k] = 0; + state->S_11_22[k] = 0; + } +} + +// compute two inner-products and store them to output array +static void WebRtcSpl_DotProdIntToInt(const int32_t* in1, const int32_t* in2, + const int16_t* coef_ptr, int32_t* out1, + int32_t* out2) +{ + int32_t tmp1 = 16384; + int32_t tmp2 = 16384; + int16_t coef; + + coef = coef_ptr[0]; + tmp1 += coef * in1[0]; + tmp2 += coef * in2[-0]; + + coef = coef_ptr[1]; + tmp1 += coef * in1[1]; + tmp2 += coef * in2[-1]; + + coef = coef_ptr[2]; + tmp1 += coef * in1[2]; + tmp2 += coef * in2[-2]; + + coef = coef_ptr[3]; + tmp1 += coef * in1[3]; + tmp2 += coef * in2[-3]; + + coef = coef_ptr[4]; + tmp1 += coef * in1[4]; + tmp2 += coef * in2[-4]; + + coef = coef_ptr[5]; + tmp1 += coef * in1[5]; + tmp2 += coef * in2[-5]; + + coef = coef_ptr[6]; + tmp1 += coef * in1[6]; + tmp2 += coef * in2[-6]; + + coef = coef_ptr[7]; + tmp1 += coef * in1[7]; + tmp2 += coef * in2[-7]; + + coef = coef_ptr[8]; + *out1 = tmp1 + coef * in1[8]; + *out2 = tmp2 + coef * in2[-8]; +} + +// compute two inner-products and store them to output array +static void WebRtcSpl_DotProdIntToShort(const int32_t* in1, const int32_t* in2, + const int16_t* coef_ptr, int16_t* out1, + int16_t* out2) +{ + int32_t tmp1 = 16384; + int32_t tmp2 = 16384; + int16_t coef; + + coef = coef_ptr[0]; + tmp1 += coef * in1[0]; + tmp2 += coef * in2[-0]; + + coef = coef_ptr[1]; + tmp1 += coef * in1[1]; + tmp2 += coef * in2[-1]; + + coef = coef_ptr[2]; + tmp1 += coef * in1[2]; + tmp2 += coef * in2[-2]; + + coef = coef_ptr[3]; + tmp1 += coef * in1[3]; + tmp2 += coef * in2[-3]; + + coef = coef_ptr[4]; + tmp1 += coef * in1[4]; + tmp2 += coef * in2[-4]; + + coef = coef_ptr[5]; + tmp1 += coef * in1[5]; + tmp2 += coef * in2[-5]; + + coef = coef_ptr[6]; + tmp1 += coef * in1[6]; + tmp2 += coef * in2[-6]; + + coef = coef_ptr[7]; + tmp1 += coef * in1[7]; + tmp2 += coef * in2[-7]; + + coef = coef_ptr[8]; + tmp1 += coef * in1[8]; + tmp2 += coef * in2[-8]; + + // scale down, round and saturate + tmp1 >>= 15; + if (tmp1 > (int32_t)0x00007FFF) + tmp1 = 0x00007FFF; + if (tmp1 < (int32_t)0xFFFF8000) + tmp1 = 0xFFFF8000; + tmp2 >>= 15; + if (tmp2 > (int32_t)0x00007FFF) + tmp2 = 0x00007FFF; + if (tmp2 < (int32_t)0xFFFF8000) + tmp2 = 0xFFFF8000; + *out1 = (int16_t)tmp1; + *out2 = (int16_t)tmp2; +} + +// Resampling ratio: 11/16 +// input: int32_t (normalized, not saturated) :: size 16 * K +// output: int32_t (shifted 15 positions to the left, + offset 16384) :: size 11 * K +// K: Number of blocks + +void WebRtcSpl_32khzTo22khzIntToInt(const int32_t* In, + int32_t* Out, + int32_t K) +{ + ///////////////////////////////////////////////////////////// + // Filter operation: + // + // Perform resampling (16 input samples -> 11 output samples); + // process in sub blocks of size 16 samples. + int32_t m; + + for (m = 0; m < K; m++) + { + // first output sample + Out[0] = ((int32_t)In[3] << 15) + (1 << 14); + + // sum and accumulate filter coefficients and input samples + WebRtcSpl_DotProdIntToInt(&In[0], &In[22], kCoefficients32To22[0], &Out[1], &Out[10]); + + // sum and accumulate filter coefficients and input samples + WebRtcSpl_DotProdIntToInt(&In[2], &In[20], kCoefficients32To22[1], &Out[2], &Out[9]); + + // sum and accumulate filter coefficients and input samples + WebRtcSpl_DotProdIntToInt(&In[3], &In[19], kCoefficients32To22[2], &Out[3], &Out[8]); + + // sum and accumulate filter coefficients and input samples + WebRtcSpl_DotProdIntToInt(&In[5], &In[17], kCoefficients32To22[3], &Out[4], &Out[7]); + + // sum and accumulate filter coefficients and input samples + WebRtcSpl_DotProdIntToInt(&In[6], &In[16], kCoefficients32To22[4], &Out[5], &Out[6]); + + // update pointers + In += 16; + Out += 11; + } +} + +// Resampling ratio: 11/16 +// input: int32_t (normalized, not saturated) :: size 16 * K +// output: int16_t (saturated) :: size 11 * K +// K: Number of blocks + +void WebRtcSpl_32khzTo22khzIntToShort(const int32_t *In, + int16_t *Out, + int32_t K) +{ + ///////////////////////////////////////////////////////////// + // Filter operation: + // + // Perform resampling (16 input samples -> 11 output samples); + // process in sub blocks of size 16 samples. + int32_t tmp; + int32_t m; + + for (m = 0; m < K; m++) + { + // first output sample + tmp = In[3]; + if (tmp > (int32_t)0x00007FFF) + tmp = 0x00007FFF; + if (tmp < (int32_t)0xFFFF8000) + tmp = 0xFFFF8000; + Out[0] = (int16_t)tmp; + + // sum and accumulate filter coefficients and input samples + WebRtcSpl_DotProdIntToShort(&In[0], &In[22], kCoefficients32To22[0], &Out[1], &Out[10]); + + // sum and accumulate filter coefficients and input samples + WebRtcSpl_DotProdIntToShort(&In[2], &In[20], kCoefficients32To22[1], &Out[2], &Out[9]); + + // sum and accumulate filter coefficients and input samples + WebRtcSpl_DotProdIntToShort(&In[3], &In[19], kCoefficients32To22[2], &Out[3], &Out[8]); + + // sum and accumulate filter coefficients and input samples + WebRtcSpl_DotProdIntToShort(&In[5], &In[17], kCoefficients32To22[3], &Out[4], &Out[7]); + + // sum and accumulate filter coefficients and input samples + WebRtcSpl_DotProdIntToShort(&In[6], &In[16], kCoefficients32To22[4], &Out[5], &Out[6]); + + // update pointers + In += 16; + Out += 11; + } +} diff --git a/third_party/libwebrtc/common_audio/signal_processing/resample_48khz.c b/third_party/libwebrtc/common_audio/signal_processing/resample_48khz.c new file mode 100644 index 0000000000..8518e7b1ce --- /dev/null +++ b/third_party/libwebrtc/common_audio/signal_processing/resample_48khz.c @@ -0,0 +1,186 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +/* + * This file contains resampling functions between 48 kHz and nb/wb. + * The description header can be found in signal_processing_library.h + * + */ + +#include <string.h> +#include "common_audio/signal_processing/include/signal_processing_library.h" +#include "common_audio/signal_processing/resample_by_2_internal.h" + +//////////////////////////// +///// 48 kHz -> 16 kHz ///// +//////////////////////////// + +// 48 -> 16 resampler +void WebRtcSpl_Resample48khzTo16khz(const int16_t* in, int16_t* out, + WebRtcSpl_State48khzTo16khz* state, int32_t* tmpmem) +{ + ///// 48 --> 48(LP) ///// + // int16_t in[480] + // int32_t out[480] + ///// + WebRtcSpl_LPBy2ShortToInt(in, 480, tmpmem + 16, state->S_48_48); + + ///// 48 --> 32 ///// + // int32_t in[480] + // int32_t out[320] + ///// + // copy state to and from input array + memcpy(tmpmem + 8, state->S_48_32, 8 * sizeof(int32_t)); + memcpy(state->S_48_32, tmpmem + 488, 8 * sizeof(int32_t)); + WebRtcSpl_Resample48khzTo32khz(tmpmem + 8, tmpmem, 160); + + ///// 32 --> 16 ///// + // int32_t in[320] + // int16_t out[160] + ///// + WebRtcSpl_DownBy2IntToShort(tmpmem, 320, out, state->S_32_16); +} + +// initialize state of 48 -> 16 resampler +void WebRtcSpl_ResetResample48khzTo16khz(WebRtcSpl_State48khzTo16khz* state) +{ + memset(state->S_48_48, 0, 16 * sizeof(int32_t)); + memset(state->S_48_32, 0, 8 * sizeof(int32_t)); + memset(state->S_32_16, 0, 8 * sizeof(int32_t)); +} + +//////////////////////////// +///// 16 kHz -> 48 kHz ///// +//////////////////////////// + +// 16 -> 48 resampler +void WebRtcSpl_Resample16khzTo48khz(const int16_t* in, int16_t* out, + WebRtcSpl_State16khzTo48khz* state, int32_t* tmpmem) +{ + ///// 16 --> 32 ///// + // int16_t in[160] + // int32_t out[320] + ///// + WebRtcSpl_UpBy2ShortToInt(in, 160, tmpmem + 16, state->S_16_32); + + ///// 32 --> 24 ///// + // int32_t in[320] + // int32_t out[240] + // copy state to and from input array + ///// + memcpy(tmpmem + 8, state->S_32_24, 8 * sizeof(int32_t)); + memcpy(state->S_32_24, tmpmem + 328, 8 * sizeof(int32_t)); + WebRtcSpl_Resample32khzTo24khz(tmpmem + 8, tmpmem, 80); + + ///// 24 --> 48 ///// + // int32_t in[240] + // int16_t out[480] + ///// + WebRtcSpl_UpBy2IntToShort(tmpmem, 240, out, state->S_24_48); +} + +// initialize state of 16 -> 48 resampler +void WebRtcSpl_ResetResample16khzTo48khz(WebRtcSpl_State16khzTo48khz* state) +{ + memset(state->S_16_32, 0, 8 * sizeof(int32_t)); + memset(state->S_32_24, 0, 8 * sizeof(int32_t)); + memset(state->S_24_48, 0, 8 * sizeof(int32_t)); +} + +//////////////////////////// +///// 48 kHz -> 8 kHz ///// +//////////////////////////// + +// 48 -> 8 resampler +void WebRtcSpl_Resample48khzTo8khz(const int16_t* in, int16_t* out, + WebRtcSpl_State48khzTo8khz* state, int32_t* tmpmem) +{ + ///// 48 --> 24 ///// + // int16_t in[480] + // int32_t out[240] + ///// + WebRtcSpl_DownBy2ShortToInt(in, 480, tmpmem + 256, state->S_48_24); + + ///// 24 --> 24(LP) ///// + // int32_t in[240] + // int32_t out[240] + ///// + WebRtcSpl_LPBy2IntToInt(tmpmem + 256, 240, tmpmem + 16, state->S_24_24); + + ///// 24 --> 16 ///// + // int32_t in[240] + // int32_t out[160] + ///// + // copy state to and from input array + memcpy(tmpmem + 8, state->S_24_16, 8 * sizeof(int32_t)); + memcpy(state->S_24_16, tmpmem + 248, 8 * sizeof(int32_t)); + WebRtcSpl_Resample48khzTo32khz(tmpmem + 8, tmpmem, 80); + + ///// 16 --> 8 ///// + // int32_t in[160] + // int16_t out[80] + ///// + WebRtcSpl_DownBy2IntToShort(tmpmem, 160, out, state->S_16_8); +} + +// initialize state of 48 -> 8 resampler +void WebRtcSpl_ResetResample48khzTo8khz(WebRtcSpl_State48khzTo8khz* state) +{ + memset(state->S_48_24, 0, 8 * sizeof(int32_t)); + memset(state->S_24_24, 0, 16 * sizeof(int32_t)); + memset(state->S_24_16, 0, 8 * sizeof(int32_t)); + memset(state->S_16_8, 0, 8 * sizeof(int32_t)); +} + +//////////////////////////// +///// 8 kHz -> 48 kHz ///// +//////////////////////////// + +// 8 -> 48 resampler +void WebRtcSpl_Resample8khzTo48khz(const int16_t* in, int16_t* out, + WebRtcSpl_State8khzTo48khz* state, int32_t* tmpmem) +{ + ///// 8 --> 16 ///// + // int16_t in[80] + // int32_t out[160] + ///// + WebRtcSpl_UpBy2ShortToInt(in, 80, tmpmem + 264, state->S_8_16); + + ///// 16 --> 12 ///// + // int32_t in[160] + // int32_t out[120] + ///// + // copy state to and from input array + memcpy(tmpmem + 256, state->S_16_12, 8 * sizeof(int32_t)); + memcpy(state->S_16_12, tmpmem + 416, 8 * sizeof(int32_t)); + WebRtcSpl_Resample32khzTo24khz(tmpmem + 256, tmpmem + 240, 40); + + ///// 12 --> 24 ///// + // int32_t in[120] + // int16_t out[240] + ///// + WebRtcSpl_UpBy2IntToInt(tmpmem + 240, 120, tmpmem, state->S_12_24); + + ///// 24 --> 48 ///// + // int32_t in[240] + // int16_t out[480] + ///// + WebRtcSpl_UpBy2IntToShort(tmpmem, 240, out, state->S_24_48); +} + +// initialize state of 8 -> 48 resampler +void WebRtcSpl_ResetResample8khzTo48khz(WebRtcSpl_State8khzTo48khz* state) +{ + memset(state->S_8_16, 0, 8 * sizeof(int32_t)); + memset(state->S_16_12, 0, 8 * sizeof(int32_t)); + memset(state->S_12_24, 0, 8 * sizeof(int32_t)); + memset(state->S_24_48, 0, 8 * sizeof(int32_t)); +} diff --git a/third_party/libwebrtc/common_audio/signal_processing/resample_by_2.c b/third_party/libwebrtc/common_audio/signal_processing/resample_by_2.c new file mode 100644 index 0000000000..73e1950654 --- /dev/null +++ b/third_party/libwebrtc/common_audio/signal_processing/resample_by_2.c @@ -0,0 +1,183 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +/* + * This file contains the resampling by two functions. + * The description header can be found in signal_processing_library.h + * + */ + +#include "common_audio/signal_processing/include/signal_processing_library.h" + +#ifdef WEBRTC_ARCH_ARM_V7 + +// allpass filter coefficients. +static const uint32_t kResampleAllpass1[3] = {3284, 24441, 49528 << 15}; +static const uint32_t kResampleAllpass2[3] = + {12199, 37471 << 15, 60255 << 15}; + +// Multiply two 32-bit values and accumulate to another input value. +// Return: state + ((diff * tbl_value) >> 16) + +static __inline int32_t MUL_ACCUM_1(int32_t tbl_value, + int32_t diff, + int32_t state) { + int32_t result; + __asm __volatile ("smlawb %0, %1, %2, %3": "=r"(result): "r"(diff), + "r"(tbl_value), "r"(state)); + return result; +} + +// Multiply two 32-bit values and accumulate to another input value. +// Return: Return: state + (((diff << 1) * tbl_value) >> 32) +// +// The reason to introduce this function is that, in case we can't use smlawb +// instruction (in MUL_ACCUM_1) due to input value range, we can still use +// smmla to save some cycles. + +static __inline int32_t MUL_ACCUM_2(int32_t tbl_value, + int32_t diff, + int32_t state) { + int32_t result; + __asm __volatile ("smmla %0, %1, %2, %3": "=r"(result): "r"(diff << 1), + "r"(tbl_value), "r"(state)); + return result; +} + +#else + +// allpass filter coefficients. +static const uint16_t kResampleAllpass1[3] = {3284, 24441, 49528}; +static const uint16_t kResampleAllpass2[3] = {12199, 37471, 60255}; + +// Multiply a 32-bit value with a 16-bit value and accumulate to another input: +#define MUL_ACCUM_1(a, b, c) WEBRTC_SPL_SCALEDIFF32(a, b, c) +#define MUL_ACCUM_2(a, b, c) WEBRTC_SPL_SCALEDIFF32(a, b, c) + +#endif // WEBRTC_ARCH_ARM_V7 + + +// decimator +#if !defined(MIPS32_LE) +void WebRtcSpl_DownsampleBy2(const int16_t* in, size_t len, + int16_t* out, int32_t* filtState) { + int32_t tmp1, tmp2, diff, in32, out32; + size_t i; + + register int32_t state0 = filtState[0]; + register int32_t state1 = filtState[1]; + register int32_t state2 = filtState[2]; + register int32_t state3 = filtState[3]; + register int32_t state4 = filtState[4]; + register int32_t state5 = filtState[5]; + register int32_t state6 = filtState[6]; + register int32_t state7 = filtState[7]; + + for (i = (len >> 1); i > 0; i--) { + // lower allpass filter + in32 = (int32_t)(*in++) * (1 << 10); + diff = in32 - state1; + tmp1 = MUL_ACCUM_1(kResampleAllpass2[0], diff, state0); + state0 = in32; + diff = tmp1 - state2; + tmp2 = MUL_ACCUM_2(kResampleAllpass2[1], diff, state1); + state1 = tmp1; + diff = tmp2 - state3; + state3 = MUL_ACCUM_2(kResampleAllpass2[2], diff, state2); + state2 = tmp2; + + // upper allpass filter + in32 = (int32_t)(*in++) * (1 << 10); + diff = in32 - state5; + tmp1 = MUL_ACCUM_1(kResampleAllpass1[0], diff, state4); + state4 = in32; + diff = tmp1 - state6; + tmp2 = MUL_ACCUM_1(kResampleAllpass1[1], diff, state5); + state5 = tmp1; + diff = tmp2 - state7; + state7 = MUL_ACCUM_2(kResampleAllpass1[2], diff, state6); + state6 = tmp2; + + // add two allpass outputs, divide by two and round + out32 = (state3 + state7 + 1024) >> 11; + + // limit amplitude to prevent wrap-around, and write to output array + *out++ = WebRtcSpl_SatW32ToW16(out32); + } + + filtState[0] = state0; + filtState[1] = state1; + filtState[2] = state2; + filtState[3] = state3; + filtState[4] = state4; + filtState[5] = state5; + filtState[6] = state6; + filtState[7] = state7; +} +#endif // #if defined(MIPS32_LE) + + +void WebRtcSpl_UpsampleBy2(const int16_t* in, size_t len, + int16_t* out, int32_t* filtState) { + int32_t tmp1, tmp2, diff, in32, out32; + size_t i; + + register int32_t state0 = filtState[0]; + register int32_t state1 = filtState[1]; + register int32_t state2 = filtState[2]; + register int32_t state3 = filtState[3]; + register int32_t state4 = filtState[4]; + register int32_t state5 = filtState[5]; + register int32_t state6 = filtState[6]; + register int32_t state7 = filtState[7]; + + for (i = len; i > 0; i--) { + // lower allpass filter + in32 = (int32_t)(*in++) * (1 << 10); + diff = in32 - state1; + tmp1 = MUL_ACCUM_1(kResampleAllpass1[0], diff, state0); + state0 = in32; + diff = tmp1 - state2; + tmp2 = MUL_ACCUM_1(kResampleAllpass1[1], diff, state1); + state1 = tmp1; + diff = tmp2 - state3; + state3 = MUL_ACCUM_2(kResampleAllpass1[2], diff, state2); + state2 = tmp2; + + // round; limit amplitude to prevent wrap-around; write to output array + out32 = (state3 + 512) >> 10; + *out++ = WebRtcSpl_SatW32ToW16(out32); + + // upper allpass filter + diff = in32 - state5; + tmp1 = MUL_ACCUM_1(kResampleAllpass2[0], diff, state4); + state4 = in32; + diff = tmp1 - state6; + tmp2 = MUL_ACCUM_2(kResampleAllpass2[1], diff, state5); + state5 = tmp1; + diff = tmp2 - state7; + state7 = MUL_ACCUM_2(kResampleAllpass2[2], diff, state6); + state6 = tmp2; + + // round; limit amplitude to prevent wrap-around; write to output array + out32 = (state7 + 512) >> 10; + *out++ = WebRtcSpl_SatW32ToW16(out32); + } + + filtState[0] = state0; + filtState[1] = state1; + filtState[2] = state2; + filtState[3] = state3; + filtState[4] = state4; + filtState[5] = state5; + filtState[6] = state6; + filtState[7] = state7; +} diff --git a/third_party/libwebrtc/common_audio/signal_processing/resample_by_2_internal.c b/third_party/libwebrtc/common_audio/signal_processing/resample_by_2_internal.c new file mode 100644 index 0000000000..99592b20b5 --- /dev/null +++ b/third_party/libwebrtc/common_audio/signal_processing/resample_by_2_internal.c @@ -0,0 +1,689 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +/* + * This header file contains some internal resampling functions. + * + */ + +#include "common_audio/signal_processing/resample_by_2_internal.h" +#include "rtc_base/sanitizer.h" + +// allpass filter coefficients. +static const int16_t kResampleAllpass[2][3] = { + {821, 6110, 12382}, + {3050, 9368, 15063} +}; + +// +// decimator +// input: int32_t (shifted 15 positions to the left, + offset 16384) OVERWRITTEN! +// output: int16_t (saturated) (of length len/2) +// state: filter state array; length = 8 + +void RTC_NO_SANITIZE("signed-integer-overflow") // bugs.webrtc.org/5486 +WebRtcSpl_DownBy2IntToShort(int32_t *in, int32_t len, int16_t *out, + int32_t *state) +{ + int32_t tmp0, tmp1, diff; + int32_t i; + + len >>= 1; + + // lower allpass filter (operates on even input samples) + for (i = 0; i < len; i++) + { + tmp0 = in[i << 1]; + diff = tmp0 - state[1]; + // UBSan: -1771017321 - 999586185 cannot be represented in type 'int' + + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[0] + diff * kResampleAllpass[1][0]; + state[0] = tmp0; + diff = tmp1 - state[2]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[1] + diff * kResampleAllpass[1][1]; + state[1] = tmp1; + diff = tmp0 - state[3]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[3] = state[2] + diff * kResampleAllpass[1][2]; + state[2] = tmp0; + + // divide by two and store temporarily + in[i << 1] = (state[3] >> 1); + } + + in++; + + // upper allpass filter (operates on odd input samples) + for (i = 0; i < len; i++) + { + tmp0 = in[i << 1]; + diff = tmp0 - state[5]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[4] + diff * kResampleAllpass[0][0]; + state[4] = tmp0; + diff = tmp1 - state[6]; + // scale down and round + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[5] + diff * kResampleAllpass[0][1]; + state[5] = tmp1; + diff = tmp0 - state[7]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[7] = state[6] + diff * kResampleAllpass[0][2]; + state[6] = tmp0; + + // divide by two and store temporarily + in[i << 1] = (state[7] >> 1); + } + + in--; + + // combine allpass outputs + for (i = 0; i < len; i += 2) + { + // divide by two, add both allpass outputs and round + tmp0 = (in[i << 1] + in[(i << 1) + 1]) >> 15; + tmp1 = (in[(i << 1) + 2] + in[(i << 1) + 3]) >> 15; + if (tmp0 > (int32_t)0x00007FFF) + tmp0 = 0x00007FFF; + if (tmp0 < (int32_t)0xFFFF8000) + tmp0 = 0xFFFF8000; + out[i] = (int16_t)tmp0; + if (tmp1 > (int32_t)0x00007FFF) + tmp1 = 0x00007FFF; + if (tmp1 < (int32_t)0xFFFF8000) + tmp1 = 0xFFFF8000; + out[i + 1] = (int16_t)tmp1; + } +} + +// +// decimator +// input: int16_t +// output: int32_t (shifted 15 positions to the left, + offset 16384) (of length len/2) +// state: filter state array; length = 8 + +void RTC_NO_SANITIZE("signed-integer-overflow") // bugs.webrtc.org/5486 +WebRtcSpl_DownBy2ShortToInt(const int16_t *in, + int32_t len, + int32_t *out, + int32_t *state) +{ + int32_t tmp0, tmp1, diff; + int32_t i; + + len >>= 1; + + // lower allpass filter (operates on even input samples) + for (i = 0; i < len; i++) + { + tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14); + diff = tmp0 - state[1]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[0] + diff * kResampleAllpass[1][0]; + state[0] = tmp0; + diff = tmp1 - state[2]; + // UBSan: -1379909682 - 834099714 cannot be represented in type 'int' + + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[1] + diff * kResampleAllpass[1][1]; + state[1] = tmp1; + diff = tmp0 - state[3]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[3] = state[2] + diff * kResampleAllpass[1][2]; + state[2] = tmp0; + + // divide by two and store temporarily + out[i] = (state[3] >> 1); + } + + in++; + + // upper allpass filter (operates on odd input samples) + for (i = 0; i < len; i++) + { + tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14); + diff = tmp0 - state[5]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[4] + diff * kResampleAllpass[0][0]; + state[4] = tmp0; + diff = tmp1 - state[6]; + // scale down and round + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[5] + diff * kResampleAllpass[0][1]; + state[5] = tmp1; + diff = tmp0 - state[7]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[7] = state[6] + diff * kResampleAllpass[0][2]; + state[6] = tmp0; + + // divide by two and store temporarily + out[i] += (state[7] >> 1); + } + + in--; +} + +// +// interpolator +// input: int16_t +// output: int32_t (normalized, not saturated) (of length len*2) +// state: filter state array; length = 8 +void WebRtcSpl_UpBy2ShortToInt(const int16_t *in, int32_t len, int32_t *out, + int32_t *state) +{ + int32_t tmp0, tmp1, diff; + int32_t i; + + // upper allpass filter (generates odd output samples) + for (i = 0; i < len; i++) + { + tmp0 = ((int32_t)in[i] << 15) + (1 << 14); + diff = tmp0 - state[5]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[4] + diff * kResampleAllpass[0][0]; + state[4] = tmp0; + diff = tmp1 - state[6]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[5] + diff * kResampleAllpass[0][1]; + state[5] = tmp1; + diff = tmp0 - state[7]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[7] = state[6] + diff * kResampleAllpass[0][2]; + state[6] = tmp0; + + // scale down, round and store + out[i << 1] = state[7] >> 15; + } + + out++; + + // lower allpass filter (generates even output samples) + for (i = 0; i < len; i++) + { + tmp0 = ((int32_t)in[i] << 15) + (1 << 14); + diff = tmp0 - state[1]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[0] + diff * kResampleAllpass[1][0]; + state[0] = tmp0; + diff = tmp1 - state[2]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[1] + diff * kResampleAllpass[1][1]; + state[1] = tmp1; + diff = tmp0 - state[3]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[3] = state[2] + diff * kResampleAllpass[1][2]; + state[2] = tmp0; + + // scale down, round and store + out[i << 1] = state[3] >> 15; + } +} + +// +// interpolator +// input: int32_t (shifted 15 positions to the left, + offset 16384) +// output: int32_t (shifted 15 positions to the left, + offset 16384) (of length len*2) +// state: filter state array; length = 8 +void WebRtcSpl_UpBy2IntToInt(const int32_t *in, int32_t len, int32_t *out, + int32_t *state) +{ + int32_t tmp0, tmp1, diff; + int32_t i; + + // upper allpass filter (generates odd output samples) + for (i = 0; i < len; i++) + { + tmp0 = in[i]; + diff = tmp0 - state[5]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[4] + diff * kResampleAllpass[0][0]; + state[4] = tmp0; + diff = tmp1 - state[6]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[5] + diff * kResampleAllpass[0][1]; + state[5] = tmp1; + diff = tmp0 - state[7]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[7] = state[6] + diff * kResampleAllpass[0][2]; + state[6] = tmp0; + + // scale down, round and store + out[i << 1] = state[7]; + } + + out++; + + // lower allpass filter (generates even output samples) + for (i = 0; i < len; i++) + { + tmp0 = in[i]; + diff = tmp0 - state[1]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[0] + diff * kResampleAllpass[1][0]; + state[0] = tmp0; + diff = tmp1 - state[2]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[1] + diff * kResampleAllpass[1][1]; + state[1] = tmp1; + diff = tmp0 - state[3]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[3] = state[2] + diff * kResampleAllpass[1][2]; + state[2] = tmp0; + + // scale down, round and store + out[i << 1] = state[3]; + } +} + +// +// interpolator +// input: int32_t (shifted 15 positions to the left, + offset 16384) +// output: int16_t (saturated) (of length len*2) +// state: filter state array; length = 8 +void WebRtcSpl_UpBy2IntToShort(const int32_t *in, int32_t len, int16_t *out, + int32_t *state) +{ + int32_t tmp0, tmp1, diff; + int32_t i; + + // upper allpass filter (generates odd output samples) + for (i = 0; i < len; i++) + { + tmp0 = in[i]; + diff = tmp0 - state[5]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[4] + diff * kResampleAllpass[0][0]; + state[4] = tmp0; + diff = tmp1 - state[6]; + // scale down and round + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[5] + diff * kResampleAllpass[0][1]; + state[5] = tmp1; + diff = tmp0 - state[7]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[7] = state[6] + diff * kResampleAllpass[0][2]; + state[6] = tmp0; + + // scale down, saturate and store + tmp1 = state[7] >> 15; + if (tmp1 > (int32_t)0x00007FFF) + tmp1 = 0x00007FFF; + if (tmp1 < (int32_t)0xFFFF8000) + tmp1 = 0xFFFF8000; + out[i << 1] = (int16_t)tmp1; + } + + out++; + + // lower allpass filter (generates even output samples) + for (i = 0; i < len; i++) + { + tmp0 = in[i]; + diff = tmp0 - state[1]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[0] + diff * kResampleAllpass[1][0]; + state[0] = tmp0; + diff = tmp1 - state[2]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[1] + diff * kResampleAllpass[1][1]; + state[1] = tmp1; + diff = tmp0 - state[3]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[3] = state[2] + diff * kResampleAllpass[1][2]; + state[2] = tmp0; + + // scale down, saturate and store + tmp1 = state[3] >> 15; + if (tmp1 > (int32_t)0x00007FFF) + tmp1 = 0x00007FFF; + if (tmp1 < (int32_t)0xFFFF8000) + tmp1 = 0xFFFF8000; + out[i << 1] = (int16_t)tmp1; + } +} + +// lowpass filter +// input: int16_t +// output: int32_t (normalized, not saturated) +// state: filter state array; length = 8 +void WebRtcSpl_LPBy2ShortToInt(const int16_t* in, int32_t len, int32_t* out, + int32_t* state) +{ + int32_t tmp0, tmp1, diff; + int32_t i; + + len >>= 1; + + // lower allpass filter: odd input -> even output samples + in++; + // initial state of polyphase delay element + tmp0 = state[12]; + for (i = 0; i < len; i++) + { + diff = tmp0 - state[1]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[0] + diff * kResampleAllpass[1][0]; + state[0] = tmp0; + diff = tmp1 - state[2]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[1] + diff * kResampleAllpass[1][1]; + state[1] = tmp1; + diff = tmp0 - state[3]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[3] = state[2] + diff * kResampleAllpass[1][2]; + state[2] = tmp0; + + // scale down, round and store + out[i << 1] = state[3] >> 1; + tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14); + } + in--; + + // upper allpass filter: even input -> even output samples + for (i = 0; i < len; i++) + { + tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14); + diff = tmp0 - state[5]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[4] + diff * kResampleAllpass[0][0]; + state[4] = tmp0; + diff = tmp1 - state[6]; + // scale down and round + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[5] + diff * kResampleAllpass[0][1]; + state[5] = tmp1; + diff = tmp0 - state[7]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[7] = state[6] + diff * kResampleAllpass[0][2]; + state[6] = tmp0; + + // average the two allpass outputs, scale down and store + out[i << 1] = (out[i << 1] + (state[7] >> 1)) >> 15; + } + + // switch to odd output samples + out++; + + // lower allpass filter: even input -> odd output samples + for (i = 0; i < len; i++) + { + tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14); + diff = tmp0 - state[9]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[8] + diff * kResampleAllpass[1][0]; + state[8] = tmp0; + diff = tmp1 - state[10]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[9] + diff * kResampleAllpass[1][1]; + state[9] = tmp1; + diff = tmp0 - state[11]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[11] = state[10] + diff * kResampleAllpass[1][2]; + state[10] = tmp0; + + // scale down, round and store + out[i << 1] = state[11] >> 1; + } + + // upper allpass filter: odd input -> odd output samples + in++; + for (i = 0; i < len; i++) + { + tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14); + diff = tmp0 - state[13]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[12] + diff * kResampleAllpass[0][0]; + state[12] = tmp0; + diff = tmp1 - state[14]; + // scale down and round + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[13] + diff * kResampleAllpass[0][1]; + state[13] = tmp1; + diff = tmp0 - state[15]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[15] = state[14] + diff * kResampleAllpass[0][2]; + state[14] = tmp0; + + // average the two allpass outputs, scale down and store + out[i << 1] = (out[i << 1] + (state[15] >> 1)) >> 15; + } +} + +// lowpass filter +// input: int32_t (shifted 15 positions to the left, + offset 16384) +// output: int32_t (normalized, not saturated) +// state: filter state array; length = 8 +void RTC_NO_SANITIZE("signed-integer-overflow") // bugs.webrtc.org/5486 +WebRtcSpl_LPBy2IntToInt(const int32_t* in, int32_t len, int32_t* out, + int32_t* state) +{ + int32_t tmp0, tmp1, diff; + int32_t i; + + len >>= 1; + + // lower allpass filter: odd input -> even output samples + in++; + // initial state of polyphase delay element + tmp0 = state[12]; + for (i = 0; i < len; i++) + { + diff = tmp0 - state[1]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[0] + diff * kResampleAllpass[1][0]; + state[0] = tmp0; + diff = tmp1 - state[2]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[1] + diff * kResampleAllpass[1][1]; + state[1] = tmp1; + diff = tmp0 - state[3]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[3] = state[2] + diff * kResampleAllpass[1][2]; + state[2] = tmp0; + + // scale down, round and store + out[i << 1] = state[3] >> 1; + tmp0 = in[i << 1]; + } + in--; + + // upper allpass filter: even input -> even output samples + for (i = 0; i < len; i++) + { + tmp0 = in[i << 1]; + diff = tmp0 - state[5]; + // UBSan: -794814117 - 1566149201 cannot be represented in type 'int' + + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[4] + diff * kResampleAllpass[0][0]; + state[4] = tmp0; + diff = tmp1 - state[6]; + // scale down and round + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[5] + diff * kResampleAllpass[0][1]; + state[5] = tmp1; + diff = tmp0 - state[7]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[7] = state[6] + diff * kResampleAllpass[0][2]; + state[6] = tmp0; + + // average the two allpass outputs, scale down and store + out[i << 1] = (out[i << 1] + (state[7] >> 1)) >> 15; + } + + // switch to odd output samples + out++; + + // lower allpass filter: even input -> odd output samples + for (i = 0; i < len; i++) + { + tmp0 = in[i << 1]; + diff = tmp0 - state[9]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[8] + diff * kResampleAllpass[1][0]; + state[8] = tmp0; + diff = tmp1 - state[10]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[9] + diff * kResampleAllpass[1][1]; + state[9] = tmp1; + diff = tmp0 - state[11]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[11] = state[10] + diff * kResampleAllpass[1][2]; + state[10] = tmp0; + + // scale down, round and store + out[i << 1] = state[11] >> 1; + } + + // upper allpass filter: odd input -> odd output samples + in++; + for (i = 0; i < len; i++) + { + tmp0 = in[i << 1]; + diff = tmp0 - state[13]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[12] + diff * kResampleAllpass[0][0]; + state[12] = tmp0; + diff = tmp1 - state[14]; + // scale down and round + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[13] + diff * kResampleAllpass[0][1]; + state[13] = tmp1; + diff = tmp0 - state[15]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[15] = state[14] + diff * kResampleAllpass[0][2]; + state[14] = tmp0; + + // average the two allpass outputs, scale down and store + out[i << 1] = (out[i << 1] + (state[15] >> 1)) >> 15; + } +} diff --git a/third_party/libwebrtc/common_audio/signal_processing/resample_by_2_internal.h b/third_party/libwebrtc/common_audio/signal_processing/resample_by_2_internal.h new file mode 100644 index 0000000000..145395a4cb --- /dev/null +++ b/third_party/libwebrtc/common_audio/signal_processing/resample_by_2_internal.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * This header file contains some internal resampling functions. + * + */ + +#ifndef COMMON_AUDIO_SIGNAL_PROCESSING_RESAMPLE_BY_2_INTERNAL_H_ +#define COMMON_AUDIO_SIGNAL_PROCESSING_RESAMPLE_BY_2_INTERNAL_H_ + +#include <stdint.h> + +/******************************************************************* + * resample_by_2_fast.c + * Functions for internal use in the other resample functions + ******************************************************************/ +void WebRtcSpl_DownBy2IntToShort(int32_t* in, + int32_t len, + int16_t* out, + int32_t* state); + +void WebRtcSpl_DownBy2ShortToInt(const int16_t* in, + int32_t len, + int32_t* out, + int32_t* state); + +void WebRtcSpl_UpBy2ShortToInt(const int16_t* in, + int32_t len, + int32_t* out, + int32_t* state); + +void WebRtcSpl_UpBy2IntToInt(const int32_t* in, + int32_t len, + int32_t* out, + int32_t* state); + +void WebRtcSpl_UpBy2IntToShort(const int32_t* in, + int32_t len, + int16_t* out, + int32_t* state); + +void WebRtcSpl_LPBy2ShortToInt(const int16_t* in, + int32_t len, + int32_t* out, + int32_t* state); + +void WebRtcSpl_LPBy2IntToInt(const int32_t* in, + int32_t len, + int32_t* out, + int32_t* state); + +#endif // COMMON_AUDIO_SIGNAL_PROCESSING_RESAMPLE_BY_2_INTERNAL_H_ diff --git a/third_party/libwebrtc/common_audio/signal_processing/resample_by_2_mips.c b/third_party/libwebrtc/common_audio/signal_processing/resample_by_2_mips.c new file mode 100644 index 0000000000..f41bab7519 --- /dev/null +++ b/third_party/libwebrtc/common_audio/signal_processing/resample_by_2_mips.c @@ -0,0 +1,292 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +/* + * This file contains the resampling by two functions. + * The description header can be found in signal_processing_library.h + * + */ + +#if defined(MIPS32_LE) + +#include "common_audio/signal_processing/include/signal_processing_library.h" + +#if !defined(MIPS_DSP_R2_LE) +// allpass filter coefficients. +static const uint16_t kResampleAllpass1[3] = {3284, 24441, 49528}; +static const uint16_t kResampleAllpass2[3] = {12199, 37471, 60255}; +#endif + +// Multiply a 32-bit value with a 16-bit value and accumulate to another input: +#define MUL_ACCUM_1(a, b, c) WEBRTC_SPL_SCALEDIFF32(a, b, c) +#define MUL_ACCUM_2(a, b, c) WEBRTC_SPL_SCALEDIFF32(a, b, c) + +// decimator +void WebRtcSpl_DownsampleBy2(const int16_t* in, + size_t len, + int16_t* out, + int32_t* filtState) { + int32_t out32; + size_t i, len1; + + register int32_t state0 = filtState[0]; + register int32_t state1 = filtState[1]; + register int32_t state2 = filtState[2]; + register int32_t state3 = filtState[3]; + register int32_t state4 = filtState[4]; + register int32_t state5 = filtState[5]; + register int32_t state6 = filtState[6]; + register int32_t state7 = filtState[7]; + +#if defined(MIPS_DSP_R2_LE) + int32_t k1Res0, k1Res1, k1Res2, k2Res0, k2Res1, k2Res2; + + k1Res0= 3284; + k1Res1= 24441; + k1Res2= 49528; + k2Res0= 12199; + k2Res1= 37471; + k2Res2= 60255; + len1 = (len >> 1); + + const int32_t* inw = (int32_t*)in; + int32_t tmp11, tmp12, tmp21, tmp22; + int32_t in322, in321; + int32_t diff1, diff2; + for (i = len1; i > 0; i--) { + __asm__ volatile ( + "lh %[in321], 0(%[inw]) \n\t" + "lh %[in322], 2(%[inw]) \n\t" + + "sll %[in321], %[in321], 10 \n\t" + "sll %[in322], %[in322], 10 \n\t" + + "addiu %[inw], %[inw], 4 \n\t" + + "subu %[diff1], %[in321], %[state1] \n\t" + "subu %[diff2], %[in322], %[state5] \n\t" + + : [in322] "=&r" (in322), [in321] "=&r" (in321), + [diff1] "=&r" (diff1), [diff2] "=r" (diff2), [inw] "+r" (inw) + : [state1] "r" (state1), [state5] "r" (state5) + : "memory" + ); + + __asm__ volatile ( + "mult $ac0, %[diff1], %[k2Res0] \n\t" + "mult $ac1, %[diff2], %[k1Res0] \n\t" + + "extr.w %[tmp11], $ac0, 16 \n\t" + "extr.w %[tmp12], $ac1, 16 \n\t" + + "addu %[tmp11], %[state0], %[tmp11] \n\t" + "addu %[tmp12], %[state4], %[tmp12] \n\t" + + "addiu %[state0], %[in321], 0 \n\t" + "addiu %[state4], %[in322], 0 \n\t" + + "subu %[diff1], %[tmp11], %[state2] \n\t" + "subu %[diff2], %[tmp12], %[state6] \n\t" + + "mult $ac0, %[diff1], %[k2Res1] \n\t" + "mult $ac1, %[diff2], %[k1Res1] \n\t" + + "extr.w %[tmp21], $ac0, 16 \n\t" + "extr.w %[tmp22], $ac1, 16 \n\t" + + "addu %[tmp21], %[state1], %[tmp21] \n\t" + "addu %[tmp22], %[state5], %[tmp22] \n\t" + + "addiu %[state1], %[tmp11], 0 \n\t" + "addiu %[state5], %[tmp12], 0 \n\t" + : [tmp22] "=r" (tmp22), [tmp21] "=&r" (tmp21), + [tmp11] "=&r" (tmp11), [state0] "+r" (state0), + [state1] "+r" (state1), + [state2] "+r" (state2), + [state4] "+r" (state4), [tmp12] "=&r" (tmp12), + [state6] "+r" (state6), [state5] "+r" (state5) + : [k1Res1] "r" (k1Res1), [k2Res1] "r" (k2Res1), [k2Res0] "r" (k2Res0), + [diff2] "r" (diff2), [diff1] "r" (diff1), [in322] "r" (in322), + [in321] "r" (in321), [k1Res0] "r" (k1Res0) + : "hi", "lo", "$ac1hi", "$ac1lo" + ); + + // upper allpass filter + __asm__ volatile ( + "subu %[diff1], %[tmp21], %[state3] \n\t" + "subu %[diff2], %[tmp22], %[state7] \n\t" + + "mult $ac0, %[diff1], %[k2Res2] \n\t" + "mult $ac1, %[diff2], %[k1Res2] \n\t" + "extr.w %[state3], $ac0, 16 \n\t" + "extr.w %[state7], $ac1, 16 \n\t" + "addu %[state3], %[state2], %[state3] \n\t" + "addu %[state7], %[state6], %[state7] \n\t" + + "addiu %[state2], %[tmp21], 0 \n\t" + "addiu %[state6], %[tmp22], 0 \n\t" + + // add two allpass outputs, divide by two and round + "addu %[out32], %[state3], %[state7] \n\t" + "addiu %[out32], %[out32], 1024 \n\t" + "sra %[out32], %[out32], 11 \n\t" + : [state3] "+r" (state3), [state6] "+r" (state6), + [state2] "+r" (state2), [diff2] "=&r" (diff2), + [out32] "=r" (out32), [diff1] "=&r" (diff1), [state7] "+r" (state7) + : [tmp22] "r" (tmp22), [tmp21] "r" (tmp21), + [k1Res2] "r" (k1Res2), [k2Res2] "r" (k2Res2) + : "hi", "lo", "$ac1hi", "$ac1lo" + ); + + // limit amplitude to prevent wrap-around, and write to output array + *out++ = WebRtcSpl_SatW32ToW16(out32); + } +#else // #if defined(MIPS_DSP_R2_LE) + int32_t tmp1, tmp2, diff; + int32_t in32; + len1 = (len >> 1)/4; + for (i = len1; i > 0; i--) { + // lower allpass filter + in32 = (int32_t)(*in++) << 10; + diff = in32 - state1; + tmp1 = MUL_ACCUM_1(kResampleAllpass2[0], diff, state0); + state0 = in32; + diff = tmp1 - state2; + tmp2 = MUL_ACCUM_2(kResampleAllpass2[1], diff, state1); + state1 = tmp1; + diff = tmp2 - state3; + state3 = MUL_ACCUM_2(kResampleAllpass2[2], diff, state2); + state2 = tmp2; + + // upper allpass filter + in32 = (int32_t)(*in++) << 10; + diff = in32 - state5; + tmp1 = MUL_ACCUM_1(kResampleAllpass1[0], diff, state4); + state4 = in32; + diff = tmp1 - state6; + tmp2 = MUL_ACCUM_1(kResampleAllpass1[1], diff, state5); + state5 = tmp1; + diff = tmp2 - state7; + state7 = MUL_ACCUM_2(kResampleAllpass1[2], diff, state6); + state6 = tmp2; + + // add two allpass outputs, divide by two and round + out32 = (state3 + state7 + 1024) >> 11; + + // limit amplitude to prevent wrap-around, and write to output array + *out++ = WebRtcSpl_SatW32ToW16(out32); + // lower allpass filter + in32 = (int32_t)(*in++) << 10; + diff = in32 - state1; + tmp1 = MUL_ACCUM_1(kResampleAllpass2[0], diff, state0); + state0 = in32; + diff = tmp1 - state2; + tmp2 = MUL_ACCUM_2(kResampleAllpass2[1], diff, state1); + state1 = tmp1; + diff = tmp2 - state3; + state3 = MUL_ACCUM_2(kResampleAllpass2[2], diff, state2); + state2 = tmp2; + + // upper allpass filter + in32 = (int32_t)(*in++) << 10; + diff = in32 - state5; + tmp1 = MUL_ACCUM_1(kResampleAllpass1[0], diff, state4); + state4 = in32; + diff = tmp1 - state6; + tmp2 = MUL_ACCUM_1(kResampleAllpass1[1], diff, state5); + state5 = tmp1; + diff = tmp2 - state7; + state7 = MUL_ACCUM_2(kResampleAllpass1[2], diff, state6); + state6 = tmp2; + + // add two allpass outputs, divide by two and round + out32 = (state3 + state7 + 1024) >> 11; + + // limit amplitude to prevent wrap-around, and write to output array + *out++ = WebRtcSpl_SatW32ToW16(out32); + // lower allpass filter + in32 = (int32_t)(*in++) << 10; + diff = in32 - state1; + tmp1 = MUL_ACCUM_1(kResampleAllpass2[0], diff, state0); + state0 = in32; + diff = tmp1 - state2; + tmp2 = MUL_ACCUM_2(kResampleAllpass2[1], diff, state1); + state1 = tmp1; + diff = tmp2 - state3; + state3 = MUL_ACCUM_2(kResampleAllpass2[2], diff, state2); + state2 = tmp2; + + // upper allpass filter + in32 = (int32_t)(*in++) << 10; + diff = in32 - state5; + tmp1 = MUL_ACCUM_1(kResampleAllpass1[0], diff, state4); + state4 = in32; + diff = tmp1 - state6; + tmp2 = MUL_ACCUM_1(kResampleAllpass1[1], diff, state5); + state5 = tmp1; + diff = tmp2 - state7; + state7 = MUL_ACCUM_2(kResampleAllpass1[2], diff, state6); + state6 = tmp2; + + // add two allpass outputs, divide by two and round + out32 = (state3 + state7 + 1024) >> 11; + + // limit amplitude to prevent wrap-around, and write to output array + *out++ = WebRtcSpl_SatW32ToW16(out32); + // lower allpass filter + in32 = (int32_t)(*in++) << 10; + diff = in32 - state1; + tmp1 = MUL_ACCUM_1(kResampleAllpass2[0], diff, state0); + state0 = in32; + diff = tmp1 - state2; + tmp2 = MUL_ACCUM_2(kResampleAllpass2[1], diff, state1); + state1 = tmp1; + diff = tmp2 - state3; + state3 = MUL_ACCUM_2(kResampleAllpass2[2], diff, state2); + state2 = tmp2; + + // upper allpass filter + in32 = (int32_t)(*in++) << 10; + diff = in32 - state5; + tmp1 = MUL_ACCUM_1(kResampleAllpass1[0], diff, state4); + state4 = in32; + diff = tmp1 - state6; + tmp2 = MUL_ACCUM_1(kResampleAllpass1[1], diff, state5); + state5 = tmp1; + diff = tmp2 - state7; + state7 = MUL_ACCUM_2(kResampleAllpass1[2], diff, state6); + state6 = tmp2; + + // add two allpass outputs, divide by two and round + out32 = (state3 + state7 + 1024) >> 11; + + // limit amplitude to prevent wrap-around, and write to output array + *out++ = WebRtcSpl_SatW32ToW16(out32); + } +#endif // #if defined(MIPS_DSP_R2_LE) + __asm__ volatile ( + "sw %[state0], 0(%[filtState]) \n\t" + "sw %[state1], 4(%[filtState]) \n\t" + "sw %[state2], 8(%[filtState]) \n\t" + "sw %[state3], 12(%[filtState]) \n\t" + "sw %[state4], 16(%[filtState]) \n\t" + "sw %[state5], 20(%[filtState]) \n\t" + "sw %[state6], 24(%[filtState]) \n\t" + "sw %[state7], 28(%[filtState]) \n\t" + : + : [state0] "r" (state0), [state1] "r" (state1), [state2] "r" (state2), + [state3] "r" (state3), [state4] "r" (state4), [state5] "r" (state5), + [state6] "r" (state6), [state7] "r" (state7), [filtState] "r" (filtState) + : "memory" + ); +} + +#endif // #if defined(MIPS32_LE) diff --git a/third_party/libwebrtc/common_audio/signal_processing/resample_fractional.c b/third_party/libwebrtc/common_audio/signal_processing/resample_fractional.c new file mode 100644 index 0000000000..9ffe0aca60 --- /dev/null +++ b/third_party/libwebrtc/common_audio/signal_processing/resample_fractional.c @@ -0,0 +1,239 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +/* + * This file contains the resampling functions between 48, 44, 32 and 24 kHz. + * The description headers can be found in signal_processing_library.h + * + */ + +#include "common_audio/signal_processing/include/signal_processing_library.h" + +// interpolation coefficients +static const int16_t kCoefficients48To32[2][8] = { + {778, -2050, 1087, 23285, 12903, -3783, 441, 222}, + {222, 441, -3783, 12903, 23285, 1087, -2050, 778} +}; + +static const int16_t kCoefficients32To24[3][8] = { + {767, -2362, 2434, 24406, 10620, -3838, 721, 90}, + {386, -381, -2646, 19062, 19062, -2646, -381, 386}, + {90, 721, -3838, 10620, 24406, 2434, -2362, 767} +}; + +static const int16_t kCoefficients44To32[4][9] = { + {117, -669, 2245, -6183, 26267, 13529, -3245, 845, -138}, + {-101, 612, -2283, 8532, 29790, -5138, 1789, -524, 91}, + {50, -292, 1016, -3064, 32010, 3933, -1147, 315, -53}, + {-156, 974, -3863, 18603, 21691, -6246, 2353, -712, 126} +}; + +// Resampling ratio: 2/3 +// input: int32_t (normalized, not saturated) :: size 3 * K +// output: int32_t (shifted 15 positions to the left, + offset 16384) :: size 2 * K +// K: number of blocks + +void WebRtcSpl_Resample48khzTo32khz(const int32_t *In, int32_t *Out, size_t K) +{ + ///////////////////////////////////////////////////////////// + // Filter operation: + // + // Perform resampling (3 input samples -> 2 output samples); + // process in sub blocks of size 3 samples. + int32_t tmp; + size_t m; + + for (m = 0; m < K; m++) + { + tmp = 1 << 14; + tmp += kCoefficients48To32[0][0] * In[0]; + tmp += kCoefficients48To32[0][1] * In[1]; + tmp += kCoefficients48To32[0][2] * In[2]; + tmp += kCoefficients48To32[0][3] * In[3]; + tmp += kCoefficients48To32[0][4] * In[4]; + tmp += kCoefficients48To32[0][5] * In[5]; + tmp += kCoefficients48To32[0][6] * In[6]; + tmp += kCoefficients48To32[0][7] * In[7]; + Out[0] = tmp; + + tmp = 1 << 14; + tmp += kCoefficients48To32[1][0] * In[1]; + tmp += kCoefficients48To32[1][1] * In[2]; + tmp += kCoefficients48To32[1][2] * In[3]; + tmp += kCoefficients48To32[1][3] * In[4]; + tmp += kCoefficients48To32[1][4] * In[5]; + tmp += kCoefficients48To32[1][5] * In[6]; + tmp += kCoefficients48To32[1][6] * In[7]; + tmp += kCoefficients48To32[1][7] * In[8]; + Out[1] = tmp; + + // update pointers + In += 3; + Out += 2; + } +} + +// Resampling ratio: 3/4 +// input: int32_t (normalized, not saturated) :: size 4 * K +// output: int32_t (shifted 15 positions to the left, + offset 16384) :: size 3 * K +// K: number of blocks + +void WebRtcSpl_Resample32khzTo24khz(const int32_t *In, int32_t *Out, size_t K) +{ + ///////////////////////////////////////////////////////////// + // Filter operation: + // + // Perform resampling (4 input samples -> 3 output samples); + // process in sub blocks of size 4 samples. + size_t m; + int32_t tmp; + + for (m = 0; m < K; m++) + { + tmp = 1 << 14; + tmp += kCoefficients32To24[0][0] * In[0]; + tmp += kCoefficients32To24[0][1] * In[1]; + tmp += kCoefficients32To24[0][2] * In[2]; + tmp += kCoefficients32To24[0][3] * In[3]; + tmp += kCoefficients32To24[0][4] * In[4]; + tmp += kCoefficients32To24[0][5] * In[5]; + tmp += kCoefficients32To24[0][6] * In[6]; + tmp += kCoefficients32To24[0][7] * In[7]; + Out[0] = tmp; + + tmp = 1 << 14; + tmp += kCoefficients32To24[1][0] * In[1]; + tmp += kCoefficients32To24[1][1] * In[2]; + tmp += kCoefficients32To24[1][2] * In[3]; + tmp += kCoefficients32To24[1][3] * In[4]; + tmp += kCoefficients32To24[1][4] * In[5]; + tmp += kCoefficients32To24[1][5] * In[6]; + tmp += kCoefficients32To24[1][6] * In[7]; + tmp += kCoefficients32To24[1][7] * In[8]; + Out[1] = tmp; + + tmp = 1 << 14; + tmp += kCoefficients32To24[2][0] * In[2]; + tmp += kCoefficients32To24[2][1] * In[3]; + tmp += kCoefficients32To24[2][2] * In[4]; + tmp += kCoefficients32To24[2][3] * In[5]; + tmp += kCoefficients32To24[2][4] * In[6]; + tmp += kCoefficients32To24[2][5] * In[7]; + tmp += kCoefficients32To24[2][6] * In[8]; + tmp += kCoefficients32To24[2][7] * In[9]; + Out[2] = tmp; + + // update pointers + In += 4; + Out += 3; + } +} + +// +// fractional resampling filters +// Fout = 11/16 * Fin +// Fout = 8/11 * Fin +// + +// compute two inner-products and store them to output array +static void WebRtcSpl_ResampDotProduct(const int32_t *in1, const int32_t *in2, + const int16_t *coef_ptr, int32_t *out1, + int32_t *out2) +{ + int32_t tmp1 = 16384; + int32_t tmp2 = 16384; + int16_t coef; + + coef = coef_ptr[0]; + tmp1 += coef * in1[0]; + tmp2 += coef * in2[-0]; + + coef = coef_ptr[1]; + tmp1 += coef * in1[1]; + tmp2 += coef * in2[-1]; + + coef = coef_ptr[2]; + tmp1 += coef * in1[2]; + tmp2 += coef * in2[-2]; + + coef = coef_ptr[3]; + tmp1 += coef * in1[3]; + tmp2 += coef * in2[-3]; + + coef = coef_ptr[4]; + tmp1 += coef * in1[4]; + tmp2 += coef * in2[-4]; + + coef = coef_ptr[5]; + tmp1 += coef * in1[5]; + tmp2 += coef * in2[-5]; + + coef = coef_ptr[6]; + tmp1 += coef * in1[6]; + tmp2 += coef * in2[-6]; + + coef = coef_ptr[7]; + tmp1 += coef * in1[7]; + tmp2 += coef * in2[-7]; + + coef = coef_ptr[8]; + *out1 = tmp1 + coef * in1[8]; + *out2 = tmp2 + coef * in2[-8]; +} + +// Resampling ratio: 8/11 +// input: int32_t (normalized, not saturated) :: size 11 * K +// output: int32_t (shifted 15 positions to the left, + offset 16384) :: size 8 * K +// K: number of blocks + +void WebRtcSpl_Resample44khzTo32khz(const int32_t *In, int32_t *Out, size_t K) +{ + ///////////////////////////////////////////////////////////// + // Filter operation: + // + // Perform resampling (11 input samples -> 8 output samples); + // process in sub blocks of size 11 samples. + int32_t tmp; + size_t m; + + for (m = 0; m < K; m++) + { + tmp = 1 << 14; + + // first output sample + Out[0] = ((int32_t)In[3] << 15) + tmp; + + // sum and accumulate filter coefficients and input samples + tmp += kCoefficients44To32[3][0] * In[5]; + tmp += kCoefficients44To32[3][1] * In[6]; + tmp += kCoefficients44To32[3][2] * In[7]; + tmp += kCoefficients44To32[3][3] * In[8]; + tmp += kCoefficients44To32[3][4] * In[9]; + tmp += kCoefficients44To32[3][5] * In[10]; + tmp += kCoefficients44To32[3][6] * In[11]; + tmp += kCoefficients44To32[3][7] * In[12]; + tmp += kCoefficients44To32[3][8] * In[13]; + Out[4] = tmp; + + // sum and accumulate filter coefficients and input samples + WebRtcSpl_ResampDotProduct(&In[0], &In[17], kCoefficients44To32[0], &Out[1], &Out[7]); + + // sum and accumulate filter coefficients and input samples + WebRtcSpl_ResampDotProduct(&In[2], &In[15], kCoefficients44To32[1], &Out[2], &Out[6]); + + // sum and accumulate filter coefficients and input samples + WebRtcSpl_ResampDotProduct(&In[3], &In[14], kCoefficients44To32[2], &Out[3], &Out[5]); + + // update pointers + In += 11; + Out += 8; + } +} diff --git a/third_party/libwebrtc/common_audio/signal_processing/signal_processing_unittest.cc b/third_party/libwebrtc/common_audio/signal_processing/signal_processing_unittest.cc new file mode 100644 index 0000000000..80d605bc0b --- /dev/null +++ b/third_party/libwebrtc/common_audio/signal_processing/signal_processing_unittest.cc @@ -0,0 +1,668 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <algorithm> + +#include "common_audio/signal_processing/include/signal_processing_library.h" +#include "rtc_base/strings/string_builder.h" +#include "test/gtest.h" + +static const size_t kVector16Size = 9; +static const int16_t vector16[kVector16Size] = {1, + -15511, + 4323, + 1963, + WEBRTC_SPL_WORD16_MAX, + 0, + WEBRTC_SPL_WORD16_MIN + 5, + -3333, + 345}; + +TEST(SplTest, MacroTest) { + // Macros with inputs. + int A = 10; + int B = 21; + int a = -3; + int b = WEBRTC_SPL_WORD32_MAX; + + EXPECT_EQ(10, WEBRTC_SPL_MIN(A, B)); + EXPECT_EQ(21, WEBRTC_SPL_MAX(A, B)); + + EXPECT_EQ(3, WEBRTC_SPL_ABS_W16(a)); + EXPECT_EQ(3, WEBRTC_SPL_ABS_W32(a)); + + EXPECT_EQ(-63, WEBRTC_SPL_MUL(a, B)); + EXPECT_EQ(2147483651u, WEBRTC_SPL_UMUL(a, b)); + b = WEBRTC_SPL_WORD16_MAX >> 1; + EXPECT_EQ(4294918147u, WEBRTC_SPL_UMUL_32_16(a, b)); + EXPECT_EQ(-49149, WEBRTC_SPL_MUL_16_U16(a, b)); + + a = b; + b = -3; + + EXPECT_EQ(-1, WEBRTC_SPL_MUL_16_32_RSFT16(a, b)); + EXPECT_EQ(-1, WEBRTC_SPL_MUL_16_32_RSFT15(a, b)); + EXPECT_EQ(-3, WEBRTC_SPL_MUL_16_32_RSFT14(a, b)); + EXPECT_EQ(-24, WEBRTC_SPL_MUL_16_32_RSFT11(a, b)); + + EXPECT_EQ(-12288, WEBRTC_SPL_MUL_16_16_RSFT(a, b, 2)); + EXPECT_EQ(-12287, WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(a, b, 2)); + + EXPECT_EQ(21, WEBRTC_SPL_SAT(a, A, B)); + EXPECT_EQ(21, WEBRTC_SPL_SAT(a, B, A)); + + // Shifting with negative numbers allowed + int shift_amount = 1; // Workaround compiler warning using variable here. + // Positive means left shift + EXPECT_EQ(32766, WEBRTC_SPL_SHIFT_W32(a, shift_amount)); + + // Shifting with negative numbers not allowed + // We cannot do casting here due to signed/unsigned problem + EXPECT_EQ(32766, WEBRTC_SPL_LSHIFT_W32(a, 1)); + + EXPECT_EQ(8191u, WEBRTC_SPL_RSHIFT_U32(a, 1)); + + EXPECT_EQ(1470, WEBRTC_SPL_RAND(A)); + + EXPECT_EQ(-49149, WEBRTC_SPL_MUL_16_16(a, b)); + EXPECT_EQ(1073676289, + WEBRTC_SPL_MUL_16_16(WEBRTC_SPL_WORD16_MAX, WEBRTC_SPL_WORD16_MAX)); + EXPECT_EQ(1073709055, WEBRTC_SPL_MUL_16_32_RSFT16(WEBRTC_SPL_WORD16_MAX, + WEBRTC_SPL_WORD32_MAX)); + EXPECT_EQ(1073741824, WEBRTC_SPL_MUL_16_32_RSFT16(WEBRTC_SPL_WORD16_MIN, + WEBRTC_SPL_WORD32_MIN)); +#ifdef WEBRTC_ARCH_ARM_V7 + EXPECT_EQ(-1073741824, WEBRTC_SPL_MUL_16_32_RSFT16(WEBRTC_SPL_WORD16_MIN, + WEBRTC_SPL_WORD32_MAX)); +#else + EXPECT_EQ(-1073741823, WEBRTC_SPL_MUL_16_32_RSFT16(WEBRTC_SPL_WORD16_MIN, + WEBRTC_SPL_WORD32_MAX)); +#endif +} + +TEST(SplTest, InlineTest) { + int16_t a16 = 121; + int16_t b16 = -17; + int32_t a32 = 111121; + int32_t b32 = -1711; + + EXPECT_EQ(17, WebRtcSpl_GetSizeInBits(a32)); + + EXPECT_EQ(0, WebRtcSpl_NormW32(0)); + EXPECT_EQ(31, WebRtcSpl_NormW32(-1)); + EXPECT_EQ(0, WebRtcSpl_NormW32(WEBRTC_SPL_WORD32_MIN)); + EXPECT_EQ(14, WebRtcSpl_NormW32(a32)); + + EXPECT_EQ(0, WebRtcSpl_NormW16(0)); + EXPECT_EQ(15, WebRtcSpl_NormW16(-1)); + EXPECT_EQ(0, WebRtcSpl_NormW16(WEBRTC_SPL_WORD16_MIN)); + EXPECT_EQ(4, WebRtcSpl_NormW16(b32)); + for (int ii = 0; ii < 15; ++ii) { + int16_t value = 1 << ii; + EXPECT_EQ(14 - ii, WebRtcSpl_NormW16(value)); + EXPECT_EQ(15 - ii, WebRtcSpl_NormW16(-value)); + } + + EXPECT_EQ(0, WebRtcSpl_NormU32(0u)); + EXPECT_EQ(0, WebRtcSpl_NormU32(0xffffffff)); + EXPECT_EQ(15, WebRtcSpl_NormU32(static_cast<uint32_t>(a32))); + + EXPECT_EQ(104, WebRtcSpl_AddSatW16(a16, b16)); + EXPECT_EQ(138, WebRtcSpl_SubSatW16(a16, b16)); +} + +TEST(SplTest, AddSubSatW32) { + static constexpr int32_t kAddSubArgs[] = { + INT32_MIN, INT32_MIN + 1, -3, -2, -1, 0, 1, -1, 2, + 3, INT32_MAX - 1, INT32_MAX}; + for (int32_t a : kAddSubArgs) { + for (int32_t b : kAddSubArgs) { + const int64_t sum = std::max<int64_t>( + INT32_MIN, std::min<int64_t>(INT32_MAX, static_cast<int64_t>(a) + b)); + const int64_t diff = std::max<int64_t>( + INT32_MIN, std::min<int64_t>(INT32_MAX, static_cast<int64_t>(a) - b)); + rtc::StringBuilder ss; + ss << a << " +/- " << b << ": sum " << sum << ", diff " << diff; + SCOPED_TRACE(ss.str()); + EXPECT_EQ(sum, WebRtcSpl_AddSatW32(a, b)); + EXPECT_EQ(diff, WebRtcSpl_SubSatW32(a, b)); + } + } +} + +TEST(SplTest, CountLeadingZeros32) { + EXPECT_EQ(32, WebRtcSpl_CountLeadingZeros32(0)); + EXPECT_EQ(32, WebRtcSpl_CountLeadingZeros32_NotBuiltin(0)); + for (int i = 0; i < 32; ++i) { + const uint32_t single_one = uint32_t{1} << i; + const uint32_t all_ones = 2 * single_one - 1; + EXPECT_EQ(31 - i, WebRtcSpl_CountLeadingZeros32(single_one)); + EXPECT_EQ(31 - i, WebRtcSpl_CountLeadingZeros32_NotBuiltin(single_one)); + EXPECT_EQ(31 - i, WebRtcSpl_CountLeadingZeros32(all_ones)); + EXPECT_EQ(31 - i, WebRtcSpl_CountLeadingZeros32_NotBuiltin(all_ones)); + } +} + +TEST(SplTest, CountLeadingZeros64) { + EXPECT_EQ(64, WebRtcSpl_CountLeadingZeros64(0)); + EXPECT_EQ(64, WebRtcSpl_CountLeadingZeros64_NotBuiltin(0)); + for (int i = 0; i < 64; ++i) { + const uint64_t single_one = uint64_t{1} << i; + const uint64_t all_ones = 2 * single_one - 1; + EXPECT_EQ(63 - i, WebRtcSpl_CountLeadingZeros64(single_one)); + EXPECT_EQ(63 - i, WebRtcSpl_CountLeadingZeros64_NotBuiltin(single_one)); + EXPECT_EQ(63 - i, WebRtcSpl_CountLeadingZeros64(all_ones)); + EXPECT_EQ(63 - i, WebRtcSpl_CountLeadingZeros64_NotBuiltin(all_ones)); + } +} + +TEST(SplTest, MathOperationsTest) { + int A = 1134567892; + int32_t num = 117; + int32_t den = -5; + uint16_t denU = 5; + EXPECT_EQ(33700, WebRtcSpl_Sqrt(A)); + EXPECT_EQ(33683, WebRtcSpl_SqrtFloor(A)); + + EXPECT_EQ(-91772805, WebRtcSpl_DivResultInQ31(den, num)); + EXPECT_EQ(-23, WebRtcSpl_DivW32W16ResW16(num, (int16_t)den)); + EXPECT_EQ(-23, WebRtcSpl_DivW32W16(num, (int16_t)den)); + EXPECT_EQ(23u, WebRtcSpl_DivU32U16(num, denU)); + EXPECT_EQ(0, WebRtcSpl_DivW32HiLow(128, 0, 256)); +} + +TEST(SplTest, BasicArrayOperationsTest) { + const size_t kVectorSize = 4; + int B[] = {4, 12, 133, 1100}; + int16_t b16[kVectorSize]; + int32_t b32[kVectorSize]; + + int16_t bTmp16[kVectorSize]; + int32_t bTmp32[kVectorSize]; + + WebRtcSpl_MemSetW16(b16, 3, kVectorSize); + for (size_t kk = 0; kk < kVectorSize; ++kk) { + EXPECT_EQ(3, b16[kk]); + } + WebRtcSpl_ZerosArrayW16(b16, kVectorSize); + for (size_t kk = 0; kk < kVectorSize; ++kk) { + EXPECT_EQ(0, b16[kk]); + } + WebRtcSpl_MemSetW32(b32, 3, kVectorSize); + for (size_t kk = 0; kk < kVectorSize; ++kk) { + EXPECT_EQ(3, b32[kk]); + } + WebRtcSpl_ZerosArrayW32(b32, kVectorSize); + for (size_t kk = 0; kk < kVectorSize; ++kk) { + EXPECT_EQ(0, b32[kk]); + } + for (size_t kk = 0; kk < kVectorSize; ++kk) { + bTmp16[kk] = (int16_t)kk; + bTmp32[kk] = (int32_t)kk; + } + WEBRTC_SPL_MEMCPY_W16(b16, bTmp16, kVectorSize); + for (size_t kk = 0; kk < kVectorSize; ++kk) { + EXPECT_EQ(b16[kk], bTmp16[kk]); + } + // WEBRTC_SPL_MEMCPY_W32(b32, bTmp32, kVectorSize); + // for (int kk = 0; kk < kVectorSize; ++kk) { + // EXPECT_EQ(b32[kk], bTmp32[kk]); + // } + WebRtcSpl_CopyFromEndW16(b16, kVectorSize, 2, bTmp16); + for (size_t kk = 0; kk < 2; ++kk) { + EXPECT_EQ(static_cast<int16_t>(kk + 2), bTmp16[kk]); + } + + for (size_t kk = 0; kk < kVectorSize; ++kk) { + b32[kk] = B[kk]; + b16[kk] = (int16_t)B[kk]; + } + WebRtcSpl_VectorBitShiftW32ToW16(bTmp16, kVectorSize, b32, 1); + for (size_t kk = 0; kk < kVectorSize; ++kk) { + EXPECT_EQ((B[kk] >> 1), bTmp16[kk]); + } + WebRtcSpl_VectorBitShiftW16(bTmp16, kVectorSize, b16, 1); + for (size_t kk = 0; kk < kVectorSize; ++kk) { + EXPECT_EQ((B[kk] >> 1), bTmp16[kk]); + } + WebRtcSpl_VectorBitShiftW32(bTmp32, kVectorSize, b32, 1); + for (size_t kk = 0; kk < kVectorSize; ++kk) { + EXPECT_EQ((B[kk] >> 1), bTmp32[kk]); + } + + WebRtcSpl_MemCpyReversedOrder(&bTmp16[3], b16, kVectorSize); + for (size_t kk = 0; kk < kVectorSize; ++kk) { + EXPECT_EQ(b16[3 - kk], bTmp16[kk]); + } +} + +TEST(SplTest, MinMaxOperationsTest) { + const size_t kVectorSize = 17; + + // Vectors to test the cases where minimum values have to be caught + // outside of the unrolled loops in ARM-Neon. + int16_t vector16[kVectorSize] = {-1, + 7485, + 0, + 3333, + -18283, + 0, + 12334, + -29871, + 988, + -3333, + 345, + -456, + 222, + 999, + 888, + 8774, + WEBRTC_SPL_WORD16_MIN}; + int32_t vector32[kVectorSize] = {-1, + 0, + 283211, + 3333, + 8712345, + 0, + -3333, + 89345, + -374585456, + 222, + 999, + 122345334, + -12389756, + -987329871, + 888, + -2, + WEBRTC_SPL_WORD32_MIN}; + + EXPECT_EQ(WEBRTC_SPL_WORD16_MIN, + WebRtcSpl_MinValueW16(vector16, kVectorSize)); + EXPECT_EQ(WEBRTC_SPL_WORD32_MIN, + WebRtcSpl_MinValueW32(vector32, kVectorSize)); + EXPECT_EQ(kVectorSize - 1, WebRtcSpl_MinIndexW16(vector16, kVectorSize)); + EXPECT_EQ(kVectorSize - 1, WebRtcSpl_MinIndexW32(vector32, kVectorSize)); + EXPECT_EQ(WEBRTC_SPL_WORD16_MIN, + WebRtcSpl_MaxAbsElementW16(vector16, kVectorSize)); + int16_t min_value, max_value; + WebRtcSpl_MinMaxW16(vector16, kVectorSize, &min_value, &max_value); + EXPECT_EQ(WEBRTC_SPL_WORD16_MIN, min_value); + EXPECT_EQ(12334, max_value); + + // Test the cases where maximum values have to be caught + // outside of the unrolled loops in ARM-Neon. + vector16[kVectorSize - 1] = WEBRTC_SPL_WORD16_MAX; + vector32[kVectorSize - 1] = WEBRTC_SPL_WORD32_MAX; + + EXPECT_EQ(WEBRTC_SPL_WORD16_MAX, + WebRtcSpl_MaxAbsValueW16(vector16, kVectorSize)); + EXPECT_EQ(WEBRTC_SPL_WORD16_MAX, + WebRtcSpl_MaxValueW16(vector16, kVectorSize)); + EXPECT_EQ(WEBRTC_SPL_WORD32_MAX, + WebRtcSpl_MaxAbsValueW32(vector32, kVectorSize)); + EXPECT_EQ(WEBRTC_SPL_WORD32_MAX, + WebRtcSpl_MaxValueW32(vector32, kVectorSize)); + EXPECT_EQ(kVectorSize - 1, WebRtcSpl_MaxAbsIndexW16(vector16, kVectorSize)); + EXPECT_EQ(kVectorSize - 1, WebRtcSpl_MaxIndexW16(vector16, kVectorSize)); + EXPECT_EQ(kVectorSize - 1, WebRtcSpl_MaxIndexW32(vector32, kVectorSize)); + EXPECT_EQ(WEBRTC_SPL_WORD16_MAX, + WebRtcSpl_MaxAbsElementW16(vector16, kVectorSize)); + WebRtcSpl_MinMaxW16(vector16, kVectorSize, &min_value, &max_value); + EXPECT_EQ(-29871, min_value); + EXPECT_EQ(WEBRTC_SPL_WORD16_MAX, max_value); + + // Test the cases where multiple maximum and minimum values are present. + vector16[1] = WEBRTC_SPL_WORD16_MAX; + vector16[6] = WEBRTC_SPL_WORD16_MIN; + vector16[11] = WEBRTC_SPL_WORD16_MIN; + vector32[1] = WEBRTC_SPL_WORD32_MAX; + vector32[6] = WEBRTC_SPL_WORD32_MIN; + vector32[11] = WEBRTC_SPL_WORD32_MIN; + + EXPECT_EQ(WEBRTC_SPL_WORD16_MAX, + WebRtcSpl_MaxAbsValueW16(vector16, kVectorSize)); + EXPECT_EQ(WEBRTC_SPL_WORD16_MAX, + WebRtcSpl_MaxValueW16(vector16, kVectorSize)); + EXPECT_EQ(WEBRTC_SPL_WORD16_MIN, + WebRtcSpl_MinValueW16(vector16, kVectorSize)); + EXPECT_EQ(WEBRTC_SPL_WORD32_MAX, + WebRtcSpl_MaxAbsValueW32(vector32, kVectorSize)); + EXPECT_EQ(WEBRTC_SPL_WORD32_MAX, + WebRtcSpl_MaxValueW32(vector32, kVectorSize)); + EXPECT_EQ(WEBRTC_SPL_WORD32_MIN, + WebRtcSpl_MinValueW32(vector32, kVectorSize)); + EXPECT_EQ(6u, WebRtcSpl_MaxAbsIndexW16(vector16, kVectorSize)); + EXPECT_EQ(1u, WebRtcSpl_MaxIndexW16(vector16, kVectorSize)); + EXPECT_EQ(1u, WebRtcSpl_MaxIndexW32(vector32, kVectorSize)); + EXPECT_EQ(6u, WebRtcSpl_MinIndexW16(vector16, kVectorSize)); + EXPECT_EQ(6u, WebRtcSpl_MinIndexW32(vector32, kVectorSize)); + EXPECT_EQ(WEBRTC_SPL_WORD16_MIN, + WebRtcSpl_MaxAbsElementW16(vector16, kVectorSize)); + WebRtcSpl_MinMaxW16(vector16, kVectorSize, &min_value, &max_value); + EXPECT_EQ(WEBRTC_SPL_WORD16_MIN, min_value); + EXPECT_EQ(WEBRTC_SPL_WORD16_MAX, max_value); + + // Test a one-element vector. + int16_t single_element_vector = 0; + EXPECT_EQ(0, WebRtcSpl_MaxAbsValueW16(&single_element_vector, 1)); + EXPECT_EQ(0, WebRtcSpl_MaxValueW16(&single_element_vector, 1)); + EXPECT_EQ(0, WebRtcSpl_MinValueW16(&single_element_vector, 1)); + EXPECT_EQ(0u, WebRtcSpl_MaxAbsIndexW16(&single_element_vector, 1)); + EXPECT_EQ(0u, WebRtcSpl_MaxIndexW16(&single_element_vector, 1)); + EXPECT_EQ(0u, WebRtcSpl_MinIndexW16(&single_element_vector, 1)); + EXPECT_EQ(0, WebRtcSpl_MaxAbsElementW16(&single_element_vector, 1)); + WebRtcSpl_MinMaxW16(&single_element_vector, 1, &min_value, &max_value); + EXPECT_EQ(0, min_value); + EXPECT_EQ(0, max_value); + + // Test a two-element vector with the values WEBRTC_SPL_WORD16_MIN and + // WEBRTC_SPL_WORD16_MAX. + int16_t two_element_vector[2] = {WEBRTC_SPL_WORD16_MIN, + WEBRTC_SPL_WORD16_MAX}; + EXPECT_EQ(WEBRTC_SPL_WORD16_MAX, + WebRtcSpl_MaxAbsValueW16(two_element_vector, 2)); + EXPECT_EQ(WEBRTC_SPL_WORD16_MAX, + WebRtcSpl_MaxValueW16(two_element_vector, 2)); + EXPECT_EQ(WEBRTC_SPL_WORD16_MIN, + WebRtcSpl_MinValueW16(two_element_vector, 2)); + EXPECT_EQ(0u, WebRtcSpl_MaxAbsIndexW16(two_element_vector, 2)); + EXPECT_EQ(1u, WebRtcSpl_MaxIndexW16(two_element_vector, 2)); + EXPECT_EQ(0u, WebRtcSpl_MinIndexW16(two_element_vector, 2)); + EXPECT_EQ(WEBRTC_SPL_WORD16_MIN, + WebRtcSpl_MaxAbsElementW16(two_element_vector, 2)); + WebRtcSpl_MinMaxW16(two_element_vector, 2, &min_value, &max_value); + EXPECT_EQ(WEBRTC_SPL_WORD16_MIN, min_value); + EXPECT_EQ(WEBRTC_SPL_WORD16_MAX, max_value); +} + +TEST(SplTest, VectorOperationsTest) { + const size_t kVectorSize = 4; + int B[] = {4, 12, 133, 1100}; + int16_t a16[kVectorSize]; + int16_t b16[kVectorSize]; + int16_t bTmp16[kVectorSize]; + + for (size_t kk = 0; kk < kVectorSize; ++kk) { + a16[kk] = B[kk]; + b16[kk] = B[kk]; + } + + WebRtcSpl_AffineTransformVector(bTmp16, b16, 3, 7, 2, kVectorSize); + for (size_t kk = 0; kk < kVectorSize; ++kk) { + EXPECT_EQ((B[kk] * 3 + 7) >> 2, bTmp16[kk]); + } + WebRtcSpl_ScaleAndAddVectorsWithRound(b16, 3, b16, 2, 2, bTmp16, kVectorSize); + for (size_t kk = 0; kk < kVectorSize; ++kk) { + EXPECT_EQ((B[kk] * 3 + B[kk] * 2 + 2) >> 2, bTmp16[kk]); + } + + WebRtcSpl_AddAffineVectorToVector(bTmp16, b16, 3, 7, 2, kVectorSize); + for (size_t kk = 0; kk < kVectorSize; ++kk) { + EXPECT_EQ(((B[kk] * 3 + B[kk] * 2 + 2) >> 2) + ((b16[kk] * 3 + 7) >> 2), + bTmp16[kk]); + } + + WebRtcSpl_ScaleVector(b16, bTmp16, 13, kVectorSize, 2); + for (size_t kk = 0; kk < kVectorSize; ++kk) { + EXPECT_EQ((b16[kk] * 13) >> 2, bTmp16[kk]); + } + WebRtcSpl_ScaleVectorWithSat(b16, bTmp16, 13, kVectorSize, 2); + for (size_t kk = 0; kk < kVectorSize; ++kk) { + EXPECT_EQ((b16[kk] * 13) >> 2, bTmp16[kk]); + } + WebRtcSpl_ScaleAndAddVectors(a16, 13, 2, b16, 7, 2, bTmp16, kVectorSize); + for (size_t kk = 0; kk < kVectorSize; ++kk) { + EXPECT_EQ(((a16[kk] * 13) >> 2) + ((b16[kk] * 7) >> 2), bTmp16[kk]); + } + + WebRtcSpl_AddVectorsAndShift(bTmp16, a16, b16, kVectorSize, 2); + for (size_t kk = 0; kk < kVectorSize; ++kk) { + EXPECT_EQ(B[kk] >> 1, bTmp16[kk]); + } + WebRtcSpl_ReverseOrderMultArrayElements(bTmp16, a16, &b16[3], kVectorSize, 2); + for (size_t kk = 0; kk < kVectorSize; ++kk) { + EXPECT_EQ((a16[kk] * b16[3 - kk]) >> 2, bTmp16[kk]); + } + WebRtcSpl_ElementwiseVectorMult(bTmp16, a16, b16, kVectorSize, 6); + for (size_t kk = 0; kk < kVectorSize; ++kk) { + EXPECT_EQ((a16[kk] * b16[kk]) >> 6, bTmp16[kk]); + } + + WebRtcSpl_SqrtOfOneMinusXSquared(b16, kVectorSize, bTmp16); + for (size_t kk = 0; kk < kVectorSize - 1; ++kk) { + EXPECT_EQ(32767, bTmp16[kk]); + } + EXPECT_EQ(32749, bTmp16[kVectorSize - 1]); + + EXPECT_EQ(0, WebRtcSpl_GetScalingSquare(b16, kVectorSize, 1)); +} + +TEST(SplTest, EstimatorsTest) { + const size_t kOrder = 2; + const int32_t unstable_filter[] = {4, 12, 133, 1100}; + const int32_t stable_filter[] = {1100, 133, 12, 4}; + int16_t lpc[kOrder + 2] = {0}; + int16_t refl[kOrder + 2] = {0}; + int16_t lpc_result[] = {4096, -497, 15, 0}; + int16_t refl_result[] = {-3962, 123, 0, 0}; + + EXPECT_EQ(0, WebRtcSpl_LevinsonDurbin(unstable_filter, lpc, refl, kOrder)); + EXPECT_EQ(1, WebRtcSpl_LevinsonDurbin(stable_filter, lpc, refl, kOrder)); + for (size_t i = 0; i < kOrder + 2; ++i) { + EXPECT_EQ(lpc_result[i], lpc[i]); + EXPECT_EQ(refl_result[i], refl[i]); + } +} + +TEST(SplTest, FilterTest) { + const size_t kVectorSize = 4; + const size_t kFilterOrder = 3; + int16_t A[] = {1, 2, 33, 100}; + int16_t A5[] = {1, 2, 33, 100, -5}; + int16_t B[] = {4, 12, 133, 110}; + int16_t data_in[kVectorSize]; + int16_t data_out[kVectorSize]; + int16_t bTmp16Low[kVectorSize]; + int16_t bState[kVectorSize]; + int16_t bStateLow[kVectorSize]; + + WebRtcSpl_ZerosArrayW16(bState, kVectorSize); + WebRtcSpl_ZerosArrayW16(bStateLow, kVectorSize); + + for (size_t kk = 0; kk < kVectorSize; ++kk) { + data_in[kk] = A[kk]; + data_out[kk] = 0; + } + + // MA filters. + // Note that the input data has `kFilterOrder` states before the actual + // data (one sample). + WebRtcSpl_FilterMAFastQ12(&data_in[kFilterOrder], data_out, B, + kFilterOrder + 1, 1); + EXPECT_EQ(0, data_out[0]); + // AR filters. + // Note that the output data has `kFilterOrder` states before the actual + // data (one sample). + WebRtcSpl_FilterARFastQ12(data_in, &data_out[kFilterOrder], A, + kFilterOrder + 1, 1); + EXPECT_EQ(0, data_out[kFilterOrder]); + + EXPECT_EQ(kVectorSize, WebRtcSpl_FilterAR(A5, 5, data_in, kVectorSize, bState, + kVectorSize, bStateLow, kVectorSize, + data_out, bTmp16Low, kVectorSize)); +} + +TEST(SplTest, RandTest) { + const int kVectorSize = 4; + int16_t BU[] = {3653, 12446, 8525, 30691}; + int16_t b16[kVectorSize]; + uint32_t bSeed = 100000; + + EXPECT_EQ(7086, WebRtcSpl_RandU(&bSeed)); + EXPECT_EQ(31565, WebRtcSpl_RandU(&bSeed)); + EXPECT_EQ(-9786, WebRtcSpl_RandN(&bSeed)); + EXPECT_EQ(kVectorSize, WebRtcSpl_RandUArray(b16, kVectorSize, &bSeed)); + for (int kk = 0; kk < kVectorSize; ++kk) { + EXPECT_EQ(BU[kk], b16[kk]); + } +} + +TEST(SplTest, DotProductWithScaleTest) { + EXPECT_EQ(605362796, WebRtcSpl_DotProductWithScale(vector16, vector16, + kVector16Size, 2)); +} + +TEST(SplTest, CrossCorrelationTest) { + // Note the function arguments relation specificed by API. + const size_t kCrossCorrelationDimension = 3; + const int kShift = 2; + const int kStep = 1; + const size_t kSeqDimension = 6; + + const int16_t kVector16[kVector16Size] = { + 1, 4323, 1963, WEBRTC_SPL_WORD16_MAX, WEBRTC_SPL_WORD16_MIN + 5, -3333, + -876, 8483, 142}; + int32_t vector32[kCrossCorrelationDimension] = {0}; + + WebRtcSpl_CrossCorrelation(vector32, vector16, kVector16, kSeqDimension, + kCrossCorrelationDimension, kShift, kStep); + + // WebRtcSpl_CrossCorrelationC() and WebRtcSpl_CrossCorrelationNeon() + // are not bit-exact. + const int32_t kExpected[kCrossCorrelationDimension] = {-266947903, -15579555, + -171282001}; + const int32_t* expected = kExpected; +#if !defined(MIPS32_LE) + const int32_t kExpectedNeon[kCrossCorrelationDimension] = { + -266947901, -15579553, -171281999}; + if (WebRtcSpl_CrossCorrelation != WebRtcSpl_CrossCorrelationC) { + expected = kExpectedNeon; + } +#endif + for (size_t i = 0; i < kCrossCorrelationDimension; ++i) { + EXPECT_EQ(expected[i], vector32[i]); + } +} + +TEST(SplTest, AutoCorrelationTest) { + int scale = 0; + int32_t vector32[kVector16Size]; + const int32_t expected[kVector16Size] = {302681398, 14223410, -121705063, + -85221647, -17104971, 61806945, + 6644603, -669329, 43}; + + EXPECT_EQ(kVector16Size, + WebRtcSpl_AutoCorrelation(vector16, kVector16Size, + kVector16Size - 1, vector32, &scale)); + EXPECT_EQ(3, scale); + for (size_t i = 0; i < kVector16Size; ++i) { + EXPECT_EQ(expected[i], vector32[i]); + } +} + +TEST(SplTest, SignalProcessingTest) { + const size_t kVectorSize = 4; + int A[] = {1, 2, 33, 100}; + const int16_t kHanning[4] = {2399, 8192, 13985, 16384}; + int16_t b16[kVectorSize]; + + int16_t bTmp16[kVectorSize]; + + int bScale = 0; + + for (size_t kk = 0; kk < kVectorSize; ++kk) { + b16[kk] = A[kk]; + } + + // TODO(bjornv): Activate the Reflection Coefficient tests when refactoring. + // WebRtcSpl_ReflCoefToLpc(b16, kVectorSize, bTmp16); + //// for (int kk = 0; kk < kVectorSize; ++kk) { + //// EXPECT_EQ(aTmp16[kk], bTmp16[kk]); + //// } + // WebRtcSpl_LpcToReflCoef(bTmp16, kVectorSize, b16); + //// for (int kk = 0; kk < kVectorSize; ++kk) { + //// EXPECT_EQ(a16[kk], b16[kk]); + //// } + // WebRtcSpl_AutoCorrToReflCoef(b32, kVectorSize, bTmp16); + //// for (int kk = 0; kk < kVectorSize; ++kk) { + //// EXPECT_EQ(aTmp16[kk], bTmp16[kk]); + //// } + + WebRtcSpl_GetHanningWindow(bTmp16, kVectorSize); + for (size_t kk = 0; kk < kVectorSize; ++kk) { + EXPECT_EQ(kHanning[kk], bTmp16[kk]); + } + + for (size_t kk = 0; kk < kVectorSize; ++kk) { + b16[kk] = A[kk]; + } + EXPECT_EQ(11094, WebRtcSpl_Energy(b16, kVectorSize, &bScale)); + EXPECT_EQ(0, bScale); +} + +TEST(SplTest, FFTTest) { + int16_t B[] = {1, 2, 33, 100, 2, 3, 34, 101, 3, 4, 35, 102, 4, 5, 36, 103}; + + EXPECT_EQ(0, WebRtcSpl_ComplexFFT(B, 3, 1)); + // for (int kk = 0; kk < 16; ++kk) { + // EXPECT_EQ(A[kk], B[kk]); + // } + EXPECT_EQ(0, WebRtcSpl_ComplexIFFT(B, 3, 1)); + // for (int kk = 0; kk < 16; ++kk) { + // EXPECT_EQ(A[kk], B[kk]); + // } + WebRtcSpl_ComplexBitReverse(B, 3); + for (int kk = 0; kk < 16; ++kk) { + // EXPECT_EQ(A[kk], B[kk]); + } +} + +TEST(SplTest, Resample48WithSaturationTest) { + // The test resamples 3*kBlockSize number of samples to 2*kBlockSize number + // of samples. + const size_t kBlockSize = 16; + + // Saturated input vector of 48 samples. + const int32_t kVectorSaturated[3 * kBlockSize + 7] = { + -32768, -32768, -32768, -32768, -32768, -32768, -32768, -32768, + -32768, -32768, -32768, -32768, -32768, -32768, -32768, -32768, + -32768, -32768, -32768, -32768, -32768, -32768, -32768, -32768, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32767, 32767, 32767}; + + // All values in `out_vector` should be `kRefValue32kHz`. + const int32_t kRefValue32kHz1 = -1077493760; + const int32_t kRefValue32kHz2 = 1077493645; + + // After bit shift with saturation, `out_vector_w16` is saturated. + + const int16_t kRefValue16kHz1 = -32768; + const int16_t kRefValue16kHz2 = 32767; + // Vector for storing output. + int32_t out_vector[2 * kBlockSize]; + int16_t out_vector_w16[2 * kBlockSize]; + + WebRtcSpl_Resample48khzTo32khz(kVectorSaturated, out_vector, kBlockSize); + WebRtcSpl_VectorBitShiftW32ToW16(out_vector_w16, 2 * kBlockSize, out_vector, + 15); + + // Comparing output values against references. The values at position + // 12-15 are skipped to account for the filter lag. + for (size_t i = 0; i < 12; ++i) { + EXPECT_EQ(kRefValue32kHz1, out_vector[i]); + EXPECT_EQ(kRefValue16kHz1, out_vector_w16[i]); + } + for (size_t i = 16; i < 2 * kBlockSize; ++i) { + EXPECT_EQ(kRefValue32kHz2, out_vector[i]); + EXPECT_EQ(kRefValue16kHz2, out_vector_w16[i]); + } +} diff --git a/third_party/libwebrtc/common_audio/signal_processing/spl_init.c b/third_party/libwebrtc/common_audio/signal_processing/spl_init.c new file mode 100644 index 0000000000..cf37d47bec --- /dev/null +++ b/third_party/libwebrtc/common_audio/signal_processing/spl_init.c @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Some code came from common/rtcd.c in the WebM project. + +#include "common_audio/signal_processing/include/signal_processing_library.h" + +// TODO(bugs.webrtc.org/9553): These function pointers are useless. Refactor +// things so that we simply have a bunch of regular functions with different +// implementations for different platforms. + +#if defined(WEBRTC_HAS_NEON) + +const MaxAbsValueW16 WebRtcSpl_MaxAbsValueW16 = WebRtcSpl_MaxAbsValueW16Neon; +const MaxAbsValueW32 WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32Neon; +const MaxValueW16 WebRtcSpl_MaxValueW16 = WebRtcSpl_MaxValueW16Neon; +const MaxValueW32 WebRtcSpl_MaxValueW32 = WebRtcSpl_MaxValueW32Neon; +const MinValueW16 WebRtcSpl_MinValueW16 = WebRtcSpl_MinValueW16Neon; +const MinValueW32 WebRtcSpl_MinValueW32 = WebRtcSpl_MinValueW32Neon; +const CrossCorrelation WebRtcSpl_CrossCorrelation = + WebRtcSpl_CrossCorrelationNeon; +const DownsampleFast WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFastNeon; +const ScaleAndAddVectorsWithRound WebRtcSpl_ScaleAndAddVectorsWithRound = + WebRtcSpl_ScaleAndAddVectorsWithRoundC; + +#elif defined(MIPS32_LE) + +const MaxAbsValueW16 WebRtcSpl_MaxAbsValueW16 = WebRtcSpl_MaxAbsValueW16_mips; +const MaxAbsValueW32 WebRtcSpl_MaxAbsValueW32 = +#ifdef MIPS_DSP_R1_LE + WebRtcSpl_MaxAbsValueW32_mips; +#else + WebRtcSpl_MaxAbsValueW32C; +#endif +const MaxValueW16 WebRtcSpl_MaxValueW16 = WebRtcSpl_MaxValueW16_mips; +const MaxValueW32 WebRtcSpl_MaxValueW32 = WebRtcSpl_MaxValueW32_mips; +const MinValueW16 WebRtcSpl_MinValueW16 = WebRtcSpl_MinValueW16_mips; +const MinValueW32 WebRtcSpl_MinValueW32 = WebRtcSpl_MinValueW32_mips; +const CrossCorrelation WebRtcSpl_CrossCorrelation = + WebRtcSpl_CrossCorrelation_mips; +const DownsampleFast WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFast_mips; +const ScaleAndAddVectorsWithRound WebRtcSpl_ScaleAndAddVectorsWithRound = +#ifdef MIPS_DSP_R1_LE + WebRtcSpl_ScaleAndAddVectorsWithRound_mips; +#else + WebRtcSpl_ScaleAndAddVectorsWithRoundC; +#endif + +#else + +const MaxAbsValueW16 WebRtcSpl_MaxAbsValueW16 = WebRtcSpl_MaxAbsValueW16C; +const MaxAbsValueW32 WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32C; +const MaxValueW16 WebRtcSpl_MaxValueW16 = WebRtcSpl_MaxValueW16C; +const MaxValueW32 WebRtcSpl_MaxValueW32 = WebRtcSpl_MaxValueW32C; +const MinValueW16 WebRtcSpl_MinValueW16 = WebRtcSpl_MinValueW16C; +const MinValueW32 WebRtcSpl_MinValueW32 = WebRtcSpl_MinValueW32C; +const CrossCorrelation WebRtcSpl_CrossCorrelation = WebRtcSpl_CrossCorrelationC; +const DownsampleFast WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFastC; +const ScaleAndAddVectorsWithRound WebRtcSpl_ScaleAndAddVectorsWithRound = + WebRtcSpl_ScaleAndAddVectorsWithRoundC; + +#endif diff --git a/third_party/libwebrtc/common_audio/signal_processing/spl_inl.c b/third_party/libwebrtc/common_audio/signal_processing/spl_inl.c new file mode 100644 index 0000000000..d09e308ed3 --- /dev/null +++ b/third_party/libwebrtc/common_audio/signal_processing/spl_inl.c @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <stdint.h> + +#include "common_audio/signal_processing/include/spl_inl.h" + +// Table used by WebRtcSpl_CountLeadingZeros32_NotBuiltin. For each uint32_t n +// that's a sequence of 0 bits followed by a sequence of 1 bits, the entry at +// index (n * 0x8c0b2891) >> 26 in this table gives the number of zero bits in +// n. +const int8_t kWebRtcSpl_CountLeadingZeros32_Table[64] = { + 32, 8, 17, -1, -1, 14, -1, -1, -1, 20, -1, -1, -1, 28, -1, 18, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 26, 25, 24, + 4, 11, 23, 31, 3, 7, 10, 16, 22, 30, -1, -1, 2, 6, 13, 9, + -1, 15, -1, 21, -1, 29, 19, -1, -1, -1, -1, -1, 1, 27, 5, 12, +}; diff --git a/third_party/libwebrtc/common_audio/signal_processing/spl_sqrt.c b/third_party/libwebrtc/common_audio/signal_processing/spl_sqrt.c new file mode 100644 index 0000000000..cf9448ac97 --- /dev/null +++ b/third_party/libwebrtc/common_audio/signal_processing/spl_sqrt.c @@ -0,0 +1,194 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +/* + * This file contains the function WebRtcSpl_Sqrt(). + * The description header can be found in signal_processing_library.h + * + */ + +#include "rtc_base/checks.h" +#include "common_audio/signal_processing/include/signal_processing_library.h" + +int32_t WebRtcSpl_SqrtLocal(int32_t in); + +int32_t WebRtcSpl_SqrtLocal(int32_t in) +{ + + int16_t x_half, t16; + int32_t A, B, x2; + + /* The following block performs: + y=in/2 + x=y-2^30 + x_half=x/2^31 + t = 1 + (x_half) - 0.5*((x_half)^2) + 0.5*((x_half)^3) - 0.625*((x_half)^4) + + 0.875*((x_half)^5) + */ + + B = in / 2; + + B = B - ((int32_t)0x40000000); // B = in/2 - 1/2 + x_half = (int16_t)(B >> 16); // x_half = x/2 = (in-1)/2 + B = B + ((int32_t)0x40000000); // B = 1 + x/2 + B = B + ((int32_t)0x40000000); // Add 0.5 twice (since 1.0 does not exist in Q31) + + x2 = ((int32_t)x_half) * ((int32_t)x_half) * 2; // A = (x/2)^2 + A = -x2; // A = -(x/2)^2 + B = B + (A >> 1); // B = 1 + x/2 - 0.5*(x/2)^2 + + A >>= 16; + A = A * A * 2; // A = (x/2)^4 + t16 = (int16_t)(A >> 16); + B += -20480 * t16 * 2; // B = B - 0.625*A + // After this, B = 1 + x/2 - 0.5*(x/2)^2 - 0.625*(x/2)^4 + + A = x_half * t16 * 2; // A = (x/2)^5 + t16 = (int16_t)(A >> 16); + B += 28672 * t16 * 2; // B = B + 0.875*A + // After this, B = 1 + x/2 - 0.5*(x/2)^2 - 0.625*(x/2)^4 + 0.875*(x/2)^5 + + t16 = (int16_t)(x2 >> 16); + A = x_half * t16 * 2; // A = x/2^3 + + B = B + (A >> 1); // B = B + 0.5*A + // After this, B = 1 + x/2 - 0.5*(x/2)^2 + 0.5*(x/2)^3 - 0.625*(x/2)^4 + 0.875*(x/2)^5 + + B = B + ((int32_t)32768); // Round off bit + + return B; +} + +int32_t WebRtcSpl_Sqrt(int32_t value) +{ + /* + Algorithm: + + Six term Taylor Series is used here to compute the square root of a number + y^0.5 = (1+x)^0.5 where x = y-1 + = 1+(x/2)-0.5*((x/2)^2+0.5*((x/2)^3-0.625*((x/2)^4+0.875*((x/2)^5) + 0.5 <= x < 1 + + Example of how the algorithm works, with ut=sqrt(in), and + with in=73632 and ut=271 (even shift value case): + + in=73632 + y= in/131072 + x=y-1 + t = 1 + (x/2) - 0.5*((x/2)^2) + 0.5*((x/2)^3) - 0.625*((x/2)^4) + 0.875*((x/2)^5) + ut=t*(1/sqrt(2))*512 + + or: + + in=73632 + in2=73632*2^14 + y= in2/2^31 + x=y-1 + t = 1 + (x/2) - 0.5*((x/2)^2) + 0.5*((x/2)^3) - 0.625*((x/2)^4) + 0.875*((x/2)^5) + ut=t*(1/sqrt(2)) + ut2=ut*2^9 + + which gives: + + in = 73632 + in2 = 1206386688 + y = 0.56176757812500 + x = -0.43823242187500 + t = 0.74973506527313 + ut = 0.53014274874797 + ut2 = 2.714330873589594e+002 + + or: + + in=73632 + in2=73632*2^14 + y=in2/2 + x=y-2^30 + x_half=x/2^31 + t = 1 + (x_half) - 0.5*((x_half)^2) + 0.5*((x_half)^3) - 0.625*((x_half)^4) + + 0.875*((x_half)^5) + ut=t*(1/sqrt(2)) + ut2=ut*2^9 + + which gives: + + in = 73632 + in2 = 1206386688 + y = 603193344 + x = -470548480 + x_half = -0.21911621093750 + t = 0.74973506527313 + ut = 0.53014274874797 + ut2 = 2.714330873589594e+002 + + */ + + int16_t x_norm, nshift, t16, sh; + int32_t A; + + int16_t k_sqrt_2 = 23170; // 1/sqrt2 (==5a82) + + A = value; + + // The convention in this function is to calculate sqrt(abs(A)). Negate the + // input if it is negative. + if (A < 0) { + if (A == WEBRTC_SPL_WORD32_MIN) { + // This number cannot be held in an int32_t after negating. + // Map it to the maximum positive value. + A = WEBRTC_SPL_WORD32_MAX; + } else { + A = -A; + } + } else if (A == 0) { + return 0; // sqrt(0) = 0 + } + + sh = WebRtcSpl_NormW32(A); // # shifts to normalize A + A = WEBRTC_SPL_LSHIFT_W32(A, sh); // Normalize A + if (A < (WEBRTC_SPL_WORD32_MAX - 32767)) + { + A = A + ((int32_t)32768); // Round off bit + } else + { + A = WEBRTC_SPL_WORD32_MAX; + } + + x_norm = (int16_t)(A >> 16); // x_norm = AH + + nshift = (sh / 2); + RTC_DCHECK_GE(nshift, 0); + + A = (int32_t)WEBRTC_SPL_LSHIFT_W32((int32_t)x_norm, 16); + A = WEBRTC_SPL_ABS_W32(A); // A = abs(x_norm<<16) + A = WebRtcSpl_SqrtLocal(A); // A = sqrt(A) + + if (2 * nshift == sh) { + // Even shift value case + + t16 = (int16_t)(A >> 16); // t16 = AH + + A = k_sqrt_2 * t16 * 2; // A = 1/sqrt(2)*t16 + A = A + ((int32_t)32768); // Round off + A = A & ((int32_t)0x7fff0000); // Round off + + A >>= 15; // A = A>>16 + + } else + { + A >>= 16; // A = A>>16 + } + + A = A & ((int32_t)0x0000ffff); + A >>= nshift; // De-normalize the result. + + return A; +} diff --git a/third_party/libwebrtc/common_audio/signal_processing/splitting_filter.c b/third_party/libwebrtc/common_audio/signal_processing/splitting_filter.c new file mode 100644 index 0000000000..27a0a2a8c9 --- /dev/null +++ b/third_party/libwebrtc/common_audio/signal_processing/splitting_filter.c @@ -0,0 +1,211 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * This file contains the splitting filter functions. + * + */ + +#include "rtc_base/checks.h" +#include "common_audio/signal_processing/include/signal_processing_library.h" + +// Maximum number of samples in a low/high-band frame. +enum +{ + kMaxBandFrameLength = 320 // 10 ms at 64 kHz. +}; + +// QMF filter coefficients in Q16. +static const uint16_t WebRtcSpl_kAllPassFilter1[3] = {6418, 36982, 57261}; +static const uint16_t WebRtcSpl_kAllPassFilter2[3] = {21333, 49062, 63010}; + +/////////////////////////////////////////////////////////////////////////////////////////////// +// WebRtcSpl_AllPassQMF(...) +// +// Allpass filter used by the analysis and synthesis parts of the QMF filter. +// +// Input: +// - in_data : Input data sequence (Q10) +// - data_length : Length of data sequence (>2) +// - filter_coefficients : Filter coefficients (length 3, Q16) +// +// Input & Output: +// - filter_state : Filter state (length 6, Q10). +// +// Output: +// - out_data : Output data sequence (Q10), length equal to +// `data_length` +// + +static void WebRtcSpl_AllPassQMF(int32_t* in_data, + size_t data_length, + int32_t* out_data, + const uint16_t* filter_coefficients, + int32_t* filter_state) +{ + // The procedure is to filter the input with three first order all pass + // filters (cascade operations). + // + // a_3 + q^-1 a_2 + q^-1 a_1 + q^-1 + // y[n] = ----------- ----------- ----------- x[n] + // 1 + a_3q^-1 1 + a_2q^-1 1 + a_1q^-1 + // + // The input vector `filter_coefficients` includes these three filter + // coefficients. The filter state contains the in_data state, in_data[-1], + // followed by the out_data state, out_data[-1]. This is repeated for each + // cascade. The first cascade filter will filter the `in_data` and store + // the output in `out_data`. The second will the take the `out_data` as + // input and make an intermediate storage in `in_data`, to save memory. The + // third, and final, cascade filter operation takes the `in_data` (which is + // the output from the previous cascade filter) and store the output in + // `out_data`. Note that the input vector values are changed during the + // process. + size_t k; + int32_t diff; + // First all-pass cascade; filter from in_data to out_data. + + // Let y_i[n] indicate the output of cascade filter i (with filter + // coefficient a_i) at vector position n. Then the final output will be + // y[n] = y_3[n] + + // First loop, use the states stored in memory. + // "diff" should be safe from wrap around since max values are 2^25 + // diff = (x[0] - y_1[-1]) + diff = WebRtcSpl_SubSatW32(in_data[0], filter_state[1]); + // y_1[0] = x[-1] + a_1 * (x[0] - y_1[-1]) + out_data[0] = WEBRTC_SPL_SCALEDIFF32(filter_coefficients[0], diff, filter_state[0]); + + // For the remaining loops, use previous values. + for (k = 1; k < data_length; k++) + { + // diff = (x[n] - y_1[n-1]) + diff = WebRtcSpl_SubSatW32(in_data[k], out_data[k - 1]); + // y_1[n] = x[n-1] + a_1 * (x[n] - y_1[n-1]) + out_data[k] = WEBRTC_SPL_SCALEDIFF32(filter_coefficients[0], diff, in_data[k - 1]); + } + + // Update states. + filter_state[0] = in_data[data_length - 1]; // x[N-1], becomes x[-1] next time + filter_state[1] = out_data[data_length - 1]; // y_1[N-1], becomes y_1[-1] next time + + // Second all-pass cascade; filter from out_data to in_data. + // diff = (y_1[0] - y_2[-1]) + diff = WebRtcSpl_SubSatW32(out_data[0], filter_state[3]); + // y_2[0] = y_1[-1] + a_2 * (y_1[0] - y_2[-1]) + in_data[0] = WEBRTC_SPL_SCALEDIFF32(filter_coefficients[1], diff, filter_state[2]); + for (k = 1; k < data_length; k++) + { + // diff = (y_1[n] - y_2[n-1]) + diff = WebRtcSpl_SubSatW32(out_data[k], in_data[k - 1]); + // y_2[0] = y_1[-1] + a_2 * (y_1[0] - y_2[-1]) + in_data[k] = WEBRTC_SPL_SCALEDIFF32(filter_coefficients[1], diff, out_data[k-1]); + } + + filter_state[2] = out_data[data_length - 1]; // y_1[N-1], becomes y_1[-1] next time + filter_state[3] = in_data[data_length - 1]; // y_2[N-1], becomes y_2[-1] next time + + // Third all-pass cascade; filter from in_data to out_data. + // diff = (y_2[0] - y[-1]) + diff = WebRtcSpl_SubSatW32(in_data[0], filter_state[5]); + // y[0] = y_2[-1] + a_3 * (y_2[0] - y[-1]) + out_data[0] = WEBRTC_SPL_SCALEDIFF32(filter_coefficients[2], diff, filter_state[4]); + for (k = 1; k < data_length; k++) + { + // diff = (y_2[n] - y[n-1]) + diff = WebRtcSpl_SubSatW32(in_data[k], out_data[k - 1]); + // y[n] = y_2[n-1] + a_3 * (y_2[n] - y[n-1]) + out_data[k] = WEBRTC_SPL_SCALEDIFF32(filter_coefficients[2], diff, in_data[k-1]); + } + filter_state[4] = in_data[data_length - 1]; // y_2[N-1], becomes y_2[-1] next time + filter_state[5] = out_data[data_length - 1]; // y[N-1], becomes y[-1] next time +} + +void WebRtcSpl_AnalysisQMF(const int16_t* in_data, size_t in_data_length, + int16_t* low_band, int16_t* high_band, + int32_t* filter_state1, int32_t* filter_state2) +{ + size_t i; + int16_t k; + int32_t tmp; + int32_t half_in1[kMaxBandFrameLength]; + int32_t half_in2[kMaxBandFrameLength]; + int32_t filter1[kMaxBandFrameLength]; + int32_t filter2[kMaxBandFrameLength]; + const size_t band_length = in_data_length / 2; + RTC_DCHECK_EQ(0, in_data_length % 2); + RTC_DCHECK_LE(band_length, kMaxBandFrameLength); + + // Split even and odd samples. Also shift them to Q10. + for (i = 0, k = 0; i < band_length; i++, k += 2) + { + half_in2[i] = ((int32_t)in_data[k]) * (1 << 10); + half_in1[i] = ((int32_t)in_data[k + 1]) * (1 << 10); + } + + // All pass filter even and odd samples, independently. + WebRtcSpl_AllPassQMF(half_in1, band_length, filter1, + WebRtcSpl_kAllPassFilter1, filter_state1); + WebRtcSpl_AllPassQMF(half_in2, band_length, filter2, + WebRtcSpl_kAllPassFilter2, filter_state2); + + // Take the sum and difference of filtered version of odd and even + // branches to get upper & lower band. + for (i = 0; i < band_length; i++) + { + tmp = (filter1[i] + filter2[i] + 1024) >> 11; + low_band[i] = WebRtcSpl_SatW32ToW16(tmp); + + tmp = (filter1[i] - filter2[i] + 1024) >> 11; + high_band[i] = WebRtcSpl_SatW32ToW16(tmp); + } +} + +void WebRtcSpl_SynthesisQMF(const int16_t* low_band, const int16_t* high_band, + size_t band_length, int16_t* out_data, + int32_t* filter_state1, int32_t* filter_state2) +{ + int32_t tmp; + int32_t half_in1[kMaxBandFrameLength]; + int32_t half_in2[kMaxBandFrameLength]; + int32_t filter1[kMaxBandFrameLength]; + int32_t filter2[kMaxBandFrameLength]; + size_t i; + int16_t k; + RTC_DCHECK_LE(band_length, kMaxBandFrameLength); + + // Obtain the sum and difference channels out of upper and lower-band channels. + // Also shift to Q10 domain. + for (i = 0; i < band_length; i++) + { + tmp = (int32_t)low_band[i] + (int32_t)high_band[i]; + half_in1[i] = tmp * (1 << 10); + tmp = (int32_t)low_band[i] - (int32_t)high_band[i]; + half_in2[i] = tmp * (1 << 10); + } + + // all-pass filter the sum and difference channels + WebRtcSpl_AllPassQMF(half_in1, band_length, filter1, + WebRtcSpl_kAllPassFilter2, filter_state1); + WebRtcSpl_AllPassQMF(half_in2, band_length, filter2, + WebRtcSpl_kAllPassFilter1, filter_state2); + + // The filtered signals are even and odd samples of the output. Combine + // them. The signals are Q10 should shift them back to Q0 and take care of + // saturation. + for (i = 0, k = 0; i < band_length; i++) + { + tmp = (filter2[i] + 512) >> 10; + out_data[k++] = WebRtcSpl_SatW32ToW16(tmp); + + tmp = (filter1[i] + 512) >> 10; + out_data[k++] = WebRtcSpl_SatW32ToW16(tmp); + } + +} diff --git a/third_party/libwebrtc/common_audio/signal_processing/sqrt_of_one_minus_x_squared.c b/third_party/libwebrtc/common_audio/signal_processing/sqrt_of_one_minus_x_squared.c new file mode 100644 index 0000000000..a77fd4063f --- /dev/null +++ b/third_party/libwebrtc/common_audio/signal_processing/sqrt_of_one_minus_x_squared.c @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +/* + * This file contains the function WebRtcSpl_SqrtOfOneMinusXSquared(). + * The description header can be found in signal_processing_library.h + * + */ + +#include "common_audio/signal_processing/include/signal_processing_library.h" + +void WebRtcSpl_SqrtOfOneMinusXSquared(int16_t *xQ15, size_t vector_length, + int16_t *yQ15) +{ + int32_t sq; + size_t m; + int16_t tmp; + + for (m = 0; m < vector_length; m++) + { + tmp = xQ15[m]; + sq = tmp * tmp; // x^2 in Q30 + sq = 1073741823 - sq; // 1-x^2, where 1 ~= 0.99999999906 is 1073741823 in Q30 + sq = WebRtcSpl_Sqrt(sq); // sqrt(1-x^2) in Q15 + yQ15[m] = (int16_t)sq; + } +} diff --git a/third_party/libwebrtc/common_audio/signal_processing/vector_scaling_operations.c b/third_party/libwebrtc/common_audio/signal_processing/vector_scaling_operations.c new file mode 100644 index 0000000000..7307dc78ff --- /dev/null +++ b/third_party/libwebrtc/common_audio/signal_processing/vector_scaling_operations.c @@ -0,0 +1,165 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +/* + * This file contains implementations of the functions + * WebRtcSpl_VectorBitShiftW16() + * WebRtcSpl_VectorBitShiftW32() + * WebRtcSpl_VectorBitShiftW32ToW16() + * WebRtcSpl_ScaleVector() + * WebRtcSpl_ScaleVectorWithSat() + * WebRtcSpl_ScaleAndAddVectors() + * WebRtcSpl_ScaleAndAddVectorsWithRoundC() + */ + +#include "common_audio/signal_processing/include/signal_processing_library.h" + +void WebRtcSpl_VectorBitShiftW16(int16_t *res, size_t length, + const int16_t *in, int16_t right_shifts) +{ + size_t i; + + if (right_shifts > 0) + { + for (i = length; i > 0; i--) + { + (*res++) = ((*in++) >> right_shifts); + } + } else + { + for (i = length; i > 0; i--) + { + (*res++) = ((*in++) * (1 << (-right_shifts))); + } + } +} + +void WebRtcSpl_VectorBitShiftW32(int32_t *out_vector, + size_t vector_length, + const int32_t *in_vector, + int16_t right_shifts) +{ + size_t i; + + if (right_shifts > 0) + { + for (i = vector_length; i > 0; i--) + { + (*out_vector++) = ((*in_vector++) >> right_shifts); + } + } else + { + for (i = vector_length; i > 0; i--) + { + (*out_vector++) = ((*in_vector++) << (-right_shifts)); + } + } +} + +void WebRtcSpl_VectorBitShiftW32ToW16(int16_t* out, size_t length, + const int32_t* in, int right_shifts) { + size_t i; + int32_t tmp_w32; + + if (right_shifts >= 0) { + for (i = length; i > 0; i--) { + tmp_w32 = (*in++) >> right_shifts; + (*out++) = WebRtcSpl_SatW32ToW16(tmp_w32); + } + } else { + int left_shifts = -right_shifts; + for (i = length; i > 0; i--) { + tmp_w32 = (*in++) << left_shifts; + (*out++) = WebRtcSpl_SatW32ToW16(tmp_w32); + } + } +} + +void WebRtcSpl_ScaleVector(const int16_t *in_vector, int16_t *out_vector, + int16_t gain, size_t in_vector_length, + int16_t right_shifts) +{ + // Performs vector operation: out_vector = (gain*in_vector)>>right_shifts + size_t i; + const int16_t *inptr; + int16_t *outptr; + + inptr = in_vector; + outptr = out_vector; + + for (i = 0; i < in_vector_length; i++) + { + *outptr++ = (int16_t)((*inptr++ * gain) >> right_shifts); + } +} + +void WebRtcSpl_ScaleVectorWithSat(const int16_t *in_vector, int16_t *out_vector, + int16_t gain, size_t in_vector_length, + int16_t right_shifts) +{ + // Performs vector operation: out_vector = (gain*in_vector)>>right_shifts + size_t i; + const int16_t *inptr; + int16_t *outptr; + + inptr = in_vector; + outptr = out_vector; + + for (i = 0; i < in_vector_length; i++) { + *outptr++ = WebRtcSpl_SatW32ToW16((*inptr++ * gain) >> right_shifts); + } +} + +void WebRtcSpl_ScaleAndAddVectors(const int16_t *in1, int16_t gain1, int shift1, + const int16_t *in2, int16_t gain2, int shift2, + int16_t *out, size_t vector_length) +{ + // Performs vector operation: out = (gain1*in1)>>shift1 + (gain2*in2)>>shift2 + size_t i; + const int16_t *in1ptr; + const int16_t *in2ptr; + int16_t *outptr; + + in1ptr = in1; + in2ptr = in2; + outptr = out; + + for (i = 0; i < vector_length; i++) + { + *outptr++ = (int16_t)((gain1 * *in1ptr++) >> shift1) + + (int16_t)((gain2 * *in2ptr++) >> shift2); + } +} + +// C version of WebRtcSpl_ScaleAndAddVectorsWithRound() for generic platforms. +int WebRtcSpl_ScaleAndAddVectorsWithRoundC(const int16_t* in_vector1, + int16_t in_vector1_scale, + const int16_t* in_vector2, + int16_t in_vector2_scale, + int right_shifts, + int16_t* out_vector, + size_t length) { + size_t i = 0; + int round_value = (1 << right_shifts) >> 1; + + if (in_vector1 == NULL || in_vector2 == NULL || out_vector == NULL || + length == 0 || right_shifts < 0) { + return -1; + } + + for (i = 0; i < length; i++) { + out_vector[i] = (int16_t)(( + in_vector1[i] * in_vector1_scale + in_vector2[i] * in_vector2_scale + + round_value) >> right_shifts); + } + + return 0; +} diff --git a/third_party/libwebrtc/common_audio/signal_processing/vector_scaling_operations_mips.c b/third_party/libwebrtc/common_audio/signal_processing/vector_scaling_operations_mips.c new file mode 100644 index 0000000000..ba2d26d422 --- /dev/null +++ b/third_party/libwebrtc/common_audio/signal_processing/vector_scaling_operations_mips.c @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +/* + * This file contains implementations of the functions + * WebRtcSpl_ScaleAndAddVectorsWithRound_mips() + */ + +#include "common_audio/signal_processing/include/signal_processing_library.h" + +int WebRtcSpl_ScaleAndAddVectorsWithRound_mips(const int16_t* in_vector1, + int16_t in_vector1_scale, + const int16_t* in_vector2, + int16_t in_vector2_scale, + int right_shifts, + int16_t* out_vector, + size_t length) { + int16_t r0 = 0, r1 = 0; + int16_t *in1 = (int16_t*)in_vector1; + int16_t *in2 = (int16_t*)in_vector2; + int16_t *out = out_vector; + size_t i = 0; + int value32 = 0; + + if (in_vector1 == NULL || in_vector2 == NULL || out_vector == NULL || + length == 0 || right_shifts < 0) { + return -1; + } + for (i = 0; i < length; i++) { + __asm __volatile ( + "lh %[r0], 0(%[in1]) \n\t" + "lh %[r1], 0(%[in2]) \n\t" + "mult %[r0], %[in_vector1_scale] \n\t" + "madd %[r1], %[in_vector2_scale] \n\t" + "extrv_r.w %[value32], $ac0, %[right_shifts] \n\t" + "addiu %[in1], %[in1], 2 \n\t" + "addiu %[in2], %[in2], 2 \n\t" + "sh %[value32], 0(%[out]) \n\t" + "addiu %[out], %[out], 2 \n\t" + : [value32] "=&r" (value32), [out] "+r" (out), [in1] "+r" (in1), + [in2] "+r" (in2), [r0] "=&r" (r0), [r1] "=&r" (r1) + : [in_vector1_scale] "r" (in_vector1_scale), + [in_vector2_scale] "r" (in_vector2_scale), + [right_shifts] "r" (right_shifts) + : "hi", "lo", "memory" + ); + } + return 0; +} |