From 26a029d407be480d791972afb5975cf62c9360a6 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Fri, 19 Apr 2024 02:47:55 +0200 Subject: Adding upstream version 124.0.1. Signed-off-by: Daniel Baumann --- media/libspeex_resampler/01_outside-speex.patch | 30 + .../02_simd-detect-runtime.patch | 368 ++++++ media/libspeex_resampler/03_set-skip-frac.patch | 93 ++ media/libspeex_resampler/04_hugemem.patch | 57 + .../05_set-rate-overflow-no-return.patch | 27 + media/libspeex_resampler/AUTHORS | 18 + media/libspeex_resampler/COPYING | 35 + media/libspeex_resampler/moz.build | 11 + media/libspeex_resampler/moz.yaml | 66 + media/libspeex_resampler/src/arch.h | 232 ++++ media/libspeex_resampler/src/fixed_generic.h | 106 ++ media/libspeex_resampler/src/moz.build | 47 + media/libspeex_resampler/src/resample.c | 1268 ++++++++++++++++++++ media/libspeex_resampler/src/resample_neon.c | 342 ++++++ media/libspeex_resampler/src/resample_sse.c | 130 ++ media/libspeex_resampler/src/simd_detect.cpp | 27 + media/libspeex_resampler/src/simd_detect.h | 43 + media/libspeex_resampler/src/speex_resampler.h | 361 ++++++ 18 files changed, 3261 insertions(+) create mode 100644 media/libspeex_resampler/01_outside-speex.patch create mode 100644 media/libspeex_resampler/02_simd-detect-runtime.patch create mode 100644 media/libspeex_resampler/03_set-skip-frac.patch create mode 100644 media/libspeex_resampler/04_hugemem.patch create mode 100644 media/libspeex_resampler/05_set-rate-overflow-no-return.patch create mode 100644 media/libspeex_resampler/AUTHORS create mode 100644 media/libspeex_resampler/COPYING create mode 100644 media/libspeex_resampler/moz.build create mode 100644 media/libspeex_resampler/moz.yaml create mode 100644 media/libspeex_resampler/src/arch.h create mode 100644 media/libspeex_resampler/src/fixed_generic.h create mode 100644 media/libspeex_resampler/src/moz.build create mode 100644 media/libspeex_resampler/src/resample.c create mode 100644 media/libspeex_resampler/src/resample_neon.c create mode 100644 media/libspeex_resampler/src/resample_sse.c create mode 100644 media/libspeex_resampler/src/simd_detect.cpp create mode 100644 media/libspeex_resampler/src/simd_detect.h create mode 100644 media/libspeex_resampler/src/speex_resampler.h (limited to 'media/libspeex_resampler') diff --git a/media/libspeex_resampler/01_outside-speex.patch b/media/libspeex_resampler/01_outside-speex.patch new file mode 100644 index 0000000000..21cbd2749d --- /dev/null +++ b/media/libspeex_resampler/01_outside-speex.patch @@ -0,0 +1,30 @@ +diff --git a/src/speex_resampler.h b/src/speex_resampler.h +--- a/src/speex_resampler.h ++++ b/src/speex_resampler.h +@@ -34,24 +34,25 @@ + ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + */ + + + #ifndef SPEEX_RESAMPLER_H + #define SPEEX_RESAMPLER_H + +-#ifdef OUTSIDE_SPEEX ++#if 1 /* OUTSIDE_SPEEX */ + + /********* WARNING: MENTAL SANITY ENDS HERE *************/ + + /* If the resampler is defined outside of Speex, we change the symbol names so that + there won't be any clash if linking with Speex later on. */ + + /* #define RANDOM_PREFIX your software name here */ ++#define RANDOM_PREFIX moz_speex + #ifndef RANDOM_PREFIX + #error "Please define RANDOM_PREFIX (above) to something specific to your project to prevent symbol name clashes" + #endif + + #define CAT_PREFIX2(a,b) a ## b + #define CAT_PREFIX(a,b) CAT_PREFIX2(a, b) + + #define speex_resampler_init CAT_PREFIX(RANDOM_PREFIX,_resampler_init) diff --git a/media/libspeex_resampler/02_simd-detect-runtime.patch b/media/libspeex_resampler/02_simd-detect-runtime.patch new file mode 100644 index 0000000000..bf23c8ac8e --- /dev/null +++ b/media/libspeex_resampler/02_simd-detect-runtime.patch @@ -0,0 +1,368 @@ +diff --git a/src/resample.c b/src/resample.c +--- a/src/resample.c ++++ b/src/resample.c +@@ -91,23 +91,17 @@ static void speex_free(void *ptr) {free( + #ifndef NULL + #define NULL 0 + #endif + + #ifndef UINT32_MAX + #define UINT32_MAX 4294967295U + #endif + +-#ifdef USE_SSE +-#include "resample_sse.h" +-#endif +- +-#ifdef USE_NEON +-#include "resample_neon.h" +-#endif ++#include "simd_detect.h" + + /* Number of elements to allocate on the stack */ + #ifdef VAR_ARRAYS + #define FIXED_STACK_ALLOC 8192 + #else + #define FIXED_STACK_ALLOC 1024 + #endif + +@@ -341,17 +335,19 @@ static int resampler_basic_direct_single + const spx_uint32_t den_rate = st->den_rate; + spx_word32_t sum; + + while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len)) + { + const spx_word16_t *sinct = & sinc_table[samp_frac_num*N]; + const spx_word16_t *iptr = & in[last_sample]; + +-#ifndef OVERRIDE_INNER_PRODUCT_SINGLE ++#ifdef OVERRIDE_INNER_PRODUCT_SINGLE ++ if (!moz_speex_have_single_simd()) { ++#endif + int j; + sum = 0; + for(j=0;j= den_rate) + { + samp_frac_num -= den_rate; +@@ -399,29 +397,33 @@ static int resampler_basic_direct_double + const spx_uint32_t den_rate = st->den_rate; + double sum; + + while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len)) + { + const spx_word16_t *sinct = & sinc_table[samp_frac_num*N]; + const spx_word16_t *iptr = & in[last_sample]; + +-#ifndef OVERRIDE_INNER_PRODUCT_DOUBLE ++#ifdef OVERRIDE_INNER_PRODUCT_DOUBLE ++ if(moz_speex_have_double_simd()) { ++#endif + int j; + double accum[4] = {0,0,0,0}; + + for(j=0;j= den_rate) + { + samp_frac_num -= den_rate; +@@ -455,34 +457,38 @@ static int resampler_basic_interpolate_s + #ifdef FIXED_POINT + const spx_word16_t frac = PDIV32(SHL32((samp_frac_num*st->oversample) % st->den_rate,15),st->den_rate); + #else + const spx_word16_t frac = ((float)((samp_frac_num*st->oversample) % st->den_rate))/st->den_rate; + #endif + spx_word16_t interp[4]; + + +-#ifndef OVERRIDE_INTERPOLATE_PRODUCT_SINGLE ++#ifdef OVERRIDE_INTERPOLATE_PRODUCT_SINGLE ++ if (!moz_speex_have_single_simd()) { ++#endif + int j; + spx_word32_t accum[4] = {0,0,0,0}; + + for(j=0;jsinc_table[4+(j+1)*st->oversample-offset-2]); + accum[1] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset-1]); + accum[2] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset]); + accum[3] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset+1]); + } + + cubic_coef(frac, interp); + sum = MULT16_32_Q15(interp[0],accum[0]) + MULT16_32_Q15(interp[1],accum[1]) + MULT16_32_Q15(interp[2],accum[2]) + MULT16_32_Q15(interp[3],accum[3]); + sum = SATURATE32PSHR(sum, 15, 32767); +-#else ++#ifdef OVERRIDE_INTERPOLATE_PRODUCT_SINGLE ++ } else { + cubic_coef(frac, interp); + sum = interpolate_product_single(iptr, st->sinc_table + st->oversample + 4 - offset - 2, N, st->oversample, interp); ++ } + #endif + + out[out_stride * out_sample++] = sum; + last_sample += int_advance; + samp_frac_num += frac_advance; + if (samp_frac_num >= den_rate) + { + samp_frac_num -= den_rate; +@@ -518,33 +524,37 @@ static int resampler_basic_interpolate_d + #ifdef FIXED_POINT + const spx_word16_t frac = PDIV32(SHL32((samp_frac_num*st->oversample) % st->den_rate,15),st->den_rate); + #else + const spx_word16_t frac = ((float)((samp_frac_num*st->oversample) % st->den_rate))/st->den_rate; + #endif + spx_word16_t interp[4]; + + +-#ifndef OVERRIDE_INTERPOLATE_PRODUCT_DOUBLE ++#ifdef OVERRIDE_INTERPOLATE_PRODUCT_DOUBLE ++ if (!moz_speex_have_double_simd()) { ++#endif + int j; + double accum[4] = {0,0,0,0}; + + for(j=0;jsinc_table[4+(j+1)*st->oversample-offset-2]); + accum[1] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset-1]); + accum[2] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset]); + accum[3] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset+1]); + } + + cubic_coef(frac, interp); + sum = MULT16_32_Q15(interp[0],accum[0]) + MULT16_32_Q15(interp[1],accum[1]) + MULT16_32_Q15(interp[2],accum[2]) + MULT16_32_Q15(interp[3],accum[3]); +-#else ++#ifdef OVERRIDE_INTERPOLATE_PRODUCT_DOUBLE ++ } else { + cubic_coef(frac, interp); + sum = interpolate_product_double(iptr, st->sinc_table + st->oversample + 4 - offset - 2, N, st->oversample, interp); ++ } + #endif + + out[out_stride * out_sample++] = PSHR32(sum,15); + last_sample += int_advance; + samp_frac_num += frac_advance; + if (samp_frac_num >= den_rate) + { + samp_frac_num -= den_rate; +diff --git a/src/resample_neon.c b/src/resample_neon.c +--- a/src/resample_neon.c ++++ b/src/resample_neon.c +@@ -32,16 +32,17 @@ + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + + #include ++#include "simd_detect.h" + + #ifdef FIXED_POINT + #if defined(__aarch64__) + static inline int32_t saturate_32bit_to_16bit(int32_t a) { + int32_t ret; + asm ("fmov s0, %w[a]\n" + "sqxtn h0, s0\n" + "sxtl v0.4s, v0.4h\n" +@@ -73,17 +74,17 @@ + } + #endif + #undef WORD2INT + #define WORD2INT(x) (saturate_32bit_to_16bit(x)) + + #define OVERRIDE_INNER_PRODUCT_SINGLE + /* Only works when len % 4 == 0 and len >= 4 */ + #if defined(__aarch64__) +-static inline int32_t inner_product_single(const int16_t *a, const int16_t *b, unsigned int len) ++inline int32_t inner_product_single(const int16_t *a, const int16_t *b, unsigned int len) + { + int32_t ret; + uint32_t remainder = len % 16; + len = len - remainder; + + asm volatile (" cmp %w[len], #0\n" + " b.ne 1f\n" + " ld1 {v16.4h}, [%[b]], #8\n" +@@ -128,17 +129,17 @@ + : [ret] "=r" (ret), [a] "+r" (a), [b] "+r" (b), + [len] "+r" (len), [remainder] "+r" (remainder) + : + : "cc", "v0", + "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23"); + return ret; + } + #else +-static inline int32_t inner_product_single(const int16_t *a, const int16_t *b, unsigned int len) ++inline int32_t inner_product_single(const int16_t *a, const int16_t *b, unsigned int len) + { + int32_t ret; + uint32_t remainder = len % 16; + len = len - remainder; + + asm volatile (" cmp %[len], #0\n" + " bne 1f\n" + " vld1.16 {d16}, [%[b]]!\n" +@@ -218,17 +219,17 @@ + #endif + + #undef WORD2INT + #define WORD2INT(x) (saturate_float_to_16bit(x)) + + #define OVERRIDE_INNER_PRODUCT_SINGLE + /* Only works when len % 4 == 0 and len >= 4 */ + #if defined(__aarch64__) +-static inline float inner_product_single(const float *a, const float *b, unsigned int len) ++inline float inner_product_single(const float *a, const float *b, unsigned int len) + { + float ret; + uint32_t remainder = len % 16; + len = len - remainder; + + asm volatile (" cmp %w[len], #0\n" + " b.ne 1f\n" + " ld1 {v16.4s}, [%[b]], #16\n" +@@ -273,17 +274,17 @@ + : [ret] "=w" (ret), [a] "+r" (a), [b] "+r" (b), + [len] "+r" (len), [remainder] "+r" (remainder) + : + : "cc", "v1", "v2", "v3", "v4", + "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23"); + return ret; + } + #else +-static inline float inner_product_single(const float *a, const float *b, unsigned int len) ++inline float inner_product_single(const float *a, const float *b, unsigned int len) + { + float ret; + uint32_t remainder = len % 16; + len = len - remainder; + + asm volatile (" cmp %[len], #0\n" + " bne 1f\n" + " vld1.32 {q4}, [%[b]]!\n" +diff --git a/src/resample_sse.c b/src/resample_sse.c +--- a/src/resample_sse.c ++++ b/src/resample_sse.c +@@ -29,37 +29,39 @@ + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + ++#include "simd_detect.h" ++ + #include + + #define OVERRIDE_INNER_PRODUCT_SINGLE +-static inline float inner_product_single(const float *a, const float *b, unsigned int len) ++float inner_product_single(const float *a, const float *b, unsigned int len) + { + int i; + float ret; + __m128 sum = _mm_setzero_ps(); + for (i=0;i + #define OVERRIDE_INNER_PRODUCT_DOUBLE + +-static inline double inner_product_double(const float *a, const float *b, unsigned int len) ++double inner_product_double(const float *a, const float *b, unsigned int len) + { + int i; + double ret; + __m128d sum = _mm_setzero_pd(); + __m128 t; + for (i=0;i +b=913854 add speex_resampler_set_skip_frac_num r=jmspeex + +This allows a client to align output samples consistently for independent +resampling of contiguous input buffers. + +diff --git a/src/resample.c b/src/resample.c +--- a/src/resample.c ++++ b/src/resample.c +@@ -1128,16 +1128,28 @@ EXPORT int speex_resampler_get_output_la + EXPORT int speex_resampler_skip_zeros(SpeexResamplerState *st) + { + spx_uint32_t i; + for (i=0;inb_channels;i++) + st->last_sample[i] = st->filt_len/2; + return RESAMPLER_ERR_SUCCESS; + } + ++EXPORT int speex_resampler_set_skip_frac_num(SpeexResamplerState *st, spx_uint32_t skip_frac_num) ++{ ++ spx_uint32_t i; ++ spx_uint32_t last_sample = skip_frac_num / st->den_rate; ++ spx_uint32_t samp_frac_num = skip_frac_num % st->den_rate; ++ for (i=0;inb_channels;i++) { ++ st->last_sample[i] = last_sample; ++ st->samp_frac_num[i] = samp_frac_num; ++ } ++ return RESAMPLER_ERR_SUCCESS; ++} ++ + EXPORT int speex_resampler_reset_mem(SpeexResamplerState *st) + { + spx_uint32_t i; + for (i=0;inb_channels;i++) + { + st->last_sample[i] = 0; + st->magic_samples[i] = 0; + st->samp_frac_num[i] = 0; +diff --git a/src/speex_resampler.h b/src/speex_resampler.h +--- a/src/speex_resampler.h ++++ b/src/speex_resampler.h +@@ -69,16 +69,17 @@ + #define speex_resampler_get_quality CAT_PREFIX(RANDOM_PREFIX,_resampler_get_quality) + #define speex_resampler_set_input_stride CAT_PREFIX(RANDOM_PREFIX,_resampler_set_input_stride) + #define speex_resampler_get_input_stride CAT_PREFIX(RANDOM_PREFIX,_resampler_get_input_stride) + #define speex_resampler_set_output_stride CAT_PREFIX(RANDOM_PREFIX,_resampler_set_output_stride) + #define speex_resampler_get_output_stride CAT_PREFIX(RANDOM_PREFIX,_resampler_get_output_stride) + #define speex_resampler_get_input_latency CAT_PREFIX(RANDOM_PREFIX,_resampler_get_input_latency) + #define speex_resampler_get_output_latency CAT_PREFIX(RANDOM_PREFIX,_resampler_get_output_latency) + #define speex_resampler_skip_zeros CAT_PREFIX(RANDOM_PREFIX,_resampler_skip_zeros) ++#define speex_resampler_set_skip_frac_num CAT_PREFIX(RANDOM_PREFIX,_resampler_set_skip_frac_num) + #define speex_resampler_reset_mem CAT_PREFIX(RANDOM_PREFIX,_resampler_reset_mem) + #define speex_resampler_strerror CAT_PREFIX(RANDOM_PREFIX,_resampler_strerror) + + #define spx_int16_t short + #define spx_int32_t int + #define spx_uint16_t unsigned short + #define spx_uint32_t unsigned int + +@@ -317,16 +318,32 @@ int speex_resampler_get_output_latency(S + * resampler. It is recommended to use that when resampling an audio file, as + * it will generate a file with the same length. For real-time processing, + * it is probably easier not to use this call (so that the output duration + * is the same for the first frame). + * @param st Resampler state + */ + int speex_resampler_skip_zeros(SpeexResamplerState *st); + ++/** Set the numerator in a fraction determining the advance through input ++ * samples before writing any output samples. The denominator of the fraction ++ * is the value returned from speex_resampler_get_ratio() in ratio_den. This ++ * is only useful before starting to use a newly created or reset resampler. ++ * If the first input sample is interpreted as the signal at time ++ * input_latency*in_rate, then the first output sample represents the signal ++ * at the time frac_num/ratio_num*out_rate. ++ * This is intended for careful alignment of output sample points wrt input ++ * sample points. Large values are not an efficient offset into the in buffer. ++ * @param st Resampler state ++ * @param skip_frac_num Numerator of the offset fraction, ++ * between 0 and ratio_den-1. ++ */ ++int speex_resampler_set_skip_frac_num(SpeexResamplerState *st, ++ spx_uint32_t skip_frac_num); ++ + /** Reset a resampler so a new (unrelated) stream can be processed. + * @param st Resampler state + */ + int speex_resampler_reset_mem(SpeexResamplerState *st); + + /** Returns the English meaning for an error code + * @param err Error code + * @return English string diff --git a/media/libspeex_resampler/04_hugemem.patch b/media/libspeex_resampler/04_hugemem.patch new file mode 100644 index 0000000000..f8bbac8c9f --- /dev/null +++ b/media/libspeex_resampler/04_hugemem.patch @@ -0,0 +1,57 @@ +diff --git a/src/resample.c b/src/resample.c +--- a/src/resample.c ++++ b/src/resample.c +@@ -56,16 +56,18 @@ + (e.g. 2/3), and get rid of the rounding operations in the inner loop. + The latter both reduces CPU time and makes the algorithm more SIMD-friendly. + */ + + #ifdef HAVE_CONFIG_H + #include "config.h" + #endif + ++#define RESAMPLE_HUGEMEM 1 ++ + #ifdef OUTSIDE_SPEEX + #include + static void *speex_alloc(int size) {return calloc(size,1);} + static void *speex_realloc(void *ptr, int size) {return realloc(ptr, size);} + static void speex_free(void *ptr) {free(ptr);} + #ifndef EXPORT + #define EXPORT + #endif +@@ -633,25 +645,26 @@ static int update_filter(SpeexResamplerS + st->oversample >>= 1; + if (st->oversample < 1) + st->oversample = 1; + } else { + /* up-sampling */ + st->cutoff = quality_map[st->quality].upsample_bandwidth; + } + +-#ifdef RESAMPLE_FULL_SINC_TABLE +- use_direct = 1; +- if (INT_MAX/sizeof(spx_word16_t)/st->den_rate < st->filt_len) +- goto fail; +-#else +- /* Choose the resampling type that requires the least amount of memory */ +- use_direct = st->filt_len*st->den_rate <= st->filt_len*st->oversample+8 +- && INT_MAX/sizeof(spx_word16_t)/st->den_rate >= st->filt_len; ++ use_direct = ++#ifdef RESAMPLE_HUGEMEM ++ /* Choose the direct resampler, even with higher initialization costs, ++ when resampling any multiple of 100 to 44100. */ ++ st->den_rate <= 441 ++#else ++ /* Choose the resampling type that requires the least amount of memory */ ++ st->filt_len*st->den_rate <= st->filt_len*st->oversample+8 + #endif ++ && INT_MAX/sizeof(spx_word16_t)/st->den_rate >= st->filt_len; + if (use_direct) + { + min_sinc_table_length = st->filt_len*st->den_rate; + } else { + if ((INT_MAX/sizeof(spx_word16_t)-8)/st->oversample < st->filt_len) + goto fail; + + min_sinc_table_length = st->filt_len*st->oversample+8; diff --git a/media/libspeex_resampler/05_set-rate-overflow-no-return.patch b/media/libspeex_resampler/05_set-rate-overflow-no-return.patch new file mode 100644 index 0000000000..01505ed9ae --- /dev/null +++ b/media/libspeex_resampler/05_set-rate-overflow-no-return.patch @@ -0,0 +1,27 @@ +This is a fix for https://bugzilla.mozilla.org/show_bug.cgi?id=1274083 + +diff --git a/src/resample.c b/src/resample.c +--- a/src/resample.c ++++ b/src/resample.c +@@ -1129,18 +1129,19 @@ EXPORT int speex_resampler_set_rate_frac + + st->num_rate /= fact; + st->den_rate /= fact; + + if (old_den > 0) + { + for (i=0;inb_channels;i++) + { +- if (multiply_frac(&st->samp_frac_num[i],st->samp_frac_num[i],st->den_rate,old_den) != RESAMPLER_ERR_SUCCESS) +- return RESAMPLER_ERR_OVERFLOW; ++ if (multiply_frac(&st->samp_frac_num[i],st->samp_frac_num[i],st->den_rate,old_den) != RESAMPLER_ERR_SUCCESS) { ++ st->samp_frac_num[i] = st->den_rate-1; ++ } + /* Safety net */ + if (st->samp_frac_num[i] >= st->den_rate) + st->samp_frac_num[i] = st->den_rate-1; + } + } + + if (st->initialised) + return update_filter(st); diff --git a/media/libspeex_resampler/AUTHORS b/media/libspeex_resampler/AUTHORS new file mode 100644 index 0000000000..395c3fec23 --- /dev/null +++ b/media/libspeex_resampler/AUTHORS @@ -0,0 +1,18 @@ +Jean-Marc Valin + All the code except the following + +David Rowe + lsp.c lsp.h + Also ideas and feedback + +John Francis Edwards + wave_out.[ch], some #ifdefs for windows port and MSVC project files + +Segher Boessenkool + Misc. optimizations (for QMF in particular) + +Atsuhiko Yamanaka : + Patch to speexenc.c to add Vorbis comment format + +Radim Kolar : + Patch to speexenc.c for supporting more input formats diff --git a/media/libspeex_resampler/COPYING b/media/libspeex_resampler/COPYING new file mode 100644 index 0000000000..7d6dc4aabd --- /dev/null +++ b/media/libspeex_resampler/COPYING @@ -0,0 +1,35 @@ +Copyright 2002-2008 Xiph.org Foundation +Copyright 2002-2008 Jean-Marc Valin +Copyright 2005-2007 Analog Devices Inc. +Copyright 2005-2008 Commonwealth Scientific and Industrial Research + Organisation (CSIRO) +Copyright 1993, 2002, 2006 David Rowe +Copyright 2003 EpicGames +Copyright 1992-1994 Jutta Degener, Carsten Bormann + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +- Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +- Neither the name of the Xiph.org Foundation nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/media/libspeex_resampler/moz.build b/media/libspeex_resampler/moz.build new file mode 100644 index 0000000000..8ed6a39726 --- /dev/null +++ b/media/libspeex_resampler/moz.build @@ -0,0 +1,11 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +with Files("**"): + BUG_COMPONENT = ("Core", "Web Audio") + +DIRS += ['src'] + diff --git a/media/libspeex_resampler/moz.yaml b/media/libspeex_resampler/moz.yaml new file mode 100644 index 0000000000..3e045d10db --- /dev/null +++ b/media/libspeex_resampler/moz.yaml @@ -0,0 +1,66 @@ +schema: 1 + +bugzilla: + product: Core + component: "Web Audio" + +origin: + name: speexdsp + description: DSP library derived from speex + + url: https://speex.org/ + + release: 738e17905e1ca2a1fa932ddd9c2a85d089f4e845 (2023-02-28T16:25:59.000-05:00). + revision: 738e17905e1ca2a1fa932ddd9c2a85d089f4e845 + + license: BSD-3-Clause + license-file: COPYING + +vendoring: + url: https://gitlab.xiph.org/xiph/speexdsp + source-hosting: gitlab + tracking: commit + + exclude: + - "**" + + keep: + - src/simd_detect.h + - src/simd_detect.cpp + + include: + - libspeexdsp/resample.c + - libspeexdsp/resample_sse.h + - libspeexdsp/resample_neon.h + - libspeexdsp/arch.h + - libspeexdsp/fixed_generic.h + - include/speex/speex_resampler.h + - AUTHORS + - COPYING + + update-actions: + - action: move-file + from: '{vendor_dir}/libspeexdsp/resample.c' + to: '{vendor_dir}/src/resample.c' + - action: move-file + from: '{vendor_dir}/libspeexdsp/resample_sse.h' + to: '{vendor_dir}/src/resample_sse.c' + - action: move-file + from: '{vendor_dir}/libspeexdsp/resample_neon.h' + to: '{vendor_dir}/src/resample_neon.c' + - action: move-file + from: '{vendor_dir}/libspeexdsp/arch.h' + to: '{vendor_dir}/src/arch.h' + - action: move-file + from: '{vendor_dir}/libspeexdsp/fixed_generic.h' + to: '{vendor_dir}/src/fixed_generic.h' + - action: move-file + from: '{vendor_dir}/include/speex/speex_resampler.h' + to: '{vendor_dir}/src/speex_resampler.h' + + patches: + - 01_outside-speex.patch + - 02_simd-detect-runtime.patch + - 03_set-skip-frac.patch + - 04_hugemem.patch + - 05_set-rate-overflow-no-return.patch diff --git a/media/libspeex_resampler/src/arch.h b/media/libspeex_resampler/src/arch.h new file mode 100644 index 0000000000..1cac3d9c89 --- /dev/null +++ b/media/libspeex_resampler/src/arch.h @@ -0,0 +1,232 @@ +/* Copyright (C) 2003 Jean-Marc Valin */ +/** + @file arch.h + @brief Various architecture definitions Speex +*/ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + - Neither the name of the Xiph.org Foundation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef ARCH_H +#define ARCH_H + +/* A couple test to catch stupid option combinations */ +#ifdef FIXED_POINT + +#ifdef FLOATING_POINT +#error You cannot compile as floating point and fixed point at the same time +#endif +#ifdef USE_SSE +#error SSE is only for floating-point +#endif +#if defined(ARM4_ASM) + defined(ARM5E_ASM) + defined(BFIN_ASM) > 1 +#error Make up your mind. What CPU do you have? +#endif +#ifdef VORBIS_PSYCHO +#error Vorbis-psy model currently not implemented in fixed-point +#endif + +#else + +#ifndef FLOATING_POINT +#error You now need to define either FIXED_POINT or FLOATING_POINT +#endif +#if defined(ARM4_ASM) || defined(ARM5E_ASM) || defined(BFIN_ASM) +#error I suppose you can have a [ARM4/ARM5E/Blackfin] that has float instructions? +#endif +#ifdef FIXED_DEBUG +#error "Don't you think enabling fixed-point is a good thing to do if you want to debug that?" +#endif + + +#endif + +#ifndef OUTSIDE_SPEEX +#include "speex/speexdsp_types.h" +#endif + +#define ABS(x) ((x) < 0 ? (-(x)) : (x)) /**< Absolute integer value. */ +#define ABS16(x) ((x) < 0 ? (-(x)) : (x)) /**< Absolute 16-bit value. */ +#define MIN16(a,b) ((a) < (b) ? (a) : (b)) /**< Maximum 16-bit value. */ +#define MAX16(a,b) ((a) > (b) ? (a) : (b)) /**< Maximum 16-bit value. */ +#define ABS32(x) ((x) < 0 ? (-(x)) : (x)) /**< Absolute 32-bit value. */ +#define MIN32(a,b) ((a) < (b) ? (a) : (b)) /**< Maximum 32-bit value. */ +#define MAX32(a,b) ((a) > (b) ? (a) : (b)) /**< Maximum 32-bit value. */ + +#ifdef FIXED_POINT + +typedef spx_int16_t spx_word16_t; +typedef spx_int32_t spx_word32_t; +typedef spx_word32_t spx_mem_t; +typedef spx_word16_t spx_coef_t; +typedef spx_word16_t spx_lsp_t; +typedef spx_word32_t spx_sig_t; + +#define Q15ONE 32767 + +#define LPC_SCALING 8192 +#define SIG_SCALING 16384 +#define LSP_SCALING 8192. +#define GAMMA_SCALING 32768. +#define GAIN_SCALING 64 +#define GAIN_SCALING_1 0.015625 + +#define LPC_SHIFT 13 +#define LSP_SHIFT 13 +#define SIG_SHIFT 14 +#define GAIN_SHIFT 6 + +#define WORD2INT(x) ((x) < -32767 ? -32768 : ((x) > 32766 ? 32767 : (x))) + +#define VERY_SMALL 0 +#define VERY_LARGE32 ((spx_word32_t)2147483647) +#define VERY_LARGE16 ((spx_word16_t)32767) +#define Q15_ONE ((spx_word16_t)32767) + + +#ifdef FIXED_DEBUG +#include "fixed_debug.h" +#else + +#include "fixed_generic.h" + +#ifdef ARM5E_ASM +#include "fixed_arm5e.h" +#elif defined(ARM4_ASM) +#include "fixed_arm4.h" +#elif defined(BFIN_ASM) +#include "fixed_bfin.h" +#endif + +#endif + + +#else + +typedef float spx_mem_t; +typedef float spx_coef_t; +typedef float spx_lsp_t; +typedef float spx_sig_t; +typedef float spx_word16_t; +typedef float spx_word32_t; + +#define Q15ONE 1.0f +#define LPC_SCALING 1.f +#define SIG_SCALING 1.f +#define LSP_SCALING 1.f +#define GAMMA_SCALING 1.f +#define GAIN_SCALING 1.f +#define GAIN_SCALING_1 1.f + + +#define VERY_SMALL 1e-15f +#define VERY_LARGE32 1e15f +#define VERY_LARGE16 1e15f +#define Q15_ONE ((spx_word16_t)1.f) + +#define QCONST16(x,bits) (x) +#define QCONST32(x,bits) (x) + +#define NEG16(x) (-(x)) +#define NEG32(x) (-(x)) +#define EXTRACT16(x) (x) +#define EXTEND32(x) (x) +#define SHR16(a,shift) (a) +#define SHL16(a,shift) (a) +#define SHR32(a,shift) (a) +#define SHL32(a,shift) (a) +#define PSHR16(a,shift) (a) +#define PSHR32(a,shift) (a) +#define VSHR32(a,shift) (a) +#define SATURATE16(x,a) (x) +#define SATURATE32(x,a) (x) +#define SATURATE32PSHR(x,shift,a) (x) + +#define PSHR(a,shift) (a) +#define SHR(a,shift) (a) +#define SHL(a,shift) (a) +#define SATURATE(x,a) (x) + +#define ADD16(a,b) ((a)+(b)) +#define SUB16(a,b) ((a)-(b)) +#define ADD32(a,b) ((a)+(b)) +#define SUB32(a,b) ((a)-(b)) +#define MULT16_16_16(a,b) ((a)*(b)) +#define MULT16_32_32(a,b) ((a)*(b)) +#define MULT16_16(a,b) ((spx_word32_t)(a)*(spx_word32_t)(b)) +#define MAC16_16(c,a,b) ((c)+(spx_word32_t)(a)*(spx_word32_t)(b)) + +#define MULT16_32_Q15(a,b) ((a)*(b)) +#define MULT16_32_P15(a,b) ((a)*(b)) + +#define MAC16_32_Q15(c,a,b) ((c)+(a)*(b)) + +#define MAC16_16_Q11(c,a,b) ((c)+(a)*(b)) +#define MAC16_16_Q13(c,a,b) ((c)+(a)*(b)) +#define MAC16_16_P13(c,a,b) ((c)+(a)*(b)) +#define MULT16_16_Q11_32(a,b) ((a)*(b)) +#define MULT16_16_Q13(a,b) ((a)*(b)) +#define MULT16_16_Q14(a,b) ((a)*(b)) +#define MULT16_16_Q15(a,b) ((a)*(b)) +#define MULT16_16_P15(a,b) ((a)*(b)) +#define MULT16_16_P13(a,b) ((a)*(b)) +#define MULT16_16_P14(a,b) ((a)*(b)) + +#define DIV32_16(a,b) (((spx_word32_t)(a))/(spx_word16_t)(b)) +#define PDIV32_16(a,b) (((spx_word32_t)(a))/(spx_word16_t)(b)) +#define DIV32(a,b) (((spx_word32_t)(a))/(spx_word32_t)(b)) +#define PDIV32(a,b) (((spx_word32_t)(a))/(spx_word32_t)(b)) + +#define WORD2INT(x) ((x) < -32767.5f ? -32768 : \ + ((x) > 32766.5f ? 32767 : (spx_int16_t)floor(.5 + (x)))) +#endif + + +#if defined(CONFIG_TI_C54X) || defined(CONFIG_TI_C55X) + +/* 2 on TI C5x DSP */ +#define BYTES_PER_CHAR 2 +#define BITS_PER_CHAR 16 +#define LOG2_BITS_PER_CHAR 4 + +#else + +#define BYTES_PER_CHAR 1 +#define BITS_PER_CHAR 8 +#define LOG2_BITS_PER_CHAR 3 + +#endif + + + +#ifdef FIXED_DEBUG +extern long long spx_mips; +#endif + + +#endif diff --git a/media/libspeex_resampler/src/fixed_generic.h b/media/libspeex_resampler/src/fixed_generic.h new file mode 100644 index 0000000000..09366c36ce --- /dev/null +++ b/media/libspeex_resampler/src/fixed_generic.h @@ -0,0 +1,106 @@ +/* Copyright (C) 2003 Jean-Marc Valin */ +/** + @file fixed_generic.h + @brief Generic fixed-point operations +*/ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + - Neither the name of the Xiph.org Foundation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef FIXED_GENERIC_H +#define FIXED_GENERIC_H + +#define QCONST16(x,bits) ((spx_word16_t)(.5+(x)*(((spx_word32_t)1)<<(bits)))) +#define QCONST32(x,bits) ((spx_word32_t)(.5+(x)*(((spx_word32_t)1)<<(bits)))) + +#define NEG16(x) (-(x)) +#define NEG32(x) (-(x)) +#define EXTRACT16(x) ((spx_word16_t)(x)) +#define EXTEND32(x) ((spx_word32_t)(x)) +#define SHR16(a,shift) ((a) >> (shift)) +#define SHL16(a,shift) ((a) << (shift)) +#define SHR32(a,shift) ((a) >> (shift)) +#define SHL32(a,shift) ((a) << (shift)) +#define PSHR16(a,shift) (SHR16((a)+((1<<((shift))>>1)),shift)) +#define PSHR32(a,shift) (SHR32((a)+((EXTEND32(1)<<((shift))>>1)),shift)) +#define VSHR32(a, shift) (((shift)>0) ? SHR32(a, shift) : SHL32(a, -(shift))) +#define SATURATE16(x,a) (((x)>(a) ? (a) : (x)<-(a) ? -(a) : (x))) +#define SATURATE32(x,a) (((x)>(a) ? (a) : (x)<-(a) ? -(a) : (x))) + +#define SATURATE32PSHR(x,shift,a) (((x)>=(SHL32(a,shift))) ? (a) : \ + (x)<=-(SHL32(a,shift)) ? -(a) : \ + (PSHR32(x, shift))) + +#define SHR(a,shift) ((a) >> (shift)) +#define SHL(a,shift) ((spx_word32_t)(a) << (shift)) +#define PSHR(a,shift) (SHR((a)+((EXTEND32(1)<<((shift))>>1)),shift)) +#define SATURATE(x,a) (((x)>(a) ? (a) : (x)<-(a) ? -(a) : (x))) + + +#define ADD16(a,b) ((spx_word16_t)((spx_word16_t)(a)+(spx_word16_t)(b))) +#define SUB16(a,b) ((spx_word16_t)(a)-(spx_word16_t)(b)) +#define ADD32(a,b) ((spx_word32_t)(a)+(spx_word32_t)(b)) +#define SUB32(a,b) ((spx_word32_t)(a)-(spx_word32_t)(b)) + + +/* result fits in 16 bits */ +#define MULT16_16_16(a,b) (((spx_word16_t)(a))*((spx_word16_t)(b))) +/* result fits in 32 bits */ +#define MULT16_32_32(a,b) (((spx_word16_t)(a))*((spx_word32_t)(b))) + +/* (spx_word32_t)(spx_word16_t) gives TI compiler a hint that it's 16x16->32 multiply */ +#define MULT16_16(a,b) (((spx_word32_t)(spx_word16_t)(a))*((spx_word32_t)(spx_word16_t)(b))) + +#define MAC16_16(c,a,b) (ADD32((c),MULT16_16((a),(b)))) + +#define MULT16_32_P15(a,b) ADD32(MULT16_32_32(a,SHR((b),15)), PSHR(MULT16_16((a),((b)&0x00007fff)),15)) +#define MULT16_32_Q15(a,b) ADD32(MULT16_32_32(a,SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15)) +#define MAC16_32_Q15(c,a,b) ADD32(c,MULT16_32_Q15(a,b)) + + +#define MAC16_16_Q11(c,a,b) (ADD32((c),SHR(MULT16_16((a),(b)),11))) +#define MAC16_16_Q13(c,a,b) (ADD32((c),SHR(MULT16_16((a),(b)),13))) +#define MAC16_16_P13(c,a,b) (ADD32((c),SHR(ADD32(4096,MULT16_16((a),(b))),13))) + +#define MULT16_16_Q11_32(a,b) (SHR(MULT16_16((a),(b)),11)) +#define MULT16_16_Q13(a,b) (SHR(MULT16_16((a),(b)),13)) +#define MULT16_16_Q14(a,b) (SHR(MULT16_16((a),(b)),14)) +#define MULT16_16_Q15(a,b) (SHR(MULT16_16((a),(b)),15)) + +#define MULT16_16_P13(a,b) (SHR(ADD32(4096,MULT16_16((a),(b))),13)) +#define MULT16_16_P14(a,b) (SHR(ADD32(8192,MULT16_16((a),(b))),14)) +#define MULT16_16_P15(a,b) (SHR(ADD32(16384,MULT16_16((a),(b))),15)) + +#define MUL_16_32_R15(a,bh,bl) ADD32(MULT16_16((a),(bh)), SHR(MULT16_16((a),(bl)),15)) + +#define DIV32_16(a,b) ((spx_word16_t)(((spx_word32_t)(a))/((spx_word16_t)(b)))) +#define PDIV32_16(a,b) ((spx_word16_t)(((spx_word32_t)(a)+((spx_word16_t)(b)>>1))/((spx_word16_t)(b)))) +#define DIV32(a,b) (((spx_word32_t)(a))/((spx_word32_t)(b))) +#define PDIV32(a,b) (((spx_word32_t)(a)+((spx_word16_t)(b)>>1))/((spx_word32_t)(b))) + +#endif diff --git a/media/libspeex_resampler/src/moz.build b/media/libspeex_resampler/src/moz.build new file mode 100644 index 0000000000..1150722ca5 --- /dev/null +++ b/media/libspeex_resampler/src/moz.build @@ -0,0 +1,47 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +Library('speex') + +EXPORTS.speex += [ + 'speex_resampler.h', +] + +SOURCES += [ + 'resample.c', + 'simd_detect.cpp', +] + +# We allow warnings for third-party code that can be updated from upstream. +AllowCompilerWarnings() + +FINAL_LIBRARY = 'gkmedias' + +# We don't compile the full speex codec, only the resampler. +DEFINES['OUTSIDE_SPEEX'] = True + +DEFINES['EXPORT'] = '' +DEFINES['FLOATING_POINT'] = True + +# Only use SSE code when using floating point samples, and on x86 +if CONFIG['INTEL_ARCHITECTURE']: + DEFINES['USE_SSE'] = True + DEFINES['USE_SSE2'] = True + SOURCES += [ + 'resample_sse.c' + ] + SOURCES['resample_sse.c'].flags += CONFIG['SSE2_FLAGS'] + +if CONFIG['TARGET_CPU'] == 'arm' and CONFIG['BUILD_ARM_NEON']: + DEFINES['USE_NEON'] = True + SOURCES += [ + 'resample_neon.c' + ] + SOURCES['resample_neon.c'].flags += CONFIG['NEON_FLAGS'] + +# Suppress warnings in third-party code. +if CONFIG['CC_TYPE'] in ('clang', 'gcc'): + CFLAGS += ['-Wno-sign-compare'] diff --git a/media/libspeex_resampler/src/resample.c b/media/libspeex_resampler/src/resample.c new file mode 100644 index 0000000000..b2efcca3a1 --- /dev/null +++ b/media/libspeex_resampler/src/resample.c @@ -0,0 +1,1268 @@ +/* Copyright (C) 2007-2008 Jean-Marc Valin + Copyright (C) 2008 Thorvald Natvig + + File: resample.c + Arbitrary resampling code + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, + INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. +*/ + +/* + The design goals of this code are: + - Very fast algorithm + - SIMD-friendly algorithm + - Low memory requirement + - Good *perceptual* quality (and not best SNR) + + Warning: This resampler is relatively new. Although I think I got rid of + all the major bugs and I don't expect the API to change anymore, there + may be something I've missed. So use with caution. + + This algorithm is based on this original resampling algorithm: + Smith, Julius O. Digital Audio Resampling Home Page + Center for Computer Research in Music and Acoustics (CCRMA), + Stanford University, 2007. + Web published at https://ccrma.stanford.edu/~jos/resample/. + + There is one main difference, though. This resampler uses cubic + interpolation instead of linear interpolation in the above paper. This + makes the table much smaller and makes it possible to compute that table + on a per-stream basis. In turn, being able to tweak the table for each + stream makes it possible to both reduce complexity on simple ratios + (e.g. 2/3), and get rid of the rounding operations in the inner loop. + The latter both reduces CPU time and makes the algorithm more SIMD-friendly. +*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#define RESAMPLE_HUGEMEM 1 + +#ifdef OUTSIDE_SPEEX +#include +static void *speex_alloc(int size) {return calloc(size,1);} +static void *speex_realloc(void *ptr, int size) {return realloc(ptr, size);} +static void speex_free(void *ptr) {free(ptr);} +#ifndef EXPORT +#define EXPORT +#endif +#include "speex_resampler.h" +#include "arch.h" +#else /* OUTSIDE_SPEEX */ + +#include "speex/speex_resampler.h" +#include "arch.h" +#include "os_support.h" +#endif /* OUTSIDE_SPEEX */ + +#include +#include + +#ifndef M_PI +#define M_PI 3.14159265358979323846 +#endif + +#define IMAX(a,b) ((a) > (b) ? (a) : (b)) +#define IMIN(a,b) ((a) < (b) ? (a) : (b)) + +#ifndef NULL +#define NULL 0 +#endif + +#ifndef UINT32_MAX +#define UINT32_MAX 4294967295U +#endif + +#include "simd_detect.h" + +/* Number of elements to allocate on the stack */ +#ifdef VAR_ARRAYS +#define FIXED_STACK_ALLOC 8192 +#else +#define FIXED_STACK_ALLOC 1024 +#endif + +typedef int (*resampler_basic_func)(SpeexResamplerState *, spx_uint32_t , const spx_word16_t *, spx_uint32_t *, spx_word16_t *, spx_uint32_t *); + +struct SpeexResamplerState_ { + spx_uint32_t in_rate; + spx_uint32_t out_rate; + spx_uint32_t num_rate; + spx_uint32_t den_rate; + + int quality; + spx_uint32_t nb_channels; + spx_uint32_t filt_len; + spx_uint32_t mem_alloc_size; + spx_uint32_t buffer_size; + int int_advance; + int frac_advance; + float cutoff; + spx_uint32_t oversample; + int initialised; + int started; + + /* These are per-channel */ + spx_int32_t *last_sample; + spx_uint32_t *samp_frac_num; + spx_uint32_t *magic_samples; + + spx_word16_t *mem; + spx_word16_t *sinc_table; + spx_uint32_t sinc_table_length; + resampler_basic_func resampler_ptr; + + int in_stride; + int out_stride; +} ; + +static const double kaiser12_table[68] = { + 0.99859849, 1.00000000, 0.99859849, 0.99440475, 0.98745105, 0.97779076, + 0.96549770, 0.95066529, 0.93340547, 0.91384741, 0.89213598, 0.86843014, + 0.84290116, 0.81573067, 0.78710866, 0.75723148, 0.72629970, 0.69451601, + 0.66208321, 0.62920216, 0.59606986, 0.56287762, 0.52980938, 0.49704014, + 0.46473455, 0.43304576, 0.40211431, 0.37206735, 0.34301800, 0.31506490, + 0.28829195, 0.26276832, 0.23854851, 0.21567274, 0.19416736, 0.17404546, + 0.15530766, 0.13794294, 0.12192957, 0.10723616, 0.09382272, 0.08164178, + 0.07063950, 0.06075685, 0.05193064, 0.04409466, 0.03718069, 0.03111947, + 0.02584161, 0.02127838, 0.01736250, 0.01402878, 0.01121463, 0.00886058, + 0.00691064, 0.00531256, 0.00401805, 0.00298291, 0.00216702, 0.00153438, + 0.00105297, 0.00069463, 0.00043489, 0.00025272, 0.00013031, 0.0000527734, + 0.00001000, 0.00000000}; +/* +static const double kaiser12_table[36] = { + 0.99440475, 1.00000000, 0.99440475, 0.97779076, 0.95066529, 0.91384741, + 0.86843014, 0.81573067, 0.75723148, 0.69451601, 0.62920216, 0.56287762, + 0.49704014, 0.43304576, 0.37206735, 0.31506490, 0.26276832, 0.21567274, + 0.17404546, 0.13794294, 0.10723616, 0.08164178, 0.06075685, 0.04409466, + 0.03111947, 0.02127838, 0.01402878, 0.00886058, 0.00531256, 0.00298291, + 0.00153438, 0.00069463, 0.00025272, 0.0000527734, 0.00000500, 0.00000000}; +*/ +static const double kaiser10_table[36] = { + 0.99537781, 1.00000000, 0.99537781, 0.98162644, 0.95908712, 0.92831446, + 0.89005583, 0.84522401, 0.79486424, 0.74011713, 0.68217934, 0.62226347, + 0.56155915, 0.50119680, 0.44221549, 0.38553619, 0.33194107, 0.28205962, + 0.23636152, 0.19515633, 0.15859932, 0.12670280, 0.09935205, 0.07632451, + 0.05731132, 0.04193980, 0.02979584, 0.02044510, 0.01345224, 0.00839739, + 0.00488951, 0.00257636, 0.00115101, 0.00035515, 0.00000000, 0.00000000}; + +static const double kaiser8_table[36] = { + 0.99635258, 1.00000000, 0.99635258, 0.98548012, 0.96759014, 0.94302200, + 0.91223751, 0.87580811, 0.83439927, 0.78875245, 0.73966538, 0.68797126, + 0.63451750, 0.58014482, 0.52566725, 0.47185369, 0.41941150, 0.36897272, + 0.32108304, 0.27619388, 0.23465776, 0.19672670, 0.16255380, 0.13219758, + 0.10562887, 0.08273982, 0.06335451, 0.04724088, 0.03412321, 0.02369490, + 0.01563093, 0.00959968, 0.00527363, 0.00233883, 0.00050000, 0.00000000}; + +static const double kaiser6_table[36] = { + 0.99733006, 1.00000000, 0.99733006, 0.98935595, 0.97618418, 0.95799003, + 0.93501423, 0.90755855, 0.87598009, 0.84068475, 0.80211977, 0.76076565, + 0.71712752, 0.67172623, 0.62508937, 0.57774224, 0.53019925, 0.48295561, + 0.43647969, 0.39120616, 0.34752997, 0.30580127, 0.26632152, 0.22934058, + 0.19505503, 0.16360756, 0.13508755, 0.10953262, 0.08693120, 0.06722600, + 0.05031820, 0.03607231, 0.02432151, 0.01487334, 0.00752000, 0.00000000}; + +struct FuncDef { + const double *table; + int oversample; +}; + +static const struct FuncDef kaiser12_funcdef = {kaiser12_table, 64}; +#define KAISER12 (&kaiser12_funcdef) +static const struct FuncDef kaiser10_funcdef = {kaiser10_table, 32}; +#define KAISER10 (&kaiser10_funcdef) +static const struct FuncDef kaiser8_funcdef = {kaiser8_table, 32}; +#define KAISER8 (&kaiser8_funcdef) +static const struct FuncDef kaiser6_funcdef = {kaiser6_table, 32}; +#define KAISER6 (&kaiser6_funcdef) + +struct QualityMapping { + int base_length; + int oversample; + float downsample_bandwidth; + float upsample_bandwidth; + const struct FuncDef *window_func; +}; + + +/* This table maps conversion quality to internal parameters. There are two + reasons that explain why the up-sampling bandwidth is larger than the + down-sampling bandwidth: + 1) When up-sampling, we can assume that the spectrum is already attenuated + close to the Nyquist rate (from an A/D or a previous resampling filter) + 2) Any aliasing that occurs very close to the Nyquist rate will be masked + by the sinusoids/noise just below the Nyquist rate (guaranteed only for + up-sampling). +*/ +static const struct QualityMapping quality_map[11] = { + { 8, 4, 0.830f, 0.860f, KAISER6 }, /* Q0 */ + { 16, 4, 0.850f, 0.880f, KAISER6 }, /* Q1 */ + { 32, 4, 0.882f, 0.910f, KAISER6 }, /* Q2 */ /* 82.3% cutoff ( ~60 dB stop) 6 */ + { 48, 8, 0.895f, 0.917f, KAISER8 }, /* Q3 */ /* 84.9% cutoff ( ~80 dB stop) 8 */ + { 64, 8, 0.921f, 0.940f, KAISER8 }, /* Q4 */ /* 88.7% cutoff ( ~80 dB stop) 8 */ + { 80, 16, 0.922f, 0.940f, KAISER10}, /* Q5 */ /* 89.1% cutoff (~100 dB stop) 10 */ + { 96, 16, 0.940f, 0.945f, KAISER10}, /* Q6 */ /* 91.5% cutoff (~100 dB stop) 10 */ + {128, 16, 0.950f, 0.950f, KAISER10}, /* Q7 */ /* 93.1% cutoff (~100 dB stop) 10 */ + {160, 16, 0.960f, 0.960f, KAISER10}, /* Q8 */ /* 94.5% cutoff (~100 dB stop) 10 */ + {192, 32, 0.968f, 0.968f, KAISER12}, /* Q9 */ /* 95.5% cutoff (~100 dB stop) 10 */ + {256, 32, 0.975f, 0.975f, KAISER12}, /* Q10 */ /* 96.6% cutoff (~100 dB stop) 10 */ +}; +/*8,24,40,56,80,104,128,160,200,256,320*/ +static double compute_func(float x, const struct FuncDef *func) +{ + float y, frac; + double interp[4]; + int ind; + y = x*func->oversample; + ind = (int)floor(y); + frac = (y-ind); + /* CSE with handle the repeated powers */ + interp[3] = -0.1666666667*frac + 0.1666666667*(frac*frac*frac); + interp[2] = frac + 0.5*(frac*frac) - 0.5*(frac*frac*frac); + /*interp[2] = 1.f - 0.5f*frac - frac*frac + 0.5f*frac*frac*frac;*/ + interp[0] = -0.3333333333*frac + 0.5*(frac*frac) - 0.1666666667*(frac*frac*frac); + /* Just to make sure we don't have rounding problems */ + interp[1] = 1.f-interp[3]-interp[2]-interp[0]; + + /*sum = frac*accum[1] + (1-frac)*accum[2];*/ + return interp[0]*func->table[ind] + interp[1]*func->table[ind+1] + interp[2]*func->table[ind+2] + interp[3]*func->table[ind+3]; +} + +#if 0 +#include +int main(int argc, char **argv) +{ + int i; + for (i=0;i<256;i++) + { + printf ("%f\n", compute_func(i/256., KAISER12)); + } + return 0; +} +#endif + +#ifdef FIXED_POINT +/* The slow way of computing a sinc for the table. Should improve that some day */ +static spx_word16_t sinc(float cutoff, float x, int N, const struct FuncDef *window_func) +{ + /*fprintf (stderr, "%f ", x);*/ + float xx = x * cutoff; + if (fabs(x)<1e-6f) + return WORD2INT(32768.*cutoff); + else if (fabs(x) > .5f*N) + return 0; + /*FIXME: Can it really be any slower than this? */ + return WORD2INT(32768.*cutoff*sin(M_PI*xx)/(M_PI*xx) * compute_func(fabs(2.*x/N), window_func)); +} +#else +/* The slow way of computing a sinc for the table. Should improve that some day */ +static spx_word16_t sinc(float cutoff, float x, int N, const struct FuncDef *window_func) +{ + /*fprintf (stderr, "%f ", x);*/ + float xx = x * cutoff; + if (fabs(x)<1e-6) + return cutoff; + else if (fabs(x) > .5*N) + return 0; + /*FIXME: Can it really be any slower than this? */ + return cutoff*sin(M_PI*xx)/(M_PI*xx) * compute_func(fabs(2.*x/N), window_func); +} +#endif + +#ifdef FIXED_POINT +static void cubic_coef(spx_word16_t x, spx_word16_t interp[4]) +{ + /* Compute interpolation coefficients. I'm not sure whether this corresponds to cubic interpolation + but I know it's MMSE-optimal on a sinc */ + spx_word16_t x2, x3; + x2 = MULT16_16_P15(x, x); + x3 = MULT16_16_P15(x, x2); + interp[0] = PSHR32(MULT16_16(QCONST16(-0.16667f, 15),x) + MULT16_16(QCONST16(0.16667f, 15),x3),15); + interp[1] = EXTRACT16(EXTEND32(x) + SHR32(SUB32(EXTEND32(x2),EXTEND32(x3)),1)); + interp[3] = PSHR32(MULT16_16(QCONST16(-0.33333f, 15),x) + MULT16_16(QCONST16(.5f,15),x2) - MULT16_16(QCONST16(0.16667f, 15),x3),15); + /* Just to make sure we don't have rounding problems */ + interp[2] = Q15_ONE-interp[0]-interp[1]-interp[3]; + if (interp[2]<32767) + interp[2]+=1; +} +#else +static void cubic_coef(spx_word16_t frac, spx_word16_t interp[4]) +{ + /* Compute interpolation coefficients. I'm not sure whether this corresponds to cubic interpolation + but I know it's MMSE-optimal on a sinc */ + interp[0] = -0.16667f*frac + 0.16667f*frac*frac*frac; + interp[1] = frac + 0.5f*frac*frac - 0.5f*frac*frac*frac; + /*interp[2] = 1.f - 0.5f*frac - frac*frac + 0.5f*frac*frac*frac;*/ + interp[3] = -0.33333f*frac + 0.5f*frac*frac - 0.16667f*frac*frac*frac; + /* Just to make sure we don't have rounding problems */ + interp[2] = 1.-interp[0]-interp[1]-interp[3]; +} +#endif + +static int resampler_basic_direct_single(SpeexResamplerState *st, spx_uint32_t channel_index, const spx_word16_t *in, spx_uint32_t *in_len, spx_word16_t *out, spx_uint32_t *out_len) +{ + const int N = st->filt_len; + int out_sample = 0; + int last_sample = st->last_sample[channel_index]; + spx_uint32_t samp_frac_num = st->samp_frac_num[channel_index]; + const spx_word16_t *sinc_table = st->sinc_table; + const int out_stride = st->out_stride; + const int int_advance = st->int_advance; + const int frac_advance = st->frac_advance; + const spx_uint32_t den_rate = st->den_rate; + spx_word32_t sum; + + while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len)) + { + const spx_word16_t *sinct = & sinc_table[samp_frac_num*N]; + const spx_word16_t *iptr = & in[last_sample]; + +#ifdef OVERRIDE_INNER_PRODUCT_SINGLE + if (!moz_speex_have_single_simd()) { +#endif + int j; + sum = 0; + for(j=0;j= den_rate) + { + samp_frac_num -= den_rate; + last_sample++; + } + } + + st->last_sample[channel_index] = last_sample; + st->samp_frac_num[channel_index] = samp_frac_num; + return out_sample; +} + +#ifdef FIXED_POINT +#else +/* This is the same as the previous function, except with a double-precision accumulator */ +static int resampler_basic_direct_double(SpeexResamplerState *st, spx_uint32_t channel_index, const spx_word16_t *in, spx_uint32_t *in_len, spx_word16_t *out, spx_uint32_t *out_len) +{ + const int N = st->filt_len; + int out_sample = 0; + int last_sample = st->last_sample[channel_index]; + spx_uint32_t samp_frac_num = st->samp_frac_num[channel_index]; + const spx_word16_t *sinc_table = st->sinc_table; + const int out_stride = st->out_stride; + const int int_advance = st->int_advance; + const int frac_advance = st->frac_advance; + const spx_uint32_t den_rate = st->den_rate; + double sum; + + while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len)) + { + const spx_word16_t *sinct = & sinc_table[samp_frac_num*N]; + const spx_word16_t *iptr = & in[last_sample]; + +#ifdef OVERRIDE_INNER_PRODUCT_DOUBLE + if(moz_speex_have_double_simd()) { +#endif + int j; + double accum[4] = {0,0,0,0}; + + for(j=0;j= den_rate) + { + samp_frac_num -= den_rate; + last_sample++; + } + } + + st->last_sample[channel_index] = last_sample; + st->samp_frac_num[channel_index] = samp_frac_num; + return out_sample; +} +#endif + +static int resampler_basic_interpolate_single(SpeexResamplerState *st, spx_uint32_t channel_index, const spx_word16_t *in, spx_uint32_t *in_len, spx_word16_t *out, spx_uint32_t *out_len) +{ + const int N = st->filt_len; + int out_sample = 0; + int last_sample = st->last_sample[channel_index]; + spx_uint32_t samp_frac_num = st->samp_frac_num[channel_index]; + const int out_stride = st->out_stride; + const int int_advance = st->int_advance; + const int frac_advance = st->frac_advance; + const spx_uint32_t den_rate = st->den_rate; + spx_word32_t sum; + + while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len)) + { + const spx_word16_t *iptr = & in[last_sample]; + + const int offset = samp_frac_num*st->oversample/st->den_rate; +#ifdef FIXED_POINT + const spx_word16_t frac = PDIV32(SHL32((samp_frac_num*st->oversample) % st->den_rate,15),st->den_rate); +#else + const spx_word16_t frac = ((float)((samp_frac_num*st->oversample) % st->den_rate))/st->den_rate; +#endif + spx_word16_t interp[4]; + + +#ifdef OVERRIDE_INTERPOLATE_PRODUCT_SINGLE + if (!moz_speex_have_single_simd()) { +#endif + int j; + spx_word32_t accum[4] = {0,0,0,0}; + + for(j=0;jsinc_table[4+(j+1)*st->oversample-offset-2]); + accum[1] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset-1]); + accum[2] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset]); + accum[3] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset+1]); + } + + cubic_coef(frac, interp); + sum = MULT16_32_Q15(interp[0],accum[0]) + MULT16_32_Q15(interp[1],accum[1]) + MULT16_32_Q15(interp[2],accum[2]) + MULT16_32_Q15(interp[3],accum[3]); + sum = SATURATE32PSHR(sum, 15, 32767); +#ifdef OVERRIDE_INTERPOLATE_PRODUCT_SINGLE + } else { + cubic_coef(frac, interp); + sum = interpolate_product_single(iptr, st->sinc_table + st->oversample + 4 - offset - 2, N, st->oversample, interp); + } +#endif + + out[out_stride * out_sample++] = sum; + last_sample += int_advance; + samp_frac_num += frac_advance; + if (samp_frac_num >= den_rate) + { + samp_frac_num -= den_rate; + last_sample++; + } + } + + st->last_sample[channel_index] = last_sample; + st->samp_frac_num[channel_index] = samp_frac_num; + return out_sample; +} + +#ifdef FIXED_POINT +#else +/* This is the same as the previous function, except with a double-precision accumulator */ +static int resampler_basic_interpolate_double(SpeexResamplerState *st, spx_uint32_t channel_index, const spx_word16_t *in, spx_uint32_t *in_len, spx_word16_t *out, spx_uint32_t *out_len) +{ + const int N = st->filt_len; + int out_sample = 0; + int last_sample = st->last_sample[channel_index]; + spx_uint32_t samp_frac_num = st->samp_frac_num[channel_index]; + const int out_stride = st->out_stride; + const int int_advance = st->int_advance; + const int frac_advance = st->frac_advance; + const spx_uint32_t den_rate = st->den_rate; + spx_word32_t sum; + + while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len)) + { + const spx_word16_t *iptr = & in[last_sample]; + + const int offset = samp_frac_num*st->oversample/st->den_rate; +#ifdef FIXED_POINT + const spx_word16_t frac = PDIV32(SHL32((samp_frac_num*st->oversample) % st->den_rate,15),st->den_rate); +#else + const spx_word16_t frac = ((float)((samp_frac_num*st->oversample) % st->den_rate))/st->den_rate; +#endif + spx_word16_t interp[4]; + + +#ifdef OVERRIDE_INTERPOLATE_PRODUCT_DOUBLE + if (!moz_speex_have_double_simd()) { +#endif + int j; + double accum[4] = {0,0,0,0}; + + for(j=0;jsinc_table[4+(j+1)*st->oversample-offset-2]); + accum[1] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset-1]); + accum[2] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset]); + accum[3] += MULT16_16(curr_in,st->sinc_table[4+(j+1)*st->oversample-offset+1]); + } + + cubic_coef(frac, interp); + sum = MULT16_32_Q15(interp[0],accum[0]) + MULT16_32_Q15(interp[1],accum[1]) + MULT16_32_Q15(interp[2],accum[2]) + MULT16_32_Q15(interp[3],accum[3]); +#ifdef OVERRIDE_INTERPOLATE_PRODUCT_DOUBLE + } else { + cubic_coef(frac, interp); + sum = interpolate_product_double(iptr, st->sinc_table + st->oversample + 4 - offset - 2, N, st->oversample, interp); + } +#endif + + out[out_stride * out_sample++] = PSHR32(sum,15); + last_sample += int_advance; + samp_frac_num += frac_advance; + if (samp_frac_num >= den_rate) + { + samp_frac_num -= den_rate; + last_sample++; + } + } + + st->last_sample[channel_index] = last_sample; + st->samp_frac_num[channel_index] = samp_frac_num; + return out_sample; +} +#endif + +/* This resampler is used to produce zero output in situations where memory + for the filter could not be allocated. The expected numbers of input and + output samples are still processed so that callers failing to check error + codes are not surprised, possibly getting into infinite loops. */ +static int resampler_basic_zero(SpeexResamplerState *st, spx_uint32_t channel_index, const spx_word16_t *in, spx_uint32_t *in_len, spx_word16_t *out, spx_uint32_t *out_len) +{ + int out_sample = 0; + int last_sample = st->last_sample[channel_index]; + spx_uint32_t samp_frac_num = st->samp_frac_num[channel_index]; + const int out_stride = st->out_stride; + const int int_advance = st->int_advance; + const int frac_advance = st->frac_advance; + const spx_uint32_t den_rate = st->den_rate; + + (void)in; + while (!(last_sample >= (spx_int32_t)*in_len || out_sample >= (spx_int32_t)*out_len)) + { + out[out_stride * out_sample++] = 0; + last_sample += int_advance; + samp_frac_num += frac_advance; + if (samp_frac_num >= den_rate) + { + samp_frac_num -= den_rate; + last_sample++; + } + } + + st->last_sample[channel_index] = last_sample; + st->samp_frac_num[channel_index] = samp_frac_num; + return out_sample; +} + +static int multiply_frac(spx_uint32_t *result, spx_uint32_t value, spx_uint32_t num, spx_uint32_t den) +{ + spx_uint32_t major = value / den; + spx_uint32_t remain = value % den; + /* TODO: Could use 64 bits operation to check for overflow. But only guaranteed in C99+ */ + if (remain > UINT32_MAX / num || major > UINT32_MAX / num + || major * num > UINT32_MAX - remain * num / den) + return RESAMPLER_ERR_OVERFLOW; + *result = remain * num / den + major * num; + return RESAMPLER_ERR_SUCCESS; +} + +static int update_filter(SpeexResamplerState *st) +{ + spx_uint32_t old_length = st->filt_len; + spx_uint32_t old_alloc_size = st->mem_alloc_size; + int use_direct; + spx_uint32_t min_sinc_table_length; + spx_uint32_t min_alloc_size; + + st->int_advance = st->num_rate/st->den_rate; + st->frac_advance = st->num_rate%st->den_rate; + st->oversample = quality_map[st->quality].oversample; + st->filt_len = quality_map[st->quality].base_length; + + if (st->num_rate > st->den_rate) + { + /* down-sampling */ + st->cutoff = quality_map[st->quality].downsample_bandwidth * st->den_rate / st->num_rate; + if (multiply_frac(&st->filt_len,st->filt_len,st->num_rate,st->den_rate) != RESAMPLER_ERR_SUCCESS) + goto fail; + /* Round up to make sure we have a multiple of 8 for SSE */ + st->filt_len = ((st->filt_len-1)&(~0x7))+8; + if (2*st->den_rate < st->num_rate) + st->oversample >>= 1; + if (4*st->den_rate < st->num_rate) + st->oversample >>= 1; + if (8*st->den_rate < st->num_rate) + st->oversample >>= 1; + if (16*st->den_rate < st->num_rate) + st->oversample >>= 1; + if (st->oversample < 1) + st->oversample = 1; + } else { + /* up-sampling */ + st->cutoff = quality_map[st->quality].upsample_bandwidth; + } + + use_direct = +#ifdef RESAMPLE_HUGEMEM + /* Choose the direct resampler, even with higher initialization costs, + when resampling any multiple of 100 to 44100. */ + st->den_rate <= 441 +#else + /* Choose the resampling type that requires the least amount of memory */ + st->filt_len*st->den_rate <= st->filt_len*st->oversample+8 +#endif + && INT_MAX/sizeof(spx_word16_t)/st->den_rate >= st->filt_len; + if (use_direct) + { + min_sinc_table_length = st->filt_len*st->den_rate; + } else { + if ((INT_MAX/sizeof(spx_word16_t)-8)/st->oversample < st->filt_len) + goto fail; + + min_sinc_table_length = st->filt_len*st->oversample+8; + } + if (st->sinc_table_length < min_sinc_table_length) + { + spx_word16_t *sinc_table = (spx_word16_t *)speex_realloc(st->sinc_table,min_sinc_table_length*sizeof(spx_word16_t)); + if (!sinc_table) + goto fail; + + st->sinc_table = sinc_table; + st->sinc_table_length = min_sinc_table_length; + } + if (use_direct) + { + spx_uint32_t i; + for (i=0;iden_rate;i++) + { + spx_int32_t j; + for (j=0;jfilt_len;j++) + { + st->sinc_table[i*st->filt_len+j] = sinc(st->cutoff,((j-(spx_int32_t)st->filt_len/2+1)-((float)i)/st->den_rate), st->filt_len, quality_map[st->quality].window_func); + } + } +#ifdef FIXED_POINT + st->resampler_ptr = resampler_basic_direct_single; +#else + if (st->quality>8) + st->resampler_ptr = resampler_basic_direct_double; + else + st->resampler_ptr = resampler_basic_direct_single; +#endif + /*fprintf (stderr, "resampler uses direct sinc table and normalised cutoff %f\n", cutoff);*/ + } else { + spx_int32_t i; + for (i=-4;i<(spx_int32_t)(st->oversample*st->filt_len+4);i++) + st->sinc_table[i+4] = sinc(st->cutoff,(i/(float)st->oversample - st->filt_len/2), st->filt_len, quality_map[st->quality].window_func); +#ifdef FIXED_POINT + st->resampler_ptr = resampler_basic_interpolate_single; +#else + if (st->quality>8) + st->resampler_ptr = resampler_basic_interpolate_double; + else + st->resampler_ptr = resampler_basic_interpolate_single; +#endif + /*fprintf (stderr, "resampler uses interpolated sinc table and normalised cutoff %f\n", cutoff);*/ + } + + /* Here's the place where we update the filter memory to take into account + the change in filter length. It's probably the messiest part of the code + due to handling of lots of corner cases. */ + + /* Adding buffer_size to filt_len won't overflow here because filt_len + could be multiplied by sizeof(spx_word16_t) above. */ + min_alloc_size = st->filt_len-1 + st->buffer_size; + if (min_alloc_size > st->mem_alloc_size) + { + spx_word16_t *mem; + if (INT_MAX/sizeof(spx_word16_t)/st->nb_channels < min_alloc_size) + goto fail; + else if (!(mem = (spx_word16_t*)speex_realloc(st->mem, st->nb_channels*min_alloc_size * sizeof(*mem)))) + goto fail; + + st->mem = mem; + st->mem_alloc_size = min_alloc_size; + } + if (!st->started) + { + spx_uint32_t i; + for (i=0;inb_channels*st->mem_alloc_size;i++) + st->mem[i] = 0; + /*speex_warning("reinit filter");*/ + } else if (st->filt_len > old_length) + { + spx_uint32_t i; + /* Increase the filter length */ + /*speex_warning("increase filter size");*/ + for (i=st->nb_channels;i--;) + { + spx_uint32_t j; + spx_uint32_t olen = old_length; + spx_uint32_t start = i*st->mem_alloc_size; + spx_uint32_t magic_samples = st->magic_samples[i]; + /*if (st->magic_samples[i])*/ + { + /* Try and remove the magic samples as if nothing had happened */ + + /* FIXME: This is wrong but for now we need it to avoid going over the array bounds */ + olen = old_length + 2*magic_samples; + for (j=old_length-1+magic_samples;j--;) + st->mem[start+j+magic_samples] = st->mem[i*old_alloc_size+j]; + for (j=0;jmem[start+j] = 0; + st->magic_samples[i] = 0; + } + if (st->filt_len > olen) + { + /* If the new filter length is still bigger than the "augmented" length */ + /* Copy data going backward */ + for (j=0;jmem[start+(st->filt_len-2-j)] = st->mem[start+(olen-2-j)]; + /* Then put zeros for lack of anything better */ + for (;jfilt_len-1;j++) + st->mem[start+(st->filt_len-2-j)] = 0; + /* Adjust last_sample */ + st->last_sample[i] += (st->filt_len - olen)/2; + } else { + /* Put back some of the magic! */ + magic_samples = (olen - st->filt_len)/2; + for (j=0;jfilt_len-1+magic_samples;j++) + st->mem[start+j] = st->mem[start+j+magic_samples]; + st->magic_samples[i] = magic_samples; + } + } + } else if (st->filt_len < old_length) + { + spx_uint32_t i; + /* Reduce filter length, this a bit tricky. We need to store some of the memory as "magic" + samples so they can be used directly as input the next time(s) */ + for (i=0;inb_channels;i++) + { + spx_uint32_t j; + spx_uint32_t old_magic = st->magic_samples[i]; + st->magic_samples[i] = (old_length - st->filt_len)/2; + /* We must copy some of the memory that's no longer used */ + /* Copy data going backward */ + for (j=0;jfilt_len-1+st->magic_samples[i]+old_magic;j++) + st->mem[i*st->mem_alloc_size+j] = st->mem[i*st->mem_alloc_size+j+st->magic_samples[i]]; + st->magic_samples[i] += old_magic; + } + } + return RESAMPLER_ERR_SUCCESS; + +fail: + st->resampler_ptr = resampler_basic_zero; + /* st->mem may still contain consumed input samples for the filter. + Restore filt_len so that filt_len - 1 still points to the position after + the last of these samples. */ + st->filt_len = old_length; + return RESAMPLER_ERR_ALLOC_FAILED; +} + +EXPORT SpeexResamplerState *speex_resampler_init(spx_uint32_t nb_channels, spx_uint32_t in_rate, spx_uint32_t out_rate, int quality, int *err) +{ + return speex_resampler_init_frac(nb_channels, in_rate, out_rate, in_rate, out_rate, quality, err); +} + +EXPORT SpeexResamplerState *speex_resampler_init_frac(spx_uint32_t nb_channels, spx_uint32_t ratio_num, spx_uint32_t ratio_den, spx_uint32_t in_rate, spx_uint32_t out_rate, int quality, int *err) +{ + SpeexResamplerState *st; + int filter_err; + + if (nb_channels == 0 || ratio_num == 0 || ratio_den == 0 || quality > 10 || quality < 0) + { + if (err) + *err = RESAMPLER_ERR_INVALID_ARG; + return NULL; + } + st = (SpeexResamplerState *)speex_alloc(sizeof(SpeexResamplerState)); + if (!st) + { + if (err) + *err = RESAMPLER_ERR_ALLOC_FAILED; + return NULL; + } + st->initialised = 0; + st->started = 0; + st->in_rate = 0; + st->out_rate = 0; + st->num_rate = 0; + st->den_rate = 0; + st->quality = -1; + st->sinc_table_length = 0; + st->mem_alloc_size = 0; + st->filt_len = 0; + st->mem = 0; + st->resampler_ptr = 0; + + st->cutoff = 1.f; + st->nb_channels = nb_channels; + st->in_stride = 1; + st->out_stride = 1; + + st->buffer_size = 160; + + /* Per channel data */ + if (!(st->last_sample = (spx_int32_t*)speex_alloc(nb_channels*sizeof(spx_int32_t)))) + goto fail; + if (!(st->magic_samples = (spx_uint32_t*)speex_alloc(nb_channels*sizeof(spx_uint32_t)))) + goto fail; + if (!(st->samp_frac_num = (spx_uint32_t*)speex_alloc(nb_channels*sizeof(spx_uint32_t)))) + goto fail; + + speex_resampler_set_quality(st, quality); + speex_resampler_set_rate_frac(st, ratio_num, ratio_den, in_rate, out_rate); + + filter_err = update_filter(st); + if (filter_err == RESAMPLER_ERR_SUCCESS) + { + st->initialised = 1; + } else { + speex_resampler_destroy(st); + st = NULL; + } + if (err) + *err = filter_err; + + return st; + +fail: + if (err) + *err = RESAMPLER_ERR_ALLOC_FAILED; + speex_resampler_destroy(st); + return NULL; +} + +EXPORT void speex_resampler_destroy(SpeexResamplerState *st) +{ + speex_free(st->mem); + speex_free(st->sinc_table); + speex_free(st->last_sample); + speex_free(st->magic_samples); + speex_free(st->samp_frac_num); + speex_free(st); +} + +static int speex_resampler_process_native(SpeexResamplerState *st, spx_uint32_t channel_index, spx_uint32_t *in_len, spx_word16_t *out, spx_uint32_t *out_len) +{ + int j=0; + const int N = st->filt_len; + int out_sample = 0; + spx_word16_t *mem = st->mem + channel_index * st->mem_alloc_size; + spx_uint32_t ilen; + + st->started = 1; + + /* Call the right resampler through the function ptr */ + out_sample = st->resampler_ptr(st, channel_index, mem, in_len, out, out_len); + + if (st->last_sample[channel_index] < (spx_int32_t)*in_len) + *in_len = st->last_sample[channel_index]; + *out_len = out_sample; + st->last_sample[channel_index] -= *in_len; + + ilen = *in_len; + + for(j=0;jmagic_samples[channel_index]; + spx_word16_t *mem = st->mem + channel_index * st->mem_alloc_size; + const int N = st->filt_len; + + speex_resampler_process_native(st, channel_index, &tmp_in_len, *out, &out_len); + + st->magic_samples[channel_index] -= tmp_in_len; + + /* If we couldn't process all "magic" input samples, save the rest for next time */ + if (st->magic_samples[channel_index]) + { + spx_uint32_t i; + for (i=0;imagic_samples[channel_index];i++) + mem[N-1+i]=mem[N-1+i+tmp_in_len]; + } + *out += out_len*st->out_stride; + return out_len; +} + +#ifdef FIXED_POINT +EXPORT int speex_resampler_process_int(SpeexResamplerState *st, spx_uint32_t channel_index, const spx_int16_t *in, spx_uint32_t *in_len, spx_int16_t *out, spx_uint32_t *out_len) +#else +EXPORT int speex_resampler_process_float(SpeexResamplerState *st, spx_uint32_t channel_index, const float *in, spx_uint32_t *in_len, float *out, spx_uint32_t *out_len) +#endif +{ + int j; + spx_uint32_t ilen = *in_len; + spx_uint32_t olen = *out_len; + spx_word16_t *x = st->mem + channel_index * st->mem_alloc_size; + const int filt_offs = st->filt_len - 1; + const spx_uint32_t xlen = st->mem_alloc_size - filt_offs; + const int istride = st->in_stride; + + if (st->magic_samples[channel_index]) + olen -= speex_resampler_magic(st, channel_index, &out, olen); + if (! st->magic_samples[channel_index]) { + while (ilen && olen) { + spx_uint32_t ichunk = (ilen > xlen) ? xlen : ilen; + spx_uint32_t ochunk = olen; + + if (in) { + for(j=0;jout_stride; + if (in) + in += ichunk * istride; + } + } + *in_len -= ilen; + *out_len -= olen; + return st->resampler_ptr == resampler_basic_zero ? RESAMPLER_ERR_ALLOC_FAILED : RESAMPLER_ERR_SUCCESS; +} + +#ifdef FIXED_POINT +EXPORT int speex_resampler_process_float(SpeexResamplerState *st, spx_uint32_t channel_index, const float *in, spx_uint32_t *in_len, float *out, spx_uint32_t *out_len) +#else +EXPORT int speex_resampler_process_int(SpeexResamplerState *st, spx_uint32_t channel_index, const spx_int16_t *in, spx_uint32_t *in_len, spx_int16_t *out, spx_uint32_t *out_len) +#endif +{ + int j; + const int istride_save = st->in_stride; + const int ostride_save = st->out_stride; + spx_uint32_t ilen = *in_len; + spx_uint32_t olen = *out_len; + spx_word16_t *x = st->mem + channel_index * st->mem_alloc_size; + const spx_uint32_t xlen = st->mem_alloc_size - (st->filt_len - 1); +#ifdef VAR_ARRAYS + const unsigned int ylen = (olen < FIXED_STACK_ALLOC) ? olen : FIXED_STACK_ALLOC; + spx_word16_t ystack[ylen]; +#else + const unsigned int ylen = FIXED_STACK_ALLOC; + spx_word16_t ystack[FIXED_STACK_ALLOC]; +#endif + + st->out_stride = 1; + + while (ilen && olen) { + spx_word16_t *y = ystack; + spx_uint32_t ichunk = (ilen > xlen) ? xlen : ilen; + spx_uint32_t ochunk = (olen > ylen) ? ylen : olen; + spx_uint32_t omagic = 0; + + if (st->magic_samples[channel_index]) { + omagic = speex_resampler_magic(st, channel_index, &y, ochunk); + ochunk -= omagic; + olen -= omagic; + } + if (! st->magic_samples[channel_index]) { + if (in) { + for(j=0;jfilt_len-1]=WORD2INT(in[j*istride_save]); +#else + x[j+st->filt_len-1]=in[j*istride_save]; +#endif + } else { + for(j=0;jfilt_len-1]=0; + } + + speex_resampler_process_native(st, channel_index, &ichunk, y, &ochunk); + } else { + ichunk = 0; + ochunk = 0; + } + + for (j=0;jout_stride = ostride_save; + *in_len -= ilen; + *out_len -= olen; + + return st->resampler_ptr == resampler_basic_zero ? RESAMPLER_ERR_ALLOC_FAILED : RESAMPLER_ERR_SUCCESS; +} + +EXPORT int speex_resampler_process_interleaved_float(SpeexResamplerState *st, const float *in, spx_uint32_t *in_len, float *out, spx_uint32_t *out_len) +{ + spx_uint32_t i; + int istride_save, ostride_save; + spx_uint32_t bak_out_len = *out_len; + spx_uint32_t bak_in_len = *in_len; + istride_save = st->in_stride; + ostride_save = st->out_stride; + st->in_stride = st->out_stride = st->nb_channels; + for (i=0;inb_channels;i++) + { + *out_len = bak_out_len; + *in_len = bak_in_len; + if (in != NULL) + speex_resampler_process_float(st, i, in+i, in_len, out+i, out_len); + else + speex_resampler_process_float(st, i, NULL, in_len, out+i, out_len); + } + st->in_stride = istride_save; + st->out_stride = ostride_save; + return st->resampler_ptr == resampler_basic_zero ? RESAMPLER_ERR_ALLOC_FAILED : RESAMPLER_ERR_SUCCESS; +} + +EXPORT int speex_resampler_process_interleaved_int(SpeexResamplerState *st, const spx_int16_t *in, spx_uint32_t *in_len, spx_int16_t *out, spx_uint32_t *out_len) +{ + spx_uint32_t i; + int istride_save, ostride_save; + spx_uint32_t bak_out_len = *out_len; + spx_uint32_t bak_in_len = *in_len; + istride_save = st->in_stride; + ostride_save = st->out_stride; + st->in_stride = st->out_stride = st->nb_channels; + for (i=0;inb_channels;i++) + { + *out_len = bak_out_len; + *in_len = bak_in_len; + if (in != NULL) + speex_resampler_process_int(st, i, in+i, in_len, out+i, out_len); + else + speex_resampler_process_int(st, i, NULL, in_len, out+i, out_len); + } + st->in_stride = istride_save; + st->out_stride = ostride_save; + return st->resampler_ptr == resampler_basic_zero ? RESAMPLER_ERR_ALLOC_FAILED : RESAMPLER_ERR_SUCCESS; +} + +EXPORT int speex_resampler_set_rate(SpeexResamplerState *st, spx_uint32_t in_rate, spx_uint32_t out_rate) +{ + return speex_resampler_set_rate_frac(st, in_rate, out_rate, in_rate, out_rate); +} + +EXPORT void speex_resampler_get_rate(SpeexResamplerState *st, spx_uint32_t *in_rate, spx_uint32_t *out_rate) +{ + *in_rate = st->in_rate; + *out_rate = st->out_rate; +} + +static inline spx_uint32_t compute_gcd(spx_uint32_t a, spx_uint32_t b) +{ + while (b != 0) + { + spx_uint32_t temp = a; + + a = b; + b = temp % b; + } + return a; +} + +EXPORT int speex_resampler_set_rate_frac(SpeexResamplerState *st, spx_uint32_t ratio_num, spx_uint32_t ratio_den, spx_uint32_t in_rate, spx_uint32_t out_rate) +{ + spx_uint32_t fact; + spx_uint32_t old_den; + spx_uint32_t i; + + if (ratio_num == 0 || ratio_den == 0) + return RESAMPLER_ERR_INVALID_ARG; + + if (st->in_rate == in_rate && st->out_rate == out_rate && st->num_rate == ratio_num && st->den_rate == ratio_den) + return RESAMPLER_ERR_SUCCESS; + + old_den = st->den_rate; + st->in_rate = in_rate; + st->out_rate = out_rate; + st->num_rate = ratio_num; + st->den_rate = ratio_den; + + fact = compute_gcd(st->num_rate, st->den_rate); + + st->num_rate /= fact; + st->den_rate /= fact; + + if (old_den > 0) + { + for (i=0;inb_channels;i++) + { + if (multiply_frac(&st->samp_frac_num[i],st->samp_frac_num[i],st->den_rate,old_den) != RESAMPLER_ERR_SUCCESS) { + st->samp_frac_num[i] = st->den_rate-1; + } + /* Safety net */ + if (st->samp_frac_num[i] >= st->den_rate) + st->samp_frac_num[i] = st->den_rate-1; + } + } + + if (st->initialised) + return update_filter(st); + return RESAMPLER_ERR_SUCCESS; +} + +EXPORT void speex_resampler_get_ratio(SpeexResamplerState *st, spx_uint32_t *ratio_num, spx_uint32_t *ratio_den) +{ + *ratio_num = st->num_rate; + *ratio_den = st->den_rate; +} + +EXPORT int speex_resampler_set_quality(SpeexResamplerState *st, int quality) +{ + if (quality > 10 || quality < 0) + return RESAMPLER_ERR_INVALID_ARG; + if (st->quality == quality) + return RESAMPLER_ERR_SUCCESS; + st->quality = quality; + if (st->initialised) + return update_filter(st); + return RESAMPLER_ERR_SUCCESS; +} + +EXPORT void speex_resampler_get_quality(SpeexResamplerState *st, int *quality) +{ + *quality = st->quality; +} + +EXPORT void speex_resampler_set_input_stride(SpeexResamplerState *st, spx_uint32_t stride) +{ + st->in_stride = stride; +} + +EXPORT void speex_resampler_get_input_stride(SpeexResamplerState *st, spx_uint32_t *stride) +{ + *stride = st->in_stride; +} + +EXPORT void speex_resampler_set_output_stride(SpeexResamplerState *st, spx_uint32_t stride) +{ + st->out_stride = stride; +} + +EXPORT void speex_resampler_get_output_stride(SpeexResamplerState *st, spx_uint32_t *stride) +{ + *stride = st->out_stride; +} + +EXPORT int speex_resampler_get_input_latency(SpeexResamplerState *st) +{ + return st->filt_len / 2; +} + +EXPORT int speex_resampler_get_output_latency(SpeexResamplerState *st) +{ + return ((st->filt_len / 2) * st->den_rate + (st->num_rate >> 1)) / st->num_rate; +} + +EXPORT int speex_resampler_skip_zeros(SpeexResamplerState *st) +{ + spx_uint32_t i; + for (i=0;inb_channels;i++) + st->last_sample[i] = st->filt_len/2; + return RESAMPLER_ERR_SUCCESS; +} + +EXPORT int speex_resampler_set_skip_frac_num(SpeexResamplerState *st, spx_uint32_t skip_frac_num) +{ + spx_uint32_t i; + spx_uint32_t last_sample = skip_frac_num / st->den_rate; + spx_uint32_t samp_frac_num = skip_frac_num % st->den_rate; + for (i=0;inb_channels;i++) { + st->last_sample[i] = last_sample; + st->samp_frac_num[i] = samp_frac_num; + } + return RESAMPLER_ERR_SUCCESS; +} + +EXPORT int speex_resampler_reset_mem(SpeexResamplerState *st) +{ + spx_uint32_t i; + for (i=0;inb_channels;i++) + { + st->last_sample[i] = 0; + st->magic_samples[i] = 0; + st->samp_frac_num[i] = 0; + } + for (i=0;inb_channels*(st->filt_len-1);i++) + st->mem[i] = 0; + return RESAMPLER_ERR_SUCCESS; +} + +EXPORT const char *speex_resampler_strerror(int err) +{ + switch (err) + { + case RESAMPLER_ERR_SUCCESS: + return "Success."; + case RESAMPLER_ERR_ALLOC_FAILED: + return "Memory allocation failed."; + case RESAMPLER_ERR_BAD_STATE: + return "Bad resampler state."; + case RESAMPLER_ERR_INVALID_ARG: + return "Invalid argument."; + case RESAMPLER_ERR_PTR_OVERLAP: + return "Input and output buffers overlap."; + default: + return "Unknown error. Bad error code or strange version mismatch."; + } +} diff --git a/media/libspeex_resampler/src/resample_neon.c b/media/libspeex_resampler/src/resample_neon.c new file mode 100644 index 0000000000..ba97637af3 --- /dev/null +++ b/media/libspeex_resampler/src/resample_neon.c @@ -0,0 +1,342 @@ +/* Copyright (C) 2007-2008 Jean-Marc Valin + * Copyright (C) 2008 Thorvald Natvig + * Copyright (C) 2011 Texas Instruments + * author Jyri Sarha + */ +/** + @file resample_neon.h + @brief Resampler functions (NEON version) +*/ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + - Neither the name of the Xiph.org Foundation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include +#include "simd_detect.h" + +#ifdef FIXED_POINT +#if defined(__aarch64__) +static inline int32_t saturate_32bit_to_16bit(int32_t a) { + int32_t ret; + asm ("fmov s0, %w[a]\n" + "sqxtn h0, s0\n" + "sxtl v0.4s, v0.4h\n" + "fmov %w[ret], s0\n" + : [ret] "=r" (ret) + : [a] "r" (a) + : "v0" ); + return ret; +} +#elif defined(__thumb2__) +static inline int32_t saturate_32bit_to_16bit(int32_t a) { + int32_t ret; + asm ("ssat %[ret], #16, %[a]" + : [ret] "=r" (ret) + : [a] "r" (a) + : ); + return ret; +} +#else +static inline int32_t saturate_32bit_to_16bit(int32_t a) { + int32_t ret; + asm ("vmov.s32 d0[0], %[a]\n" + "vqmovn.s32 d0, q0\n" + "vmov.s16 %[ret], d0[0]\n" + : [ret] "=r" (ret) + : [a] "r" (a) + : "q0"); + return ret; +} +#endif +#undef WORD2INT +#define WORD2INT(x) (saturate_32bit_to_16bit(x)) + +#define OVERRIDE_INNER_PRODUCT_SINGLE +/* Only works when len % 4 == 0 and len >= 4 */ +#if defined(__aarch64__) +inline int32_t inner_product_single(const int16_t *a, const int16_t *b, unsigned int len) +{ + int32_t ret; + uint32_t remainder = len % 16; + len = len - remainder; + + asm volatile (" cmp %w[len], #0\n" + " b.ne 1f\n" + " ld1 {v16.4h}, [%[b]], #8\n" + " ld1 {v20.4h}, [%[a]], #8\n" + " subs %w[remainder], %w[remainder], #4\n" + " smull v0.4s, v16.4h, v20.4h\n" + " b.ne 4f\n" + " b 5f\n" + "1:" + " ld1 {v16.4h, v17.4h, v18.4h, v19.4h}, [%[b]], #32\n" + " ld1 {v20.4h, v21.4h, v22.4h, v23.4h}, [%[a]], #32\n" + " subs %w[len], %w[len], #16\n" + " smull v0.4s, v16.4h, v20.4h\n" + " smlal v0.4s, v17.4h, v21.4h\n" + " smlal v0.4s, v18.4h, v22.4h\n" + " smlal v0.4s, v19.4h, v23.4h\n" + " b.eq 3f\n" + "2:" + " ld1 {v16.4h, v17.4h, v18.4h, v19.4h}, [%[b]], #32\n" + " ld1 {v20.4h, v21.4h, v22.4h, v23.4h}, [%[a]], #32\n" + " subs %w[len], %w[len], #16\n" + " smlal v0.4s, v16.4h, v20.4h\n" + " smlal v0.4s, v17.4h, v21.4h\n" + " smlal v0.4s, v18.4h, v22.4h\n" + " smlal v0.4s, v19.4h, v23.4h\n" + " b.ne 2b\n" + "3:" + " cmp %w[remainder], #0\n" + " b.eq 5f\n" + "4:" + " ld1 {v18.4h}, [%[b]], #8\n" + " ld1 {v22.4h}, [%[a]], #8\n" + " subs %w[remainder], %w[remainder], #4\n" + " smlal v0.4s, v18.4h, v22.4h\n" + " b.ne 4b\n" + "5:" + " saddlv d0, v0.4s\n" + " sqxtn s0, d0\n" + " sqrshrn h0, s0, #15\n" + " sxtl v0.4s, v0.4h\n" + " fmov %w[ret], s0\n" + : [ret] "=r" (ret), [a] "+r" (a), [b] "+r" (b), + [len] "+r" (len), [remainder] "+r" (remainder) + : + : "cc", "v0", + "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23"); + return ret; +} +#else +inline int32_t inner_product_single(const int16_t *a, const int16_t *b, unsigned int len) +{ + int32_t ret; + uint32_t remainder = len % 16; + len = len - remainder; + + asm volatile (" cmp %[len], #0\n" + " bne 1f\n" + " vld1.16 {d16}, [%[b]]!\n" + " vld1.16 {d20}, [%[a]]!\n" + " subs %[remainder], %[remainder], #4\n" + " vmull.s16 q0, d16, d20\n" + " beq 5f\n" + " b 4f\n" + "1:" + " vld1.16 {d16, d17, d18, d19}, [%[b]]!\n" + " vld1.16 {d20, d21, d22, d23}, [%[a]]!\n" + " subs %[len], %[len], #16\n" + " vmull.s16 q0, d16, d20\n" + " vmlal.s16 q0, d17, d21\n" + " vmlal.s16 q0, d18, d22\n" + " vmlal.s16 q0, d19, d23\n" + " beq 3f\n" + "2:" + " vld1.16 {d16, d17, d18, d19}, [%[b]]!\n" + " vld1.16 {d20, d21, d22, d23}, [%[a]]!\n" + " subs %[len], %[len], #16\n" + " vmlal.s16 q0, d16, d20\n" + " vmlal.s16 q0, d17, d21\n" + " vmlal.s16 q0, d18, d22\n" + " vmlal.s16 q0, d19, d23\n" + " bne 2b\n" + "3:" + " cmp %[remainder], #0\n" + " beq 5f\n" + "4:" + " vld1.16 {d16}, [%[b]]!\n" + " vld1.16 {d20}, [%[a]]!\n" + " subs %[remainder], %[remainder], #4\n" + " vmlal.s16 q0, d16, d20\n" + " bne 4b\n" + "5:" + " vaddl.s32 q0, d0, d1\n" + " vadd.s64 d0, d0, d1\n" + " vqmovn.s64 d0, q0\n" + " vqrshrn.s32 d0, q0, #15\n" + " vmov.s16 %[ret], d0[0]\n" + : [ret] "=r" (ret), [a] "+r" (a), [b] "+r" (b), + [len] "+r" (len), [remainder] "+r" (remainder) + : + : "cc", "q0", + "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23"); + + return ret; +} +#endif // !defined(__aarch64__) + +#elif defined(FLOATING_POINT) +#if defined(__aarch64__) +static inline int32_t saturate_float_to_16bit(float a) { + int32_t ret; + asm ("fcvtas s1, %s[a]\n" + "sqxtn h1, s1\n" + "sxtl v1.4s, v1.4h\n" + "fmov %w[ret], s1\n" + : [ret] "=r" (ret) + : [a] "w" (a) + : "v1"); + return ret; +} +#else +static inline int32_t saturate_float_to_16bit(float a) { + int32_t ret; + asm ("vmov.f32 d0[0], %[a]\n" + "vcvt.s32.f32 d0, d0, #15\n" + "vqrshrn.s32 d0, q0, #15\n" + "vmov.s16 %[ret], d0[0]\n" + : [ret] "=r" (ret) + : [a] "r" (a) + : "q0"); + return ret; +} +#endif + +#undef WORD2INT +#define WORD2INT(x) (saturate_float_to_16bit(x)) + +#define OVERRIDE_INNER_PRODUCT_SINGLE +/* Only works when len % 4 == 0 and len >= 4 */ +#if defined(__aarch64__) +inline float inner_product_single(const float *a, const float *b, unsigned int len) +{ + float ret; + uint32_t remainder = len % 16; + len = len - remainder; + + asm volatile (" cmp %w[len], #0\n" + " b.ne 1f\n" + " ld1 {v16.4s}, [%[b]], #16\n" + " ld1 {v20.4s}, [%[a]], #16\n" + " subs %w[remainder], %w[remainder], #4\n" + " fmul v1.4s, v16.4s, v20.4s\n" + " b.ne 4f\n" + " b 5f\n" + "1:" + " ld1 {v16.4s, v17.4s, v18.4s, v19.4s}, [%[b]], #64\n" + " ld1 {v20.4s, v21.4s, v22.4s, v23.4s}, [%[a]], #64\n" + " subs %w[len], %w[len], #16\n" + " fmul v1.4s, v16.4s, v20.4s\n" + " fmul v2.4s, v17.4s, v21.4s\n" + " fmul v3.4s, v18.4s, v22.4s\n" + " fmul v4.4s, v19.4s, v23.4s\n" + " b.eq 3f\n" + "2:" + " ld1 {v16.4s, v17.4s, v18.4s, v19.4s}, [%[b]], #64\n" + " ld1 {v20.4s, v21.4s, v22.4s, v23.4s}, [%[a]], #64\n" + " subs %w[len], %w[len], #16\n" + " fmla v1.4s, v16.4s, v20.4s\n" + " fmla v2.4s, v17.4s, v21.4s\n" + " fmla v3.4s, v18.4s, v22.4s\n" + " fmla v4.4s, v19.4s, v23.4s\n" + " b.ne 2b\n" + "3:" + " fadd v16.4s, v1.4s, v2.4s\n" + " fadd v17.4s, v3.4s, v4.4s\n" + " cmp %w[remainder], #0\n" + " fadd v1.4s, v16.4s, v17.4s\n" + " b.eq 5f\n" + "4:" + " ld1 {v18.4s}, [%[b]], #16\n" + " ld1 {v22.4s}, [%[a]], #16\n" + " subs %w[remainder], %w[remainder], #4\n" + " fmla v1.4s, v18.4s, v22.4s\n" + " b.ne 4b\n" + "5:" + " faddp v1.4s, v1.4s, v1.4s\n" + " faddp %[ret].4s, v1.4s, v1.4s\n" + : [ret] "=w" (ret), [a] "+r" (a), [b] "+r" (b), + [len] "+r" (len), [remainder] "+r" (remainder) + : + : "cc", "v1", "v2", "v3", "v4", + "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23"); + return ret; +} +#else +inline float inner_product_single(const float *a, const float *b, unsigned int len) +{ + float ret; + uint32_t remainder = len % 16; + len = len - remainder; + + asm volatile (" cmp %[len], #0\n" + " bne 1f\n" + " vld1.32 {q4}, [%[b]]!\n" + " vld1.32 {q8}, [%[a]]!\n" + " subs %[remainder], %[remainder], #4\n" + " vmul.f32 q0, q4, q8\n" + " bne 4f\n" + " b 5f\n" + "1:" + " vld1.32 {q4, q5}, [%[b]]!\n" + " vld1.32 {q8, q9}, [%[a]]!\n" + " vld1.32 {q6, q7}, [%[b]]!\n" + " vld1.32 {q10, q11}, [%[a]]!\n" + " subs %[len], %[len], #16\n" + " vmul.f32 q0, q4, q8\n" + " vmul.f32 q1, q5, q9\n" + " vmul.f32 q2, q6, q10\n" + " vmul.f32 q3, q7, q11\n" + " beq 3f\n" + "2:" + " vld1.32 {q4, q5}, [%[b]]!\n" + " vld1.32 {q8, q9}, [%[a]]!\n" + " vld1.32 {q6, q7}, [%[b]]!\n" + " vld1.32 {q10, q11}, [%[a]]!\n" + " subs %[len], %[len], #16\n" + " vmla.f32 q0, q4, q8\n" + " vmla.f32 q1, q5, q9\n" + " vmla.f32 q2, q6, q10\n" + " vmla.f32 q3, q7, q11\n" + " bne 2b\n" + "3:" + " vadd.f32 q4, q0, q1\n" + " vadd.f32 q5, q2, q3\n" + " cmp %[remainder], #0\n" + " vadd.f32 q0, q4, q5\n" + " beq 5f\n" + "4:" + " vld1.32 {q6}, [%[b]]!\n" + " vld1.32 {q10}, [%[a]]!\n" + " subs %[remainder], %[remainder], #4\n" + " vmla.f32 q0, q6, q10\n" + " bne 4b\n" + "5:" + " vadd.f32 d0, d0, d1\n" + " vpadd.f32 d0, d0, d0\n" + " vmov.f32 %[ret], d0[0]\n" + : [ret] "=r" (ret), [a] "+r" (a), [b] "+r" (b), + [len] "+l" (len), [remainder] "+l" (remainder) + : + : "cc", "q0", "q1", "q2", "q3", + "q4", "q5", "q6", "q7", "q8", "q9", "q10", "q11"); + return ret; +} +#endif // defined(__aarch64__) +#endif diff --git a/media/libspeex_resampler/src/resample_sse.c b/media/libspeex_resampler/src/resample_sse.c new file mode 100644 index 0000000000..c3a864ccff --- /dev/null +++ b/media/libspeex_resampler/src/resample_sse.c @@ -0,0 +1,130 @@ +/* Copyright (C) 2007-2008 Jean-Marc Valin + * Copyright (C) 2008 Thorvald Natvig + */ +/** + @file resample_sse.h + @brief Resampler functions (SSE version) +*/ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + - Neither the name of the Xiph.org Foundation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include "simd_detect.h" + +#include + +#define OVERRIDE_INNER_PRODUCT_SINGLE +float inner_product_single(const float *a, const float *b, unsigned int len) +{ + int i; + float ret; + __m128 sum = _mm_setzero_ps(); + for (i=0;i +#define OVERRIDE_INNER_PRODUCT_DOUBLE + +double inner_product_double(const float *a, const float *b, unsigned int len) +{ + int i; + double ret; + __m128d sum = _mm_setzero_pd(); + __m128 t; + for (i=0;i