summary | refs | log | tree | commit | diff | stats
path: root/media/libopus
diff options
context:
space:
mode:
Diffstat (limited to 'media/libopus')
-rw-r--r-- media/libopus/COPYING 4
-rw-r--r-- media/libopus/celt/arm/arm_celt_map.c 31
-rw-r--r-- media/libopus/celt/arm/armcpu.c 51
-rw-r--r-- media/libopus/celt/arm/armcpu.h 13
-rw-r--r-- media/libopus/celt/arm/celt_neon_intr.c 83
-rw-r--r-- media/libopus/celt/arm/pitch_neon_intr.c 7
-rw-r--r-- media/libopus/celt/celt.h 25
-rw-r--r-- media/libopus/celt/celt_decoder.c 360
-rw-r--r-- media/libopus/celt/celt_encoder.c 68
-rw-r--r-- media/libopus/celt/celt_lpc.c 27
-rw-r--r-- media/libopus/celt/celt_lpc.h 2
-rw-r--r-- media/libopus/celt/cpu_support.h 7
-rw-r--r-- media/libopus/celt/entdec.c 21
-rw-r--r-- media/libopus/celt/entdec.h 10
-rw-r--r-- media/libopus/celt/entenc.c 11
-rw-r--r-- media/libopus/celt/entenc.h 9
-rw-r--r-- media/libopus/celt/laplace.c 101
-rw-r--r-- media/libopus/celt/laplace.h 9
-rw-r--r-- media/libopus/celt/mathops.h 6
-rw-r--r-- media/libopus/celt/mips/celt_mipsr1.h 6
-rw-r--r-- media/libopus/celt/mips/mdct_mipsr1.h 6
-rw-r--r-- media/libopus/celt/mips/vq_mipsr1.h 6
-rw-r--r-- media/libopus/celt/os_support.h 14
-rw-r--r-- media/libopus/celt/pitch.c 11
-rw-r--r-- media/libopus/celt/pitch.h 11
-rw-r--r-- media/libopus/celt/stack_alloc.h 2
-rw-r--r-- media/libopus/celt/x86/celt_lpc_sse4_1.c 13
-rw-r--r-- media/libopus/celt/x86/pitch_avx.c 101
-rw-r--r-- media/libopus/celt/x86/pitch_sse.h 40
-rw-r--r-- media/libopus/celt/x86/vq_sse.h 6
-rw-r--r-- media/libopus/celt/x86/vq_sse2.c 8
-rw-r--r-- media/libopus/celt/x86/x86_arch_macros.h 47
-rw-r--r-- media/libopus/celt/x86/x86_celt_map.c 20
-rw-r--r-- media/libopus/celt/x86/x86cpu.c 16
-rw-r--r-- media/libopus/celt/x86/x86cpu.h 49
-rw-r--r-- media/libopus/include/opus.h 122
-rw-r--r-- media/libopus/include/opus_defines.h 29
-rw-r--r-- media/libopus/include/opus_multistream.h 2
-rw-r--r-- media/libopus/moz.build 2
-rw-r--r-- media/libopus/moz.yaml 4
-rw-r--r-- media/libopus/silk/API.h 23
-rw-r--r-- media/libopus/silk/NSQ.c 2
-rw-r--r-- media/libopus/silk/NSQ_del_dec.c 2
-rw-r--r-- media/libopus/silk/PLC.c 61
-rw-r--r-- media/libopus/silk/PLC.h 3
-rw-r--r-- media/libopus/silk/arm/NSQ_del_dec_arm.h 4
-rw-r--r-- media/libopus/silk/arm/NSQ_del_dec_neon_intr.c 28
-rw-r--r-- media/libopus/silk/arm/NSQ_neon.h 4
-rw-r--r-- media/libopus/silk/arm/arm_silk_map.c 7
-rw-r--r-- media/libopus/silk/control.h 11
-rw-r--r-- media/libopus/silk/dec_API.c 69
-rw-r--r-- media/libopus/silk/decode_frame.c 60
-rw-r--r-- media/libopus/silk/enc_API.c 4
-rw-r--r-- media/libopus/silk/fixed/encode_frame_FIX.c 18
-rw-r--r-- media/libopus/silk/float/SigProc_FLP.h 14
-rw-r--r-- media/libopus/silk/float/autocorrelation_FLP.c 5
-rw-r--r-- media/libopus/silk/float/burg_modified_FLP.c 5
-rw-r--r-- media/libopus/silk/float/corrMatrix_FLP.c 10
-rw-r--r-- media/libopus/silk/float/encode_frame_FLP.c 17
-rw-r--r-- media/libopus/silk/float/find_LPC_FLP.c 7
-rw-r--r-- media/libopus/silk/float/find_LTP_FLP.c 7
-rw-r--r-- media/libopus/silk/float/find_pitch_lags_FLP.c 2
-rw-r--r-- media/libopus/silk/float/find_pred_coefs_FLP.c 4
-rw-r--r-- media/libopus/silk/float/inner_product_FLP.c 2
-rw-r--r-- media/libopus/silk/float/main_FLP.h 12
-rw-r--r-- media/libopus/silk/float/noise_shape_analysis_FLP.c 2
-rw-r--r-- media/libopus/silk/float/pitch_analysis_core_FLP.c 2
-rw-r--r-- media/libopus/silk/float/warped_autocorrelation_FLP.c 6
-rw-r--r-- media/libopus/silk/float/x86/inner_product_FLP_avx2.c 85
-rw-r--r-- media/libopus/silk/init_decoder.c 33
-rw-r--r-- media/libopus/silk/init_encoder.c 4
-rw-r--r-- media/libopus/silk/main.h 14
-rw-r--r-- media/libopus/silk/mips/NSQ_del_dec_mipsr1.h 6
-rw-r--r-- media/libopus/silk/mips/macros_mipsr1.h 6
-rw-r--r-- media/libopus/silk/structs.h 28
-rw-r--r-- media/libopus/silk/x86/NSQ_del_dec_avx2.c 1075
-rw-r--r-- media/libopus/silk/x86/NSQ_del_dec_sse4_1.c 18
-rw-r--r-- media/libopus/silk/x86/NSQ_sse4_1.c 38
-rw-r--r-- media/libopus/silk/x86/VAD_sse4_1.c 2
-rw-r--r-- media/libopus/silk/x86/VQ_WMat_EC_sse4_1.c 2
-rw-r--r-- media/libopus/silk/x86/main_sse.h 62
-rw-r--r-- media/libopus/silk/x86/x86_silk_map.c 28
-rw-r--r-- media/libopus/sources.mozbuild 13
-rw-r--r-- media/libopus/src/analysis.c 6
-rw-r--r-- media/libopus/src/extensions.c 315
-rw-r--r-- media/libopus/src/mapping_matrix.c 561
-rw-r--r-- media/libopus/src/mapping_matrix.h 12
-rw-r--r-- media/libopus/src/mlp.c 42
-rw-r--r-- media/libopus/src/mlp.h 20
-rw-r--r-- media/libopus/src/mlp_data.c 6
-rw-r--r-- media/libopus/src/opus.c 10
-rw-r--r-- media/libopus/src/opus_decoder.c 495
-rw-r--r-- media/libopus/src/opus_encoder.c 651
-rw-r--r-- media/libopus/src/opus_multistream_decoder.c 4
-rw-r--r-- media/libopus/src/opus_multistream_encoder.c 2
-rw-r--r-- media/libopus/src/opus_private.h 28
-rw-r--r-- media/libopus/src/opus_projection_encoder.c 42
-rw-r--r-- media/libopus/src/repacketizer.c 144
-rw-r--r-- media/libopus/src/tansig_table.h 45
99 files changed, 4928 insertions, 620 deletions
diff --git a/media/libopus/COPYING b/media/libopus/COPYING
index 9c739c34a3..75711467a3 100644
--- a/media/libopus/COPYING
+++ b/media/libopus/COPYING
@@ -1,7 +1,7 @@
-Copyright 2001-2011 Xiph.Org, Skype Limited, Octasic,
+Copyright 2001-2023 Xiph.Org, Skype Limited, Octasic,
Jean-Marc Valin, Timothy B. Terriberry,
CSIRO, Gregory Maxwell, Mark Borgerding,
- Erik de Castro Lopo
+ Erik de Castro Lopo, Mozilla, Amazon
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
diff --git a/media/libopus/celt/arm/arm_celt_map.c b/media/libopus/celt/arm/arm_celt_map.c
index ca988b66f5..cbaea49579 100644
--- a/media/libopus/celt/arm/arm_celt_map.c
+++ b/media/libopus/celt/arm/arm_celt_map.c
@@ -40,7 +40,8 @@ opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *x, c
celt_inner_prod_c, /* ARMv4 */
celt_inner_prod_c, /* EDSP */
celt_inner_prod_c, /* Media */
- celt_inner_prod_neon /* NEON */
+ celt_inner_prod_neon,/* NEON */
+ celt_inner_prod_neon /* DOTPROD */
};
void (*const DUAL_INNER_PROD_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
@@ -48,7 +49,8 @@ void (*const DUAL_INNER_PROD_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *x, const o
dual_inner_prod_c, /* ARMv4 */
dual_inner_prod_c, /* EDSP */
dual_inner_prod_c, /* Media */
- dual_inner_prod_neon /* NEON */
+ dual_inner_prod_neon,/* NEON */
+ dual_inner_prod_neon /* DOTPROD */
};
# endif
@@ -61,7 +63,8 @@ opus_val32 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
celt_pitch_xcorr_c, /* ARMv4 */
MAY_HAVE_EDSP(celt_pitch_xcorr), /* EDSP */
MAY_HAVE_MEDIA(celt_pitch_xcorr), /* Media */
- MAY_HAVE_NEON(celt_pitch_xcorr) /* NEON */
+ MAY_HAVE_NEON(celt_pitch_xcorr), /* NEON */
+ MAY_HAVE_NEON(celt_pitch_xcorr) /* DOTPROD */
};
# endif
@@ -72,7 +75,8 @@ void (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
celt_pitch_xcorr_c, /* ARMv4 */
celt_pitch_xcorr_c, /* EDSP */
celt_pitch_xcorr_c, /* Media */
- celt_pitch_xcorr_float_neon /* Neon */
+ celt_pitch_xcorr_float_neon, /* Neon */
+ celt_pitch_xcorr_float_neon /* DOTPROD */
};
# endif
# endif /* FIXED_POINT */
@@ -90,6 +94,7 @@ void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])(
xcorr_kernel_c, /* EDSP */
xcorr_kernel_c, /* Media */
xcorr_kernel_neon_fixed, /* Neon */
+ xcorr_kernel_neon_fixed /* DOTPROD */
};
#endif
@@ -101,14 +106,16 @@ int (*const OPUS_FFT_ALLOC_ARCH_IMPL[OPUS_ARCHMASK+1])(kiss_fft_state *st) = {
opus_fft_alloc_arch_c, /* ARMv4 */
opus_fft_alloc_arch_c, /* EDSP */
opus_fft_alloc_arch_c, /* Media */
- opus_fft_alloc_arm_neon /* Neon with NE10 library support */
+ opus_fft_alloc_arm_neon, /* Neon with NE10 library support */
+ opus_fft_alloc_arm_neon /* DOTPROD with NE10 library support */
};
void (*const OPUS_FFT_FREE_ARCH_IMPL[OPUS_ARCHMASK+1])(kiss_fft_state *st) = {
opus_fft_free_arch_c, /* ARMv4 */
opus_fft_free_arch_c, /* EDSP */
opus_fft_free_arch_c, /* Media */
- opus_fft_free_arm_neon /* Neon with NE10 */
+ opus_fft_free_arm_neon, /* Neon with NE10 */
+ opus_fft_free_arm_neon /* DOTPROD with NE10 */
};
# endif /* CUSTOM_MODES */
@@ -118,7 +125,8 @@ void (*const OPUS_FFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg,
opus_fft_c, /* ARMv4 */
opus_fft_c, /* EDSP */
opus_fft_c, /* Media */
- opus_fft_neon /* Neon with NE10 */
+ opus_fft_neon, /* Neon with NE10 */
+ opus_fft_neon /* DOTPROD with NE10 */
};
void (*const OPUS_IFFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg,
@@ -127,7 +135,8 @@ void (*const OPUS_IFFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg,
opus_ifft_c, /* ARMv4 */
opus_ifft_c, /* EDSP */
opus_ifft_c, /* Media */
- opus_ifft_neon /* Neon with NE10 */
+ opus_ifft_neon, /* Neon with NE10 */
+ opus_ifft_neon /* DOTPROD with NE10 */
};
void (*const CLT_MDCT_FORWARD_IMPL[OPUS_ARCHMASK+1])(const mdct_lookup *l,
@@ -139,7 +148,8 @@ void (*const CLT_MDCT_FORWARD_IMPL[OPUS_ARCHMASK+1])(const mdct_lookup *l,
clt_mdct_forward_c, /* ARMv4 */
clt_mdct_forward_c, /* EDSP */
clt_mdct_forward_c, /* Media */
- clt_mdct_forward_neon /* Neon with NE10 */
+ clt_mdct_forward_neon, /* Neon with NE10 */
+ clt_mdct_forward_neon /* DOTPROD with NE10 */
};
void (*const CLT_MDCT_BACKWARD_IMPL[OPUS_ARCHMASK+1])(const mdct_lookup *l,
@@ -151,7 +161,8 @@ void (*const CLT_MDCT_BACKWARD_IMPL[OPUS_ARCHMASK+1])(const mdct_lookup *l,
clt_mdct_backward_c, /* ARMv4 */
clt_mdct_backward_c, /* EDSP */
clt_mdct_backward_c, /* Media */
- clt_mdct_backward_neon /* Neon with NE10 */
+ clt_mdct_backward_neon, /* Neon with NE10 */
+ clt_mdct_backward_neon /* DOTPROD with NE10 */
};
# endif /* HAVE_ARM_NE10 */
diff --git a/media/libopus/celt/arm/armcpu.c b/media/libopus/celt/arm/armcpu.c
index c7d16e6d61..06a53435b8 100644
--- a/media/libopus/celt/arm/armcpu.c
+++ b/media/libopus/celt/arm/armcpu.c
@@ -43,6 +43,7 @@
#define OPUS_CPU_ARM_EDSP_FLAG (1<<OPUS_ARCH_ARM_EDSP)
#define OPUS_CPU_ARM_MEDIA_FLAG (1<<OPUS_ARCH_ARM_MEDIA)
#define OPUS_CPU_ARM_NEON_FLAG (1<<OPUS_ARCH_ARM_NEON)
+#define OPUS_CPU_ARM_DOTPROD_FLAG (1<<OPUS_ARCH_ARM_DOTPROD)
#if defined(_MSC_VER)
/*For GetExceptionCode() and EXCEPTION_ILLEGAL_INSTRUCTION.*/
@@ -126,6 +127,14 @@ opus_uint32 opus_cpu_capabilities(void)
p = strstr(buf, " neon");
if(p != NULL && (p[5] == ' ' || p[5] == '\n'))
flags |= OPUS_CPU_ARM_NEON_FLAG;
+ p = strstr(buf, " asimd");
+ if(p != NULL && (p[6] == ' ' || p[6] == '\n'))
+ flags |= OPUS_CPU_ARM_NEON_FLAG | OPUS_CPU_ARM_MEDIA_FLAG | OPUS_CPU_ARM_EDSP_FLAG;
+# endif
+# if defined(OPUS_ARM_MAY_HAVE_DOTPROD)
+ p = strstr(buf, " asimddp");
+ if(p != NULL && (p[8] == ' ' || p[8] == '\n'))
+ flags |= OPUS_CPU_ARM_DOTPROD_FLAG;
# endif
}
# endif
@@ -144,10 +153,44 @@ opus_uint32 opus_cpu_capabilities(void)
# endif
}
+#if defined(OPUS_ARM_PRESUME_AARCH64_NEON_INTR)
+ flags |= OPUS_CPU_ARM_EDSP_FLAG | OPUS_CPU_ARM_MEDIA_FLAG | OPUS_CPU_ARM_NEON_FLAG;
+# if defined(OPUS_ARM_PRESUME_DOTPROD)
+ flags |= OPUS_CPU_ARM_DOTPROD_FLAG;
+# endif
+#endif
+
fclose(cpuinfo);
}
return flags;
}
+
+#elif defined(__APPLE__)
+#include <sys/types.h>
+#include <sys/sysctl.h>
+
+opus_uint32 opus_cpu_capabilities(void)
+{
+ opus_uint32 flags = 0;
+
+#if defined(OPUS_ARM_MAY_HAVE_DOTPROD)
+ size_t size = sizeof(uint32_t);
+ uint32_t value = 0;
+ if (!sysctlbyname("hw.optional.arm.FEAT_DotProd", &value, &size, NULL, 0) && value)
+ {
+ flags |= OPUS_CPU_ARM_DOTPROD_FLAG;
+ }
+#endif
+
+#if defined(OPUS_ARM_PRESUME_AARCH64_NEON_INTR)
+ flags |= OPUS_CPU_ARM_EDSP_FLAG | OPUS_CPU_ARM_MEDIA_FLAG | OPUS_CPU_ARM_NEON_FLAG;
+# if defined(OPUS_ARM_PRESUME_DOTPROD)
+ flags |= OPUS_CPU_ARM_DOTPROD_FLAG;
+# endif
+#endif
+ return flags;
+}
+
#else
/* The feature registers which can tell us what the processor supports are
* accessible in privileged modes only, so we can't have a general user-space
@@ -180,7 +223,13 @@ static int opus_select_arch_impl(void)
}
arch++;
- celt_assert(arch == OPUS_ARCH_ARM_NEON);
+ if(!(flags & OPUS_CPU_ARM_DOTPROD_FLAG)) {
+ celt_assert(arch == OPUS_ARCH_ARM_NEON);
+ return arch;
+ }
+ arch++;
+
+ celt_assert(arch == OPUS_ARCH_ARM_DOTPROD);
return arch;
}
diff --git a/media/libopus/celt/arm/armcpu.h b/media/libopus/celt/arm/armcpu.h
index 820262ff5f..6d5803d81a 100644
--- a/media/libopus/celt/arm/armcpu.h
+++ b/media/libopus/celt/arm/armcpu.h
@@ -46,6 +46,12 @@
# define MAY_HAVE_NEON(name) MAY_HAVE_MEDIA(name)
# endif
+# if defined(OPUS_ARM_MAY_HAVE_DOTPROD)
+# define MAY_HAVE_DOTPROD(name) name ## _dotprod
+# else
+# define MAY_HAVE_DOTPROD(name) MAY_HAVE_NEON(name)
+# endif
+
# if defined(OPUS_ARM_PRESUME_EDSP)
# define PRESUME_EDSP(name) name ## _edsp
# else
@@ -64,6 +70,12 @@
# define PRESUME_NEON(name) PRESUME_MEDIA(name)
# endif
+# if defined(OPUS_ARM_PRESUME_DOTPROD)
+# define PRESUME_DOTPROD(name) name ## _dotprod
+# else
+# define PRESUME_DOTPROD(name) PRESUME_NEON(name)
+# endif
+
# if defined(OPUS_HAVE_RTCD)
int opus_select_arch(void);
@@ -71,6 +83,7 @@ int opus_select_arch(void);
#define OPUS_ARCH_ARM_EDSP (1)
#define OPUS_ARCH_ARM_MEDIA (2)
#define OPUS_ARCH_ARM_NEON (3)
+#define OPUS_ARCH_ARM_DOTPROD (4)
# endif
diff --git a/media/libopus/celt/arm/celt_neon_intr.c b/media/libopus/celt/arm/celt_neon_intr.c
index effda769d0..250f836218 100644
--- a/media/libopus/celt/arm/celt_neon_intr.c
+++ b/media/libopus/celt/arm/celt_neon_intr.c
@@ -38,6 +38,8 @@
#include "../pitch.h"
#if defined(FIXED_POINT)
+#include <string.h>
+
void xcorr_kernel_neon_fixed(const opus_val16 * x, const opus_val16 * y, opus_val32 sum[4], int len)
{
int j;
@@ -47,7 +49,10 @@ void xcorr_kernel_neon_fixed(const opus_val16 * x, const opus_val16 * y, opus_va
int16x4_t y0 = vld1_s16(y);
y += 4;
- for (j = 0; j + 8 <= len; j += 8)
+ /* This loop loads one y value more than we actually need.
+ Therefore we have to stop as soon as there are 8 or fewer samples left
+ (instead of 7), to avoid reading past the end of the array. */
+ for (j = 0; j + 8 < len; j += 8)
{
/* Load x[0...7] */
int16x8_t xx = vld1q_s16(x);
@@ -80,23 +85,79 @@ void xcorr_kernel_neon_fixed(const opus_val16 * x, const opus_val16 * y, opus_va
x += 8;
y += 8;
}
-
- for (; j < len; j++)
- {
- int16x4_t x0 = vld1_dup_s16(x); /* load next x */
+ if (j + 4 < len) {
+ /* Load x[0...3] */
+ int16x4_t x0 = vld1_s16(x);
+ /* Load y[4...7] */
+ int16x4_t y4 = vld1_s16(y);
+ int32x4_t a0 = vmlal_lane_s16(a, y0, x0, 0);
+ int16x4_t y1 = vext_s16(y0, y4, 1);
+ int32x4_t a1 = vmlal_lane_s16(a0, y1, x0, 1);
+ int16x4_t y2 = vext_s16(y0, y4, 2);
+ int32x4_t a2 = vmlal_lane_s16(a1, y2, x0, 2);
+ int16x4_t y3 = vext_s16(y0, y4, 3);
+ int32x4_t a3 = vmlal_lane_s16(a2, y3, x0, 3);
+ y0 = y4;
+ a = a3;
+ x += 4;
+ y += 4;
+ j += 4;
+ }
+ if (j + 2 < len) {
+ /* Load x[0...1] */
+ int16x4x2_t xx = vld2_dup_s16(x);
+ int16x4_t x0 = xx.val[0];
+ int16x4_t x1 = xx.val[1];
+ /* Load y[4...5].
+ We would like to use vld1_dup_s32(), but casting the pointer would
+ break strict aliasing rules and potentially have alignment issues.
+ Fortunately the compiler seems capable of translating this memcpy()
+ and vdup_n_s32() into the equivalent vld1_dup_s32().*/
+ int32_t yy;
+ memcpy(&yy, y, sizeof(yy));
+ int16x4_t y4 = vreinterpret_s16_s32(vdup_n_s32(yy));
int32x4_t a0 = vmlal_s16(a, y0, x0);
-
- int16x4_t y4 = vld1_dup_s16(y); /* load next y */
- y0 = vext_s16(y0, y4, 1);
+ int16x4_t y1 = vext_s16(y0, y4, 1);
+ /* Replace bottom copy of {y[5], y[4]} in y4 with {y[3], y[2]} from y0,
+ using VSRI instead of VEXT, since it's a data-processing
+ instruction. */
+ y0 = vreinterpret_s16_s64(vsri_n_s64(vreinterpret_s64_s16(y4),
+ vreinterpret_s64_s16(y0), 32));
+ int32x4_t a1 = vmlal_s16(a0, y1, x1);
+ a = a1;
+ x += 2;
+ y += 2;
+ j += 2;
+ }
+ if (j + 1 < len) {
+ /* Load next x. */
+ int16x4_t x0 = vld1_dup_s16(x);
+ int32x4_t a0 = vmlal_s16(a, y0, x0);
+ /* Load last y. */
+ int16x4_t y4 = vld1_dup_s16(y);
+ y0 = vreinterpret_s16_s64(vsri_n_s64(vreinterpret_s64_s16(y4),
+ vreinterpret_s64_s16(y0), 16));
a = a0;
x++;
- y++;
}
-
- vst1q_s32(sum, a);
+ /* Load last x. */
+ int16x4_t x0 = vld1_dup_s16(x);
+ int32x4_t a0 = vmlal_s16(a, y0, x0);
+ vst1q_s32(sum, a0);
}
#else
+
+#if defined(__ARM_FEATURE_FMA) && defined(__ARM_ARCH_ISA_A64)
+/* If we can, force the compiler to use an FMA instruction rather than break
+ * vmlaq_lane_f32() into fmul/fadd. */
+#ifdef vmlaq_lane_f32
+#undef vmlaq_lane_f32
+#endif
+#define vmlaq_lane_f32(a,b,c,lane) vfmaq_lane_f32(a,b,c,lane)
+#endif
+
+
/*
* Function: xcorr_kernel_neon_float
* ---------------------------------
diff --git a/media/libopus/celt/arm/pitch_neon_intr.c b/media/libopus/celt/arm/pitch_neon_intr.c
index 35cc46e2c2..43885f528c 100644
--- a/media/libopus/celt/arm/pitch_neon_intr.c
+++ b/media/libopus/celt/arm/pitch_neon_intr.c
@@ -130,6 +130,13 @@ void dual_inner_prod_neon(const opus_val16 *x, const opus_val16 *y01, const opus
/* ========================================================================== */
+#ifdef __ARM_FEATURE_FMA
+/* If we can, force the compiler to use an FMA instruction rather than break
+ vmlaq_f32() into fmul/fadd. */
+#define vmlaq_f32(a,b,c) vfmaq_f32(a,b,c)
+#endif
+
+
#ifdef OPUS_CHECK_ASM
/* This part of code simulates floating-point NEON operations. */
diff --git a/media/libopus/celt/celt.h b/media/libopus/celt/celt.h
index 24b6b2b520..2f501951d5 100644
--- a/media/libopus/celt/celt.h
+++ b/media/libopus/celt/celt.h
@@ -42,6 +42,10 @@
#include "entdec.h"
#include "arch.h"
+#ifdef ENABLE_DEEP_PLC
+#include "lpcnet.h"
+#endif
+
#ifdef __cplusplus
extern "C" {
#endif
@@ -149,6 +153,13 @@ int celt_decoder_get_size(int channels);
int celt_decoder_init(CELTDecoder *st, opus_int32 sampling_rate, int channels);
+int celt_decode_with_ec_dred(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data,
+ int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec, int accum
+#ifdef ENABLE_DEEP_PLC
+ ,LPCNetPLCState *lpcnet
+#endif
+ );
+
int celt_decode_with_ec(OpusCustomDecoder * OPUS_RESTRICT st, const unsigned char *data,
int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec, int accum);
@@ -225,23 +236,13 @@ void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
opus_val16 g0, opus_val16 g1, int tapset0, int tapset1,
const opus_val16 *window, int overlap, int arch);
-#ifdef NON_STATIC_COMB_FILTER_CONST_C
-void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N,
- opus_val16 g10, opus_val16 g11, opus_val16 g12);
-#endif
-
-#ifndef OVERRIDE_COMB_FILTER_CONST
-# define comb_filter_const(y, x, T, N, g10, g11, g12, arch) \
- ((void)(arch),comb_filter_const_c(y, x, T, N, g10, g11, g12))
-#endif
-
void init_caps(const CELTMode *m,int *cap,int LM,int C);
#ifdef RESYNTH
-void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef, celt_sig *mem);
+void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef, celt_sig *mem, int accum);
void celt_synthesis(const CELTMode *mode, celt_norm *X, celt_sig * out_syn[],
opus_val16 *oldBandE, int start, int effEnd, int C, int CC, int isTransient,
- int LM, int downsample, int silence);
+ int LM, int downsample, int silence, int arch);
#endif
#ifdef __cplusplus
diff --git a/media/libopus/celt/celt_decoder.c b/media/libopus/celt/celt_decoder.c
index 883dae15d2..743c2031bc 100644
--- a/media/libopus/celt/celt_decoder.c
+++ b/media/libopus/celt/celt_decoder.c
@@ -51,6 +51,11 @@
#include "celt_lpc.h"
#include "vq.h"
+#ifdef ENABLE_DEEP_PLC
+#include "lpcnet.h"
+#include "lpcnet_private.h"
+#endif
+
/* The maximum pitch lag to allow in the pitch-based PLC. It's possible to save
CPU time in the PLC pitch search by making this smaller than MAX_PERIOD. The
current value corresponds to a pitch of 66.67 Hz. */
@@ -59,9 +64,6 @@
pitch of 480 Hz. */
#define PLC_PITCH_LAG_MIN (100)
-#if defined(SMALL_FOOTPRINT) && defined(FIXED_POINT)
-#define NORM_ALIASING_HACK
-#endif
/**********************************************************************/
/* */
/* DECODER */
@@ -69,6 +71,9 @@
/**********************************************************************/
#define DECODE_BUFFER_SIZE 2048
+#define PLC_UPDATE_FRAMES 4
+#define PLC_UPDATE_SAMPLES (PLC_UPDATE_FRAMES*FRAME_SIZE)
+
/** Decoder state
@brief Decoder state
*/
@@ -82,6 +87,7 @@ struct OpusCustomDecoder {
int start, end;
int signalling;
int disable_inv;
+ int complexity;
int arch;
/* Everything beyond this point gets cleared on a reset */
@@ -98,11 +104,18 @@ struct OpusCustomDecoder {
opus_val16 postfilter_gain_old;
int postfilter_tapset;
int postfilter_tapset_old;
+ int prefilter_and_fold;
celt_sig preemph_memD[2];
+#ifdef ENABLE_DEEP_PLC
+ opus_int16 plc_pcm[PLC_UPDATE_SAMPLES];
+ int plc_fill;
+ float plc_preemphasis_mem;
+#endif
+
celt_sig _decode_mem[1]; /* Size = channels*(DECODE_BUFFER_SIZE+mode->overlap) */
- /* opus_val16 lpc[], Size = channels*LPC_ORDER */
+ /* opus_val16 lpc[], Size = channels*CELT_LPC_ORDER */
/* opus_val16 oldEBands[], Size = 2*mode->nbEBands */
/* opus_val16 oldLogE[], Size = 2*mode->nbEBands */
/* opus_val16 oldLogE2[], Size = 2*mode->nbEBands */
@@ -157,7 +170,7 @@ OPUS_CUSTOM_NOSTATIC int opus_custom_decoder_get_size(const CELTMode *mode, int
{
int size = sizeof(struct CELTDecoder)
+ (channels*(DECODE_BUFFER_SIZE+mode->overlap)-1)*sizeof(celt_sig)
- + channels*LPC_ORDER*sizeof(opus_val16)
+ + channels*CELT_LPC_ORDER*sizeof(opus_val16)
+ 4*2*mode->nbEBands*sizeof(opus_val16);
return size;
}
@@ -499,7 +512,100 @@ static int celt_plc_pitch_search(celt_sig *decode_mem[2], int C, int arch)
return pitch_index;
}
-static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
+static void prefilter_and_fold(CELTDecoder * OPUS_RESTRICT st, int N)
+{
+ int c;
+ int CC;
+ int i;
+ int overlap;
+ celt_sig *decode_mem[2];
+ const OpusCustomMode *mode;
+ VARDECL(opus_val32, etmp);
+ mode = st->mode;
+ overlap = st->overlap;
+ CC = st->channels;
+ ALLOC(etmp, overlap, opus_val32);
+ c=0; do {
+ decode_mem[c] = st->_decode_mem + c*(DECODE_BUFFER_SIZE+overlap);
+ } while (++c<CC);
+
+ c=0; do {
+ /* Apply the pre-filter to the MDCT overlap for the next frame because
+ the post-filter will be re-applied in the decoder after the MDCT
+ overlap. */
+ comb_filter(etmp, decode_mem[c]+DECODE_BUFFER_SIZE-N,
+ st->postfilter_period_old, st->postfilter_period, overlap,
+ -st->postfilter_gain_old, -st->postfilter_gain,
+ st->postfilter_tapset_old, st->postfilter_tapset, NULL, 0, st->arch);
+
+ /* Simulate TDAC on the concealed audio so that it blends with the
+ MDCT of the next frame. */
+ for (i=0;i<overlap/2;i++)
+ {
+ decode_mem[c][DECODE_BUFFER_SIZE-N+i] =
+ MULT16_32_Q15(mode->window[i], etmp[overlap-1-i])
+ + MULT16_32_Q15(mode->window[overlap-i-1], etmp[i]);
+ }
+ } while (++c<CC);
+}
+
+#ifdef ENABLE_DEEP_PLC
+
+#define SINC_ORDER 48
+/* h=cos(pi/2*abs(sin([-24:24]/48*pi*23./24)).^2);
+ b=sinc([-24:24]/3*1.02).*h;
+ b=b/sum(b); */
+static const float sinc_filter[SINC_ORDER+1] = {
+ 4.2931e-05f, -0.000190293f, -0.000816132f, -0.000637162f, 0.00141662f, 0.00354764f, 0.00184368f, -0.00428274f,
+ -0.00856105f, -0.0034003f, 0.00930201f, 0.0159616f, 0.00489785f, -0.0169649f, -0.0259484f, -0.00596856f,
+ 0.0286551f, 0.0405872f, 0.00649994f, -0.0509284f, -0.0716655f, -0.00665212f, 0.134336f, 0.278927f,
+ 0.339995f, 0.278927f, 0.134336f, -0.00665212f, -0.0716655f, -0.0509284f, 0.00649994f, 0.0405872f,
+ 0.0286551f, -0.00596856f, -0.0259484f, -0.0169649f, 0.00489785f, 0.0159616f, 0.00930201f, -0.0034003f,
+ -0.00856105f, -0.00428274f, 0.00184368f, 0.00354764f, 0.00141662f, -0.000637162f, -0.000816132f, -0.000190293f,
+ 4.2931e-05f
+};
+
+void update_plc_state(LPCNetPLCState *lpcnet, celt_sig *decode_mem[2], float *plc_preemphasis_mem, int CC)
+{
+ int i;
+ int tmp_read_post, tmp_fec_skip;
+ int offset;
+ celt_sig buf48k[DECODE_BUFFER_SIZE];
+ opus_int16 buf16k[PLC_UPDATE_SAMPLES];
+ if (CC == 1) OPUS_COPY(buf48k, decode_mem[0], DECODE_BUFFER_SIZE);
+ else {
+ for (i=0;i<DECODE_BUFFER_SIZE;i++) {
+ buf48k[i] = .5*(decode_mem[0][i] + decode_mem[1][i]);
+ }
+ }
+ /* Down-sample the last 40 ms. */
+ for (i=1;i<DECODE_BUFFER_SIZE;i++) buf48k[i] += PREEMPHASIS*buf48k[i-1];
+ *plc_preemphasis_mem = buf48k[DECODE_BUFFER_SIZE-1];
+ offset = DECODE_BUFFER_SIZE-SINC_ORDER-1 - 3*(PLC_UPDATE_SAMPLES-1);
+ celt_assert(3*(PLC_UPDATE_SAMPLES-1) + SINC_ORDER + offset == DECODE_BUFFER_SIZE-1);
+ for (i=0;i<PLC_UPDATE_SAMPLES;i++) {
+ int j;
+ float sum = 0;
+ for (j=0;j<SINC_ORDER+1;j++) {
+ sum += buf48k[3*i + j + offset]*sinc_filter[j];
+ }
+ buf16k[i] = float2int(MIN32(32767.f, MAX32(-32767.f, sum)));
+ }
+ tmp_read_post = lpcnet->fec_read_pos;
+ tmp_fec_skip = lpcnet->fec_skip;
+ for (i=0;i<PLC_UPDATE_FRAMES;i++) {
+ lpcnet_plc_update(lpcnet, &buf16k[FRAME_SIZE*i]);
+ }
+ lpcnet->fec_read_pos = tmp_read_post;
+ lpcnet->fec_skip = tmp_fec_skip;
+}
+#endif
+
+static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM
+#ifdef ENABLE_DEEP_PLC
+ ,LPCNetPLCState *lpcnet
+#endif
+ )
{
int c;
int i;
@@ -527,22 +633,22 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
out_syn[c] = decode_mem[c]+DECODE_BUFFER_SIZE-N;
} while (++c<C);
lpc = (opus_val16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+overlap)*C);
- oldBandE = lpc+C*LPC_ORDER;
+ oldBandE = lpc+C*CELT_LPC_ORDER;
oldLogE = oldBandE + 2*nbEBands;
oldLogE2 = oldLogE + 2*nbEBands;
backgroundLogE = oldLogE2 + 2*nbEBands;
loss_duration = st->loss_duration;
start = st->start;
+#ifdef ENABLE_DEEP_PLC
+ noise_based = start != 0 || (lpcnet->fec_fill_pos == 0 && (st->skip_plc || loss_duration >= 80));
+#else
noise_based = loss_duration >= 40 || start != 0 || st->skip_plc;
+#endif
if (noise_based)
{
/* Noise-based PLC/CNG */
-#ifdef NORM_ALIASING_HACK
- celt_norm *X;
-#else
VARDECL(celt_norm, X);
-#endif
opus_uint32 seed;
int end;
int effEnd;
@@ -550,18 +656,16 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
end = st->end;
effEnd = IMAX(start, IMIN(end, mode->effEBands));
-#ifdef NORM_ALIASING_HACK
- /* This is an ugly hack that breaks aliasing rules and would be easily broken,
- but it saves almost 4kB of stack. */
- X = (celt_norm*)(out_syn[C-1]+overlap/2);
-#else
ALLOC(X, C*N, celt_norm); /**< Interleaved normalised MDCTs */
-#endif
c=0; do {
OPUS_MOVE(decode_mem[c], decode_mem[c]+N,
- DECODE_BUFFER_SIZE-N+(overlap>>1));
+ DECODE_BUFFER_SIZE-N+overlap);
} while (++c<C);
+ if (st->prefilter_and_fold) {
+ prefilter_and_fold(st, N);
+ }
+
/* Energy decay */
decay = loss_duration==0 ? QCONST16(1.5f, DB_SHIFT) : QCONST16(.5f, DB_SHIFT);
c=0; do
@@ -590,6 +694,9 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
st->rng = seed;
celt_synthesis(mode, X, out_syn, oldBandE, start, effEnd, C, C, 0, LM, st->downsample, 0, st->arch);
+ st->prefilter_and_fold = 0;
+ /* Skip regular PLC until we get two consecutive packets. */
+ st->skip_plc = 1;
} else {
int exc_length;
/* Pitch-based PLC */
@@ -597,12 +704,14 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
opus_val16 *exc;
opus_val16 fade = Q15ONE;
int pitch_index;
- VARDECL(opus_val32, etmp);
VARDECL(opus_val16, _exc);
VARDECL(opus_val16, fir_tmp);
if (loss_duration == 0)
{
+#ifdef ENABLE_DEEP_PLC
+ if (lpcnet->loaded) update_plc_state(lpcnet, decode_mem, &st->plc_preemphasis_mem, C);
+#endif
st->last_pitch_index = pitch_index = celt_plc_pitch_search(decode_mem, C, st->arch);
} else {
pitch_index = st->last_pitch_index;
@@ -613,10 +722,9 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
decaying signal, but we can't get more than MAX_PERIOD. */
exc_length = IMIN(2*pitch_index, MAX_PERIOD);
- ALLOC(etmp, overlap, opus_val32);
- ALLOC(_exc, MAX_PERIOD+LPC_ORDER, opus_val16);
+ ALLOC(_exc, MAX_PERIOD+CELT_LPC_ORDER, opus_val16);
ALLOC(fir_tmp, exc_length, opus_val16);
- exc = _exc+LPC_ORDER;
+ exc = _exc+CELT_LPC_ORDER;
window = mode->window;
c=0; do {
opus_val16 decay;
@@ -628,16 +736,16 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
int j;
buf = decode_mem[c];
- for (i=0;i<MAX_PERIOD+LPC_ORDER;i++)
- exc[i-LPC_ORDER] = SROUND16(buf[DECODE_BUFFER_SIZE-MAX_PERIOD-LPC_ORDER+i], SIG_SHIFT);
+ for (i=0;i<MAX_PERIOD+CELT_LPC_ORDER;i++)
+ exc[i-CELT_LPC_ORDER] = SROUND16(buf[DECODE_BUFFER_SIZE-MAX_PERIOD-CELT_LPC_ORDER+i], SIG_SHIFT);
if (loss_duration == 0)
{
- opus_val32 ac[LPC_ORDER+1];
+ opus_val32 ac[CELT_LPC_ORDER+1];
/* Compute LPC coefficients for the last MAX_PERIOD samples before
the first loss so we can work in the excitation-filter domain. */
_celt_autocorr(exc, ac, window, overlap,
- LPC_ORDER, MAX_PERIOD, st->arch);
+ CELT_LPC_ORDER, MAX_PERIOD, st->arch);
/* Add a noise floor of -40 dB. */
#ifdef FIXED_POINT
ac[0] += SHR32(ac[0],13);
@@ -645,7 +753,7 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
ac[0] *= 1.0001f;
#endif
/* Use lag windowing to stabilize the Levinson-Durbin recursion. */
- for (i=1;i<=LPC_ORDER;i++)
+ for (i=1;i<=CELT_LPC_ORDER;i++)
{
/*ac[i] *= exp(-.5*(2*M_PI*.002*i)*(2*M_PI*.002*i));*/
#ifdef FIXED_POINT
@@ -654,7 +762,7 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
ac[i] -= ac[i]*(0.008f*0.008f)*i*i;
#endif
}
- _celt_lpc(lpc+c*LPC_ORDER, ac, LPC_ORDER);
+ _celt_lpc(lpc+c*CELT_LPC_ORDER, ac, CELT_LPC_ORDER);
#ifdef FIXED_POINT
/* For fixed-point, apply bandwidth expansion until we can guarantee that
no overflow can happen in the IIR filter. This means:
@@ -662,13 +770,13 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
while (1) {
opus_val16 tmp=Q15ONE;
opus_val32 sum=QCONST16(1., SIG_SHIFT);
- for (i=0;i<LPC_ORDER;i++)
- sum += ABS16(lpc[c*LPC_ORDER+i]);
+ for (i=0;i<CELT_LPC_ORDER;i++)
+ sum += ABS16(lpc[c*CELT_LPC_ORDER+i]);
if (sum < 65535) break;
- for (i=0;i<LPC_ORDER;i++)
+ for (i=0;i<CELT_LPC_ORDER;i++)
{
tmp = MULT16_16_Q15(QCONST16(.99f,15), tmp);
- lpc[c*LPC_ORDER+i] = MULT16_16_Q15(lpc[c*LPC_ORDER+i], tmp);
+ lpc[c*CELT_LPC_ORDER+i] = MULT16_16_Q15(lpc[c*CELT_LPC_ORDER+i], tmp);
}
}
#endif
@@ -678,8 +786,8 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
{
/* Compute the excitation for exc_length samples before the loss. We need the copy
because celt_fir() cannot filter in-place. */
- celt_fir(exc+MAX_PERIOD-exc_length, lpc+c*LPC_ORDER,
- fir_tmp, exc_length, LPC_ORDER, st->arch);
+ celt_fir(exc+MAX_PERIOD-exc_length, lpc+c*CELT_LPC_ORDER,
+ fir_tmp, exc_length, CELT_LPC_ORDER, st->arch);
OPUS_COPY(exc+MAX_PERIOD-exc_length, fir_tmp, exc_length);
}
@@ -737,15 +845,15 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
S1 += SHR32(MULT16_16(tmp, tmp), 10);
}
{
- opus_val16 lpc_mem[LPC_ORDER];
+ opus_val16 lpc_mem[CELT_LPC_ORDER];
/* Copy the last decoded samples (prior to the overlap region) to
synthesis filter memory so we can have a continuous signal. */
- for (i=0;i<LPC_ORDER;i++)
+ for (i=0;i<CELT_LPC_ORDER;i++)
lpc_mem[i] = SROUND16(buf[DECODE_BUFFER_SIZE-N-1-i], SIG_SHIFT);
/* Apply the synthesis filter to convert the excitation back into
the signal domain. */
- celt_iir(buf+DECODE_BUFFER_SIZE-N, lpc+c*LPC_ORDER,
- buf+DECODE_BUFFER_SIZE-N, extrapolation_len, LPC_ORDER,
+ celt_iir(buf+DECODE_BUFFER_SIZE-N, lpc+c*CELT_LPC_ORDER,
+ buf+DECODE_BUFFER_SIZE-N, extrapolation_len, CELT_LPC_ORDER,
lpc_mem, st->arch);
#ifdef FIXED_POINT
for (i=0; i < extrapolation_len; i++)
@@ -792,23 +900,65 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
}
}
- /* Apply the pre-filter to the MDCT overlap for the next frame because
- the post-filter will be re-applied in the decoder after the MDCT
- overlap. */
- comb_filter(etmp, buf+DECODE_BUFFER_SIZE,
- st->postfilter_period, st->postfilter_period, overlap,
- -st->postfilter_gain, -st->postfilter_gain,
- st->postfilter_tapset, st->postfilter_tapset, NULL, 0, st->arch);
-
- /* Simulate TDAC on the concealed audio so that it blends with the
- MDCT of the next frame. */
- for (i=0;i<overlap/2;i++)
- {
- buf[DECODE_BUFFER_SIZE+i] =
- MULT16_32_Q15(window[i], etmp[overlap-1-i])
- + MULT16_32_Q15(window[overlap-i-1], etmp[i]);
- }
} while (++c<C);
+
+#ifdef ENABLE_DEEP_PLC
+ if (lpcnet->loaded && (st->complexity >= 5 || lpcnet->fec_fill_pos > 0)) {
+ float overlap_mem;
+ int samples_needed16k;
+ celt_sig *buf;
+ VARDECL(float, buf_copy);
+ buf = decode_mem[0];
+ ALLOC(buf_copy, C*overlap, float);
+ c=0; do {
+ OPUS_COPY(buf_copy+c*overlap, &decode_mem[c][DECODE_BUFFER_SIZE-N], overlap);
+ } while (++c<C);
+
+ /* Need enough samples from the PLC to cover the frame size, resampling delay,
+ and the overlap at the end. */
+ samples_needed16k = (N+SINC_ORDER+overlap)/3;
+ if (loss_duration == 0) {
+ st->plc_fill = 0;
+ }
+ while (st->plc_fill < samples_needed16k) {
+ lpcnet_plc_conceal(lpcnet, &st->plc_pcm[st->plc_fill]);
+ st->plc_fill += FRAME_SIZE;
+ }
+ /* Resample to 48 kHz. */
+ for (i=0;i<(N+overlap)/3;i++) {
+ int j;
+ float sum;
+ for (sum=0, j=0;j<17;j++) sum += 3*st->plc_pcm[i+j]*sinc_filter[3*j];
+ buf[DECODE_BUFFER_SIZE-N+3*i] = sum;
+ for (sum=0, j=0;j<16;j++) sum += 3*st->plc_pcm[i+j+1]*sinc_filter[3*j+2];
+ buf[DECODE_BUFFER_SIZE-N+3*i+1] = sum;
+ for (sum=0, j=0;j<16;j++) sum += 3*st->plc_pcm[i+j+1]*sinc_filter[3*j+1];
+ buf[DECODE_BUFFER_SIZE-N+3*i+2] = sum;
+ }
+ OPUS_MOVE(st->plc_pcm, &st->plc_pcm[N/3], st->plc_fill-N/3);
+ st->plc_fill -= N/3;
+ for (i=0;i<N;i++) {
+ float tmp = buf[DECODE_BUFFER_SIZE-N+i];
+ buf[DECODE_BUFFER_SIZE-N+i] -= PREEMPHASIS*st->plc_preemphasis_mem;
+ st->plc_preemphasis_mem = tmp;
+ }
+ overlap_mem = st->plc_preemphasis_mem;
+ for (i=0;i<overlap;i++) {
+ float tmp = buf[DECODE_BUFFER_SIZE+i];
+ buf[DECODE_BUFFER_SIZE+i] -= PREEMPHASIS*overlap_mem;
+ overlap_mem = tmp;
+ }
+ /* For now, we just do mono PLC. */
+ if (C==2) OPUS_COPY(decode_mem[1], decode_mem[0], DECODE_BUFFER_SIZE+overlap);
+ c=0; do {
+ /* Cross-fade with 48-kHz non-neural PLC for the first 2.5 ms to avoid a discontinuity. */
+ if (loss_duration == 0) {
+ for (i=0;i<overlap;i++) decode_mem[c][DECODE_BUFFER_SIZE-N+i] = (1-window[i])*buf_copy[c*overlap+i] + (window[i])*decode_mem[c][DECODE_BUFFER_SIZE-N+i];
+ }
+ } while (++c<C);
+ }
+#endif
+ st->prefilter_and_fold = 1;
}
/* Saturate to something large to avoid wrap-around. */
@@ -817,18 +967,18 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
RESTORE_STACK;
}
-int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data,
- int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec, int accum)
+int celt_decode_with_ec_dred(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data,
+ int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec, int accum
+#ifdef ENABLE_DEEP_PLC
+ ,LPCNetPLCState *lpcnet
+#endif
+ )
{
int c, i, N;
int spread_decision;
opus_int32 bits;
ec_dec _dec;
-#ifdef NORM_ALIASING_HACK
- celt_norm *X;
-#else
VARDECL(celt_norm, X);
-#endif
VARDECL(int, fine_quant);
VARDECL(int, pulses);
VARDECL(int, cap);
@@ -881,7 +1031,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
frame_size *= st->downsample;
lpc = (opus_val16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+overlap)*CC);
- oldBandE = lpc+CC*LPC_ORDER;
+ oldBandE = lpc+CC*CELT_LPC_ORDER;
oldLogE = oldBandE + 2*nbEBands;
oldLogE2 = oldLogE + 2*nbEBands;
backgroundLogE = oldLogE2 + 2*nbEBands;
@@ -935,15 +1085,25 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
if (data == NULL || len<=1)
{
- celt_decode_lost(st, N, LM);
+ celt_decode_lost(st, N, LM
+#ifdef ENABLE_DEEP_PLC
+ , lpcnet
+#endif
+ );
deemphasis(out_syn, pcm, N, CC, st->downsample, mode->preemph, st->preemph_memD, accum);
RESTORE_STACK;
return frame_size/st->downsample;
}
+#ifdef ENABLE_DEEP_PLC
+ else {
+ /* FIXME: This is a bit of a hack just to make sure opus_decode_native() knows we're no longer in PLC. */
+ if (lpcnet) lpcnet->blend = 0;
+ }
+#endif
/* Check if there are at least two packets received consecutively before
* turning on the pitch-based PLC */
- st->skip_plc = st->loss_duration != 0;
+ if (st->loss_duration == 0) st->skip_plc = 0;
if (dec == NULL)
{
@@ -1006,6 +1166,36 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
/* Decode the global flags (first symbols in the stream) */
intra_ener = tell+3<=total_bits ? ec_dec_bit_logp(dec, 3) : 0;
+ /* If recovering from packet loss, make sure we make the energy prediction safe to reduce the
+ risk of getting loud artifacts. */
+ if (!intra_ener && st->loss_duration != 0) {
+ c=0; do
+ {
+ opus_val16 safety = 0;
+ int missing = IMIN(10, st->loss_duration>>LM);
+ if (LM==0) safety = QCONST16(1.5f,DB_SHIFT);
+ else if (LM==1) safety = QCONST16(.5f,DB_SHIFT);
+ for (i=start;i<end;i++)
+ {
+ if (oldBandE[c*nbEBands+i] < MAX16(oldLogE[c*nbEBands+i], oldLogE2[c*nbEBands+i])) {
+ /* If energy is going down already, continue the trend. */
+ opus_val32 slope;
+ opus_val32 E0, E1, E2;
+ E0 = oldBandE[c*nbEBands+i];
+ E1 = oldLogE[c*nbEBands+i];
+ E2 = oldLogE2[c*nbEBands+i];
+ slope = MAX32(E1 - E0, HALF32(E2 - E0));
+ E0 -= MAX32(0, (1+missing)*slope);
+ oldBandE[c*nbEBands+i] = MAX32(-QCONST16(20.f,DB_SHIFT), E0);
+ } else {
+ /* Otherwise take the min of the last frames. */
+ oldBandE[c*nbEBands+i] = MIN16(MIN16(oldBandE[c*nbEBands+i], oldLogE[c*nbEBands+i]), oldLogE2[c*nbEBands+i]);
+ }
+ /* Shorter frames have more natural fluctuations -- play it safe. */
+ oldBandE[c*nbEBands+i] -= safety;
+ }
+ } while (++c<2);
+ }
/* Get band energies */
unquant_coarse_energy(mode, start, end, oldBandE,
intra_ener, dec, C, LM);
@@ -1073,19 +1263,13 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
unquant_fine_energy(mode, start, end, oldBandE, fine_quant, dec, C);
c=0; do {
- OPUS_MOVE(decode_mem[c], decode_mem[c]+N, DECODE_BUFFER_SIZE-N+overlap/2);
+ OPUS_MOVE(decode_mem[c], decode_mem[c]+N, DECODE_BUFFER_SIZE-N+overlap);
} while (++c<CC);
/* Decode fixed codebook */
ALLOC(collapse_masks, C*nbEBands, unsigned char);
-#ifdef NORM_ALIASING_HACK
- /* This is an ugly hack that breaks aliasing rules and would be easily broken,
- but it saves almost 4kB of stack. */
- X = (celt_norm*)(out_syn[CC-1]+overlap/2);
-#else
ALLOC(X, C*N, celt_norm); /**< Interleaved normalised MDCTs */
-#endif
quant_all_bands(0, mode, start, end, X, C==2 ? X+N : NULL, collapse_masks,
NULL, pulses, shortBlocks, spread_decision, dual_stereo, intensity, tf_res,
@@ -1109,7 +1293,9 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
for (i=0;i<C*nbEBands;i++)
oldBandE[i] = -QCONST16(28.f,DB_SHIFT);
}
-
+ if (st->prefilter_and_fold) {
+ prefilter_and_fold(st, N);
+ }
celt_synthesis(mode, X, out_syn, oldBandE, start, effEnd,
C, CC, isTransient, LM, st->downsample, silence, st->arch);
@@ -1173,6 +1359,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
deemphasis(out_syn, pcm, N, CC, st->downsample, mode->preemph, st->preemph_memD, accum);
st->loss_duration = 0;
+ st->prefilter_and_fold = 0;
RESTORE_STACK;
if (ec_tell(dec) > 8*len)
return OPUS_INTERNAL_ERROR;
@@ -1181,6 +1368,15 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
return frame_size/st->downsample;
}
+int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, /* Keeps the pre-existing signature; the decoding itself is done by celt_decode_with_ec_dred(). */
+ int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec, int accum)
+{
+ return celt_decode_with_ec_dred(st, data, len, pcm, frame_size, dec, accum /* All arguments are forwarded unchanged. */
+#ifdef ENABLE_DEEP_PLC
+ , NULL /* No LPCNet PLC state is supplied through this entry point. */
+#endif
+ );
+}
#ifdef CUSTOM_MODES
@@ -1254,6 +1450,26 @@ int opus_custom_decoder_ctl(CELTDecoder * OPUS_RESTRICT st, int request, ...)
va_start(ap, request);
switch (request)
{
+ case OPUS_SET_COMPLEXITY_REQUEST:
+ {
+ opus_int32 value = va_arg(ap, opus_int32);
+ if(value<0 || value>10)
+ {
+ goto bad_arg;
+ }
+ st->complexity = value;
+ }
+ break;
+ case OPUS_GET_COMPLEXITY_REQUEST:
+ {
+ opus_int32 *value = va_arg(ap, opus_int32*);
+ if (!value)
+ {
+ goto bad_arg;
+ }
+ *value = st->complexity;
+ }
+ break;
case CELT_SET_START_BAND_REQUEST:
{
opus_int32 value = va_arg(ap, opus_int32);
@@ -1300,7 +1516,7 @@ int opus_custom_decoder_ctl(CELTDecoder * OPUS_RESTRICT st, int request, ...)
int i;
opus_val16 *lpc, *oldBandE, *oldLogE, *oldLogE2;
lpc = (opus_val16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+st->overlap)*st->channels);
- oldBandE = lpc+st->channels*LPC_ORDER;
+ oldBandE = lpc+st->channels*CELT_LPC_ORDER;
oldLogE = oldBandE + 2*st->mode->nbEBands;
oldLogE2 = oldLogE + 2*st->mode->nbEBands;
OPUS_CLEAR((char*)&st->DECODER_RESET_START,
diff --git a/media/libopus/celt/celt_encoder.c b/media/libopus/celt/celt_encoder.c
index 637d442cf7..7f32a801c6 100644
--- a/media/libopus/celt/celt_encoder.c
+++ b/media/libopus/celt/celt_encoder.c
@@ -281,6 +281,9 @@ static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int
/* High-pass filter: (1 - 2*z^-1 + z^-2) / (1 - z^-1 + .5*z^-2) */
for (i=0;i<len;i++)
{
+#ifndef FIXED_POINT
+ float mem00;
+#endif
opus_val32 x,y;
x = SHR32(in[i+c*len],SIG_SHIFT);
y = ADD32(mem0, x);
@@ -288,8 +291,13 @@ static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int
mem0 = mem1 + y - SHL32(x,1);
mem1 = x - SHR32(y,1);
#else
+ /* Original code:
mem0 = mem1 + y - 2*x;
mem1 = x - .5f*y;
+ Modified code to shorten dependency chains: */
+ mem00=mem0;
+ mem0 = mem0 - x + .5f*mem1;
+ mem1 = x - mem00;
#endif
tmp[i] = SROUND16(y, 2);
/*printf("%f ", tmp[i]);*/
@@ -322,10 +330,11 @@ static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int
#ifdef FIXED_POINT
/* FIXME: Use PSHR16() instead */
tmp[i] = mem0 + PSHR32(x2-mem0,forward_shift);
+ mem0 = tmp[i];
#else
- tmp[i] = mem0 + MULT16_16_P15(forward_decay,x2-mem0);
+ mem0 = x2 + (1.f-forward_decay)*mem0;
+ tmp[i] = forward_decay*mem0;
#endif
- mem0 = tmp[i];
}
mem0=0;
@@ -337,11 +346,13 @@ static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int
#ifdef FIXED_POINT
/* FIXME: Use PSHR16() instead */
tmp[i] = mem0 + PSHR32(tmp[i]-mem0,3);
-#else
- tmp[i] = mem0 + MULT16_16_P15(QCONST16(0.125f,15),tmp[i]-mem0);
-#endif
mem0 = tmp[i];
maxE = MAX16(maxE, mem0);
+#else
+ mem0 = tmp[i] + 0.875f*mem0;
+ tmp[i] = 0.125f*mem0;
+ maxE = MAX16(maxE, 0.125f*mem0);
+#endif
}
/*for (i=0;i<len2;i++)printf("%f ", tmp[i]/mean);printf("\n");*/
@@ -967,7 +978,7 @@ static opus_val16 median_of_3(const opus_val16 *x)
return t0;
}
-static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16 *bandLogE2,
+static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16 *bandLogE2, const opus_val16 *oldBandE,
int nbEBands, int start, int end, int C, int *offsets, int lsb_depth, const opus_int16 *logN,
int isTransient, int vbr, int constrained_vbr, const opus_int16 *eBands, int LM,
int effectiveBytes, opus_int32 *tot_boost_, int lfe, opus_val16 *surround_dynalloc,
@@ -978,9 +989,11 @@ static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16
opus_val16 maxDepth;
VARDECL(opus_val16, follower);
VARDECL(opus_val16, noise_floor);
+ VARDECL(opus_val16, bandLogE3);
SAVE_STACK;
ALLOC(follower, C*nbEBands, opus_val16);
ALLOC(noise_floor, C*nbEBands, opus_val16);
+ ALLOC(bandLogE3, nbEBands, opus_val16);
OPUS_CLEAR(offsets, nbEBands);
/* Dynamic allocation code */
maxDepth=-QCONST16(31.9f, DB_SHIFT);
@@ -1033,8 +1046,10 @@ static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16
printf("%d ", spread_weight[i]);
printf("\n");*/
}
- /* Make sure that dynamic allocation can't make us bust the budget */
- if (effectiveBytes > 50 && LM>=1 && !lfe)
+ /* Make sure that dynamic allocation can't make us bust the budget.
+ We enable the feature starting at 24 kb/s for 20-ms frames
+ and 96 kb/s for 2.5 ms frames. */
+ if (effectiveBytes >= (30 + 5*LM) && !lfe)
{
int last=0;
c=0;do
@@ -1042,30 +1057,38 @@ static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16
opus_val16 offset;
opus_val16 tmp;
opus_val16 *f;
+ OPUS_COPY(bandLogE3, &bandLogE2[c*nbEBands], end);
+ if (LM==0) {
+ /* For 2.5 ms frames, the first 8 bands have just one bin, so the
+ energy is highly unreliable (high variance). For that reason,
+ we take the max with the previous energy so that at least 2 bins
+ are getting used. */
+ for (i=0;i<IMIN(8,end);i++) bandLogE3[i] = MAX16(bandLogE2[c*nbEBands+i], oldBandE[c*nbEBands+i]);
+ }
f = &follower[c*nbEBands];
- f[0] = bandLogE2[c*nbEBands];
+ f[0] = bandLogE3[0];
for (i=1;i<end;i++)
{
/* The last band to be at least 3 dB higher than the previous one
is the last we'll consider. Otherwise, we run into problems on
bandlimited signals. */
- if (bandLogE2[c*nbEBands+i] > bandLogE2[c*nbEBands+i-1]+QCONST16(.5f,DB_SHIFT))
+ if (bandLogE3[i] > bandLogE3[i-1]+QCONST16(.5f,DB_SHIFT))
last=i;
- f[i] = MIN16(f[i-1]+QCONST16(1.5f,DB_SHIFT), bandLogE2[c*nbEBands+i]);
+ f[i] = MIN16(f[i-1]+QCONST16(1.5f,DB_SHIFT), bandLogE3[i]);
}
for (i=last-1;i>=0;i--)
- f[i] = MIN16(f[i], MIN16(f[i+1]+QCONST16(2.f,DB_SHIFT), bandLogE2[c*nbEBands+i]));
+ f[i] = MIN16(f[i], MIN16(f[i+1]+QCONST16(2.f,DB_SHIFT), bandLogE3[i]));
/* Combine with a median filter to avoid dynalloc triggering unnecessarily.
The "offset" value controls how conservative we are -- a higher offset
reduces the impact of the median filter and makes dynalloc use more bits. */
offset = QCONST16(1.f, DB_SHIFT);
for (i=2;i<end-2;i++)
- f[i] = MAX16(f[i], median_of_5(&bandLogE2[c*nbEBands+i-2])-offset);
- tmp = median_of_3(&bandLogE2[c*nbEBands])-offset;
+ f[i] = MAX16(f[i], median_of_5(&bandLogE3[i-2])-offset);
+ tmp = median_of_3(&bandLogE3[0])-offset;
f[0] = MAX16(f[0], tmp);
f[1] = MAX16(f[1], tmp);
- tmp = median_of_3(&bandLogE2[c*nbEBands+end-3])-offset;
+ tmp = median_of_3(&bandLogE3[end-3])-offset;
f[end-2] = MAX16(f[end-2], tmp);
f[end-1] = MAX16(f[end-1], tmp);
@@ -1565,10 +1588,13 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
vbr_rate = 0;
tmp = st->bitrate*frame_size;
if (tell>1)
- tmp += tell;
+ tmp += tell*mode->Fs;
if (st->bitrate!=OPUS_BITRATE_MAX)
+ {
nbCompressedBytes = IMAX(2, IMIN(nbCompressedBytes,
(tmp+4*mode->Fs)/(8*mode->Fs)-!!st->signalling));
+ ec_enc_shrink(enc, nbCompressedBytes);
+ }
effectiveBytes = nbCompressedBytes - nbFilledBytes;
}
equiv_rate = ((opus_int32)nbCompressedBytes*8*50 << (3-LM)) - (40*C+20)*((400>>LM) - 50);
@@ -1882,7 +1908,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
ALLOC(importance, nbEBands, int);
ALLOC(spread_weight, nbEBands, int);
- maxDepth = dynalloc_analysis(bandLogE, bandLogE2, nbEBands, start, end, C, offsets,
+ maxDepth = dynalloc_analysis(bandLogE, bandLogE2, oldBandE, nbEBands, start, end, C, offsets,
st->lsb_depth, mode->logN, isTransient, st->vbr, st->constrained_vbr,
eBands, LM, effectiveBytes, &tot_boost, st->lfe, surround_dynalloc, &st->analysis, importance, spread_weight);
@@ -2246,7 +2272,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
if (anti_collapse_on)
{
anti_collapse(mode, X, collapse_masks, LM, C, N,
- start, end, oldBandE, oldLogE, oldLogE2, pulses, st->rng);
+ start, end, oldBandE, oldLogE, oldLogE2, pulses, st->rng, st->arch);
}
c=0; do {
@@ -2265,15 +2291,15 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
st->prefilter_period_old=IMAX(st->prefilter_period_old, COMBFILTER_MINPERIOD);
comb_filter(out_mem[c], out_mem[c], st->prefilter_period_old, st->prefilter_period, mode->shortMdctSize,
st->prefilter_gain_old, st->prefilter_gain, st->prefilter_tapset_old, st->prefilter_tapset,
- mode->window, overlap);
+ mode->window, overlap, st->arch);
if (LM!=0)
comb_filter(out_mem[c]+mode->shortMdctSize, out_mem[c]+mode->shortMdctSize, st->prefilter_period, pitch_index, N-mode->shortMdctSize,
st->prefilter_gain, gain1, st->prefilter_tapset, prefilter_tapset,
- mode->window, overlap);
+ mode->window, overlap, st->arch);
} while (++c<CC);
/* We reuse freq[] as scratch space for the de-emphasis */
- deemphasis(out_mem, (opus_val16*)pcm, N, CC, st->upsample, mode->preemph, st->preemph_memD);
+ deemphasis(out_mem, (opus_val16*)pcm, N, CC, st->upsample, mode->preemph, st->preemph_memD, 0);
st->prefilter_period_old = st->prefilter_period;
st->prefilter_gain_old = st->prefilter_gain;
st->prefilter_tapset_old = st->prefilter_tapset;
diff --git a/media/libopus/celt/celt_lpc.c b/media/libopus/celt/celt_lpc.c
index f91721bcab..fabca65cb3 100644
--- a/media/libopus/celt/celt_lpc.c
+++ b/media/libopus/celt/celt_lpc.c
@@ -44,7 +44,7 @@ int p
opus_val32 r;
opus_val32 error = ac[0];
#ifdef FIXED_POINT
- opus_val32 lpc[LPC_ORDER];
+ opus_val32 lpc[CELT_LPC_ORDER];
#else
float *lpc = _lpc;
#endif
@@ -158,7 +158,17 @@ void celt_fir_c(
sum[1] = SHL32(EXTEND32(x[i+1]), SIG_SHIFT);
sum[2] = SHL32(EXTEND32(x[i+2]), SIG_SHIFT);
sum[3] = SHL32(EXTEND32(x[i+3]), SIG_SHIFT);
- xcorr_kernel(rnum, x+i-ord, sum, ord, arch);
+#if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
+ {
+ opus_val32 sum_c[4];
+ memcpy(sum_c, sum, sizeof(sum_c));
+ xcorr_kernel_c(rnum, x+i-ord, sum_c, ord);
+#endif
+ xcorr_kernel(rnum, x+i-ord, sum, ord, arch);
+#if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
+ celt_assert(memcmp(sum, sum_c, sizeof(sum)) == 0);
+ }
+#endif
y[i ] = SROUND16(sum[0], SIG_SHIFT);
y[i+1] = SROUND16(sum[1], SIG_SHIFT);
y[i+2] = SROUND16(sum[2], SIG_SHIFT);
@@ -222,8 +232,17 @@ void celt_iir(const opus_val32 *_x,
sum[1]=_x[i+1];
sum[2]=_x[i+2];
sum[3]=_x[i+3];
- xcorr_kernel(rden, y+i, sum, ord, arch);
-
+#if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
+ {
+ opus_val32 sum_c[4];
+ memcpy(sum_c, sum, sizeof(sum_c));
+ xcorr_kernel_c(rden, y+i, sum_c, ord);
+#endif
+ xcorr_kernel(rden, y+i, sum, ord, arch);
+#if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
+ celt_assert(memcmp(sum, sum_c, sizeof(sum)) == 0);
+ }
+#endif
/* Patch up the result to compensate for the fact that this is an IIR */
y[i+ord ] = -SROUND16(sum[0],SIG_SHIFT);
_y[i ] = sum[0];
diff --git a/media/libopus/celt/celt_lpc.h b/media/libopus/celt/celt_lpc.h
index a4c5fd6ea5..97dee82f02 100644
--- a/media/libopus/celt/celt_lpc.h
+++ b/media/libopus/celt/celt_lpc.h
@@ -35,7 +35,7 @@
#include "x86/celt_lpc_sse.h"
#endif
-#define LPC_ORDER 24
+#define CELT_LPC_ORDER 24
void _celt_lpc(opus_val16 *_lpc, const opus_val32 *ac, int p);
diff --git a/media/libopus/celt/cpu_support.h b/media/libopus/celt/cpu_support.h
index 7b5c56ca90..9f13d8aecf 100644
--- a/media/libopus/celt/cpu_support.h
+++ b/media/libopus/celt/cpu_support.h
@@ -35,19 +35,20 @@
(defined(OPUS_ARM_ASM) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR))
#include "arm/armcpu.h"
-/* We currently support 4 ARM variants:
+/* We currently support 5 ARM variants:
* arch[0] -> ARMv4
* arch[1] -> ARMv5E
* arch[2] -> ARMv6
* arch[3] -> NEON
+ * arch[4] -> NEON+DOTPROD
*/
-#define OPUS_ARCHMASK 3
+#define OPUS_ARCHMASK 7
#elif defined(OPUS_HAVE_RTCD) && \
((defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \
(defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)) || \
(defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) || \
- (defined(OPUS_X86_MAY_HAVE_AVX) && !defined(OPUS_X86_PRESUME_AVX)))
+ (defined(OPUS_X86_MAY_HAVE_AVX2) && !defined(OPUS_X86_PRESUME_AVX2)))
#include "x86/x86cpu.h"
/* We currently support 5 x86 variants:
diff --git a/media/libopus/celt/entdec.c b/media/libopus/celt/entdec.c
index 0b3433ed8b..027aa24bca 100644
--- a/media/libopus/celt/entdec.c
+++ b/media/libopus/celt/entdec.c
@@ -195,6 +195,27 @@ int ec_dec_icdf(ec_dec *_this,const unsigned char *_icdf,unsigned _ftb){
return ret;
}
+int ec_dec_icdf16(ec_dec *_this,const opus_uint16 *_icdf,unsigned _ftb){ /* Decodes one symbol from a 16-bit "inverse" CDF table with _ftb bits of precision. */
+ opus_uint32 r;
+ opus_uint32 d;
+ opus_uint32 s;
+ opus_uint32 t;
+ int ret;
+ s=_this->rng;
+ d=_this->val;
+ r=s>>_ftb; /* Current range scaled down by the table precision. */
+ ret=-1;
+ do{ /* Linear scan over the table until the scaled entry no longer exceeds val. */
+ t=s; /* Keep the previous bound; it is the top of the interval finally selected. */
+ s=IMUL32(r,_icdf[++ret]);
+ }
+ while(d<s);
+ _this->val=d-s;
+ _this->rng=t-s; /* New range is the gap between the two consecutive bounds. */
+ ec_dec_normalize(_this);
+ return ret; /* Index of the table entry at which the scan stopped. */
+}
+
opus_uint32 ec_dec_uint(ec_dec *_this,opus_uint32 _ft){
unsigned ft;
unsigned s;
diff --git a/media/libopus/celt/entdec.h b/media/libopus/celt/entdec.h
index 025fc1870d..c81f26fdb2 100644
--- a/media/libopus/celt/entdec.h
+++ b/media/libopus/celt/entdec.h
@@ -81,6 +81,16 @@ int ec_dec_bit_logp(ec_dec *_this,unsigned _logp);
Return: The decoded symbol s.*/
int ec_dec_icdf(ec_dec *_this,const unsigned char *_icdf,unsigned _ftb);
+/*Decodes a symbol given an "inverse" CDF table.
+ No call to ec_dec_update() is necessary after this call.
+ _icdf: The "inverse" CDF, such that symbol s falls in the range
+ [s>0?ft-_icdf[s-1]:0,ft-_icdf[s]), where ft=1<<_ftb.
+ The values must be monotonically non-increasing, and the last value
+ must be 0.
+ _ftb: The number of bits of precision in the cumulative distribution.
+ Return: The decoded symbol s.*/
+int ec_dec_icdf16(ec_dec *_this,const opus_uint16 *_icdf,unsigned _ftb);
+
/*Extracts a raw unsigned integer with a non-power-of-2 range from the stream.
The bits must have been encoded with ec_enc_uint().
No call to ec_dec_update() is necessary after this call.
diff --git a/media/libopus/celt/entenc.c b/media/libopus/celt/entenc.c
index f1750d25b8..69c6f835d0 100644
--- a/media/libopus/celt/entenc.c
+++ b/media/libopus/celt/entenc.c
@@ -172,6 +172,17 @@ void ec_enc_icdf(ec_enc *_this,int _s,const unsigned char *_icdf,unsigned _ftb){
ec_enc_normalize(_this);
}
+void ec_enc_icdf16(ec_enc *_this,int _s,const opus_uint16 *_icdf,unsigned _ftb){ /* Encodes symbol _s using a 16-bit "inverse" CDF table with _ftb bits of precision. */
+ opus_uint32 r;
+ r=_this->rng>>_ftb; /* Current range scaled down by the table precision. */
+ if(_s>0){
+ _this->val+=_this->rng-IMUL32(r,_icdf[_s-1]); /* Advance val past the intervals of all symbols below _s. */
+ _this->rng=IMUL32(r,_icdf[_s-1]-_icdf[_s]); /* Width of symbol _s is the difference of adjacent table entries. */
+ }
+ else _this->rng-=IMUL32(r,_icdf[_s]); /* Symbol 0: val is unchanged, only the range shrinks. */
+ ec_enc_normalize(_this);
+}
+
void ec_enc_uint(ec_enc *_this,opus_uint32 _fl,opus_uint32 _ft){
unsigned ft;
unsigned fl;
diff --git a/media/libopus/celt/entenc.h b/media/libopus/celt/entenc.h
index f502eaf662..010874bbc1 100644
--- a/media/libopus/celt/entenc.h
+++ b/media/libopus/celt/entenc.h
@@ -64,6 +64,15 @@ void ec_enc_bit_logp(ec_enc *_this,int _val,unsigned _logp);
_ftb: The number of bits of precision in the cumulative distribution.*/
void ec_enc_icdf(ec_enc *_this,int _s,const unsigned char *_icdf,unsigned _ftb);
+/*Encodes a symbol given an "inverse" CDF table.
+ _s: The index of the symbol to encode.
+ _icdf: The "inverse" CDF, such that symbol _s falls in the range
+ [_s>0?ft-_icdf[_s-1]:0,ft-_icdf[_s]), where ft=1<<_ftb.
+ The values must be monotonically non-increasing, and the last value
+ must be 0.
+ _ftb: The number of bits of precision in the cumulative distribution.*/
+void ec_enc_icdf16(ec_enc *_this,int _s,const opus_uint16 *_icdf,unsigned _ftb);
+
/*Encodes a raw unsigned integer in the stream.
_fl: The integer to encode.
_ft: The number of integers that can be encoded (one more than the max).
diff --git a/media/libopus/celt/laplace.c b/media/libopus/celt/laplace.c
index a7bca874b6..2180966662 100644
--- a/media/libopus/celt/laplace.c
+++ b/media/libopus/celt/laplace.c
@@ -132,3 +132,104 @@ int ec_laplace_decode(ec_dec *dec, unsigned fs, int decay)
ec_dec_update(dec, fl, IMIN(fl+fs,32768), 32768);
return val;
}
+
+void ec_laplace_encode_p0(ec_enc *enc, int value, opus_uint16 p0, opus_uint16 decay) /* Encodes value as a zero/sign symbol followed, when non-zero, by its magnitude in chunks. */
+{
+ int s;
+ opus_uint16 sign_icdf[3];
+ sign_icdf[0] = 32768-p0; /* Symbol 0 (value==0) gets p0 out of 32768. */
+ sign_icdf[1] = sign_icdf[0]/2; /* The remainder is split between symbols 1 and 2. */
+ sign_icdf[2] = 0;
+ s = value == 0 ? 0 : (value > 0 ? 1 : 2); /* 0: zero, 1: positive, 2: negative. */
+ ec_enc_icdf16(enc, s, sign_icdf, 15);
+ value = abs(value);
+ if (value)
+ {
+ int i;
+ opus_uint16 icdf[8];
+ icdf[0] = IMAX(7, decay);
+ for (i=1;i<7;i++)
+ {
+ icdf[i] = IMAX(7-i, (icdf[i-1] * (opus_int32)decay) >> 15); /* Previous entry scaled by decay/32768, but never below 7-i. */
+ }
+ icdf[7] = 0;
+ value--; /* Magnitude is at least 1 here, so magnitude-1 is what gets coded. */
+ do {
+ ec_enc_icdf16(enc, IMIN(value, 7), icdf, 15); /* Symbol 7 means "7 or more": another chunk follows. */
+ value -= 7;
+ } while (value >= 0);
+ }
+}
+
+int ec_laplace_decode_p0(ec_dec *dec, opus_uint16 p0, opus_uint16 decay) /* Decodes a value written by ec_laplace_encode_p0() with the same p0 and decay. */
+{
+ int s;
+ int value;
+ opus_uint16 sign_icdf[3];
+ sign_icdf[0] = 32768-p0; /* Same three-symbol zero/positive/negative table as the encoder builds. */
+ sign_icdf[1] = sign_icdf[0]/2;
+ sign_icdf[2] = 0;
+ s = ec_dec_icdf16(dec, sign_icdf, 15);
+ if (s==2) s = -1; /* Symbol 2 is the negative sign; s becomes the multiplier -1, 0 or +1. */
+ if (s != 0)
+ {
+ int i;
+ int v;
+ opus_uint16 icdf[8];
+ icdf[0] = IMAX(7, decay);
+ for (i=1;i<7;i++)
+ {
+ icdf[i] = IMAX(7-i, (icdf[i-1] * (opus_int32)decay) >> 15); /* Previous entry scaled by decay/32768, but never below 7-i. */
+ }
+ icdf[7] = 0;
+ value = 1; /* A non-zero value has magnitude at least 1; the chunks add to it. */
+ do {
+ v = ec_dec_icdf16(dec, icdf, 15);
+ value += v;
+ } while (v == 7); /* Symbol 7 signals that another chunk follows. */
+ return s*value;
+ } else return 0;
+}
+
+#if 0
+
+#include <stdio.h>
+#define NB_VALS 10
+#define DATA_SIZE 10000
+int main() { /* Disabled (#if 0) round-trip check: encode NB_VALS values, decode them, print both lists. */
+ ec_enc enc;
+ ec_dec dec;
+ unsigned char *ptr;
+ int i;
+ int decay, p0;
+ int val[NB_VALS] = {6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
+ /*for (i=0;i<NB_VALS;i++) {
+ val[i] = -log(rand()/(float)RAND_MAX);
+ if (rand()%2) val[i] = -val[i];
+ }*/
+ p0 = 16000;
+ decay = 16000;
+ ptr = (unsigned char *)malloc(DATA_SIZE); /* NOTE(review): result is not checked and never freed; confirm <stdlib.h> is in scope before enabling. */
+ ec_enc_init(&enc,ptr,DATA_SIZE);
+ for (i=0;i<NB_VALS;i++) {
+ printf("%d ", val[i]); /* First output line: the values before encoding. */
+ }
+ printf("\n");
+ for (i=0;i<NB_VALS;i++) {
+ ec_laplace_encode_p0(&enc, val[i], p0, decay);
+ }
+
+ ec_enc_done(&enc);
+
+ ec_dec_init(&dec,ec_get_buffer(&enc),ec_range_bytes(&enc));
+
+ for (i=0;i<NB_VALS;i++) {
+ val[i] = ec_laplace_decode_p0(&dec, p0, decay); /* Overwrites val[] with the decoded values. */
+ }
+ for (i=0;i<NB_VALS;i++) {
+ printf("%d ", val[i]); /* Second output line: the values after decoding; nothing compares the two automatically. */
+ }
+ printf("\n");
+}
+
+#endif
diff --git a/media/libopus/celt/laplace.h b/media/libopus/celt/laplace.h
index 46c14b5da5..8010ad9755 100644
--- a/media/libopus/celt/laplace.h
+++ b/media/libopus/celt/laplace.h
@@ -26,6 +26,9 @@
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+#ifndef LAPLACE_H
+#define LAPLACE_H
+
#include "entenc.h"
#include "entdec.h"
@@ -46,3 +49,9 @@ void ec_laplace_encode(ec_enc *enc, int *value, unsigned fs, int decay);
@return Value decoded
*/
int ec_laplace_decode(ec_dec *dec, unsigned fs, int decay);
+
+
+int ec_laplace_decode_p0(ec_dec *dec, opus_uint16 p0, opus_uint16 decay);
+void ec_laplace_encode_p0(ec_enc *enc, int value, opus_uint16 p0, opus_uint16 decay);
+
+#endif
diff --git a/media/libopus/celt/mathops.h b/media/libopus/celt/mathops.h
index 478ac9187c..e2eece2937 100644
--- a/media/libopus/celt/mathops.h
+++ b/media/libopus/celt/mathops.h
@@ -230,6 +230,12 @@ static OPUS_INLINE opus_val32 celt_exp2_frac(opus_val16 x)
frac = SHL16(x, 4);
return ADD16(D0, MULT16_16_Q15(frac, ADD16(D1, MULT16_16_Q15(frac, ADD16(D2 , MULT16_16_Q15(D3,frac))))));
}
+
+#undef D0
+#undef D1
+#undef D2
+#undef D3
+
/** Base-2 exponential approximation (2^x). (Q10 input, Q16 output) */
static OPUS_INLINE opus_val32 celt_exp2(opus_val16 x)
{
diff --git a/media/libopus/celt/mips/celt_mipsr1.h b/media/libopus/celt/mips/celt_mipsr1.h
index c332fe0471..d1b25c204d 100644
--- a/media/libopus/celt/mips/celt_mipsr1.h
+++ b/media/libopus/celt/mips/celt_mipsr1.h
@@ -27,8 +27,8 @@
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-#ifndef __CELT_MIPSR1_H__
-#define __CELT_MIPSR1_H__
+#ifndef CELT_MIPSR1_H__
+#define CELT_MIPSR1_H__
#ifdef HAVE_CONFIG_H
#include "config.h"
@@ -149,4 +149,4 @@ void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
}
}
-#endif /* __CELT_MIPSR1_H__ */
+#endif /* CELT_MIPSR1_H__ */
diff --git a/media/libopus/celt/mips/mdct_mipsr1.h b/media/libopus/celt/mips/mdct_mipsr1.h
index 2934dab776..7456c181a5 100644
--- a/media/libopus/celt/mips/mdct_mipsr1.h
+++ b/media/libopus/celt/mips/mdct_mipsr1.h
@@ -38,8 +38,8 @@
MDCT implementation in FFMPEG, but has differences in signs, ordering
and scaling in many places.
*/
-#ifndef __MDCT_MIPSR1_H__
-#define __MDCT_MIPSR1_H__
+#ifndef MDCT_MIPSR1_H__
+#define MDCT_MIPSR1_H__
#ifndef SKIP_CONFIG_H
#ifdef HAVE_CONFIG_H
@@ -285,4 +285,4 @@ void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scala
}
}
}
-#endif /* __MDCT_MIPSR1_H__ */
+#endif /* MDCT_MIPSR1_H__ */
diff --git a/media/libopus/celt/mips/vq_mipsr1.h b/media/libopus/celt/mips/vq_mipsr1.h
index f26a33e755..1621c5624f 100644
--- a/media/libopus/celt/mips/vq_mipsr1.h
+++ b/media/libopus/celt/mips/vq_mipsr1.h
@@ -26,8 +26,8 @@
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-#ifndef __VQ_MIPSR1_H__
-#define __VQ_MIPSR1_H__
+#ifndef VQ_MIPSR1_H__
+#define VQ_MIPSR1_H__
#ifdef HAVE_CONFIG_H
#include "config.h"
@@ -113,4 +113,4 @@ void renormalise_vector(celt_norm *X, int N, opus_val16 gain, int arch)
/*return celt_sqrt(E);*/
}
-#endif /* __VQ_MIPSR1_H__ */
+#endif /* VQ_MIPSR1_H__ */
diff --git a/media/libopus/celt/os_support.h b/media/libopus/celt/os_support.h
index 009bf861da..7d2d378116 100644
--- a/media/libopus/celt/os_support.h
+++ b/media/libopus/celt/os_support.h
@@ -41,7 +41,7 @@
#include <string.h>
#include <stdlib.h>
-/** Opus wrapper for malloc(). To do your own dynamic allocation, all you need to do is replace this function and opus_free */
+/** Opus wrapper for malloc(). To do your own dynamic allocation replace this function, opus_realloc, and opus_free */
#ifndef OVERRIDE_OPUS_ALLOC
static OPUS_INLINE void *opus_alloc (size_t size)
{
@@ -49,7 +49,15 @@ static OPUS_INLINE void *opus_alloc (size_t size)
}
#endif
-/** Same as celt_alloc(), except that the area is only needed inside a CELT call (might cause problem with wideband though) */
+#ifndef OVERRIDE_OPUS_REALLOC
+static OPUS_INLINE void *opus_realloc (void *ptr, size_t size) /* Opus wrapper for realloc(); compiled out when OVERRIDE_OPUS_REALLOC is defined. */
+{
+ return realloc(ptr, size);
+}
+#endif
+
+/** Used only for non-threadsafe pseudostack.
+ If desired, this can always return the same area of memory rather than allocating a new one every time. */
#ifndef OVERRIDE_OPUS_ALLOC_SCRATCH
static OPUS_INLINE void *opus_alloc_scratch (size_t size)
{
@@ -58,7 +66,7 @@ static OPUS_INLINE void *opus_alloc_scratch (size_t size)
}
#endif
-/** Opus wrapper for free(). To do your own dynamic allocation, all you need to do is replace this function and opus_alloc */
+/** Opus wrapper for free(). To do your own dynamic allocation replace this function, opus_alloc, and opus_realloc */
#ifndef OVERRIDE_OPUS_FREE
static OPUS_INLINE void opus_free (void *ptr)
{
diff --git a/media/libopus/celt/pitch.c b/media/libopus/celt/pitch.c
index 7998db4164..e33c60a3bf 100644
--- a/media/libopus/celt/pitch.c
+++ b/media/libopus/celt/pitch.c
@@ -262,7 +262,16 @@ celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y,
for (i=0;i<max_pitch-3;i+=4)
{
opus_val32 sum[4]={0,0,0,0};
- xcorr_kernel(_x, _y+i, sum, len, arch);
+#if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
+ {
+ opus_val32 sum_c[4]={0,0,0,0};
+ xcorr_kernel_c(_x, _y+i, sum_c, len);
+#endif
+ xcorr_kernel(_x, _y+i, sum, len, arch);
+#if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
+ celt_assert(memcmp(sum, sum_c, sizeof(sum)) == 0);
+ }
+#endif
xcorr[i]=sum[0];
xcorr[i+1]=sum[1];
xcorr[i+2]=sum[2];
diff --git a/media/libopus/celt/pitch.h b/media/libopus/celt/pitch.h
index e425f56aea..dd0e2bebd2 100644
--- a/media/libopus/celt/pitch.h
+++ b/media/libopus/celt/pitch.h
@@ -189,4 +189,15 @@ celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y,
# define celt_pitch_xcorr celt_pitch_xcorr_c
#endif
+#ifdef NON_STATIC_COMB_FILTER_CONST_C
+void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N,
+ opus_val16 g10, opus_val16 g11, opus_val16 g12);
+#endif
+
+#ifndef OVERRIDE_COMB_FILTER_CONST
+# define comb_filter_const(y, x, T, N, g10, g11, g12, arch) \
+ ((void)(arch),comb_filter_const_c(y, x, T, N, g10, g11, g12))
+#endif
+
+
#endif
diff --git a/media/libopus/celt/stack_alloc.h b/media/libopus/celt/stack_alloc.h
index ae40e2a165..e2739bdf66 100644
--- a/media/libopus/celt/stack_alloc.h
+++ b/media/libopus/celt/stack_alloc.h
@@ -141,7 +141,7 @@ extern char *global_stack_top;
#else
#define ALIGN(stack, size) ((stack) += ((size) - (long)(stack)) & ((size) - 1))
-#define PUSH(stack, size, type) (ALIGN((stack),sizeof(type)/sizeof(char)),(stack)+=(size)*(sizeof(type)/sizeof(char)),(type*)((stack)-(size)*(sizeof(type)/sizeof(char))))
+#define PUSH(stack, size, type) (ALIGN((stack),sizeof(type)/(sizeof(char))),(stack)+=(size)*(sizeof(type)/(sizeof(char))),(type*)((stack)-(size)*(sizeof(type)/(sizeof(char)))))
#if 0 /* Set this to 1 to instrument pseudostack usage */
#define RESTORE_STACK (printf("%ld %s:%d\n", global_stack-scratch_ptr, __FILE__, __LINE__),global_stack = _saved_stack)
#else
diff --git a/media/libopus/celt/x86/celt_lpc_sse4_1.c b/media/libopus/celt/x86/celt_lpc_sse4_1.c
index 5478568849..daf59d245a 100644
--- a/media/libopus/celt/x86/celt_lpc_sse4_1.c
+++ b/media/libopus/celt/x86/celt_lpc_sse4_1.c
@@ -64,9 +64,16 @@ void celt_fir_sse4_1(const opus_val16 *x,
{
opus_val32 sums[4] = {0};
__m128i vecSum, vecX;
-
- xcorr_kernel(rnum, x+i-ord, sums, ord, arch);
-
+#if defined(OPUS_CHECK_ASM)
+ {
+ opus_val32 sums_c[4] = {0};
+ xcorr_kernel_c(rnum, x+i-ord, sums_c, ord);
+#endif
+ xcorr_kernel(rnum, x+i-ord, sums, ord, arch);
+#if defined(OPUS_CHECK_ASM)
+ celt_assert(memcmp(sums, sums_c, sizeof(sums)) == 0);
+ }
+#endif
vecSum = _mm_loadu_si128((__m128i *)sums);
vecSum = _mm_add_epi32(vecSum, vecNoA);
vecSum = _mm_srai_epi32(vecSum, SIG_SHIFT);
diff --git a/media/libopus/celt/x86/pitch_avx.c b/media/libopus/celt/x86/pitch_avx.c
new file mode 100644
index 0000000000..f731762d84
--- /dev/null
+++ b/media/libopus/celt/x86/pitch_avx.c
@@ -0,0 +1,101 @@
+/* Copyright (c) 2023 Amazon */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+
+#include <immintrin.h>
+#include "x86cpu.h"
+#include "pitch.h"
+
+#if defined(OPUS_X86_MAY_HAVE_AVX2) && !defined(FIXED_POINT)
+
+/* Like the "regular" xcorr_kernel(), but computes 8 results at a time. */
+static void xcorr_kernel_avx(const float *x, const float *y, float sum[8], int len)
+{
+ __m256 xsum0, xsum1, xsum2, xsum3, xsum4, xsum5, xsum6, xsum7;
+ xsum7 = xsum6 = xsum5 = xsum4 = xsum3 = xsum2 = xsum1 = xsum0 = _mm256_setzero_ps();
+ int i;
+ __m256 x0;
+ /* Compute 8 inner products using partial sums. */
+ for (i=0;i<len-7;i+=8)
+ {
+ x0 = _mm256_loadu_ps(x+i);
+ xsum0 = _mm256_fmadd_ps(x0, _mm256_loadu_ps(y+i ), xsum0);
+ xsum1 = _mm256_fmadd_ps(x0, _mm256_loadu_ps(y+i+1), xsum1);
+ xsum2 = _mm256_fmadd_ps(x0, _mm256_loadu_ps(y+i+2), xsum2);
+ xsum3 = _mm256_fmadd_ps(x0, _mm256_loadu_ps(y+i+3), xsum3);
+ xsum4 = _mm256_fmadd_ps(x0, _mm256_loadu_ps(y+i+4), xsum4);
+ xsum5 = _mm256_fmadd_ps(x0, _mm256_loadu_ps(y+i+5), xsum5);
+ xsum6 = _mm256_fmadd_ps(x0, _mm256_loadu_ps(y+i+6), xsum6);
+ xsum7 = _mm256_fmadd_ps(x0, _mm256_loadu_ps(y+i+7), xsum7);
+ }
+ if (i != len) {
+ static const int mask[15] = {-1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0};
+ __m256i m;
+ m = _mm256_loadu_si256((__m256i*)(void*)(mask + 7+i-len));
+ x0 = _mm256_maskload_ps(x+i, m);
+ xsum0 = _mm256_fmadd_ps(x0, _mm256_maskload_ps(y+i , m), xsum0);
+ xsum1 = _mm256_fmadd_ps(x0, _mm256_maskload_ps(y+i+1, m), xsum1);
+ xsum2 = _mm256_fmadd_ps(x0, _mm256_maskload_ps(y+i+2, m), xsum2);
+ xsum3 = _mm256_fmadd_ps(x0, _mm256_maskload_ps(y+i+3, m), xsum3);
+ xsum4 = _mm256_fmadd_ps(x0, _mm256_maskload_ps(y+i+4, m), xsum4);
+ xsum5 = _mm256_fmadd_ps(x0, _mm256_maskload_ps(y+i+5, m), xsum5);
+ xsum6 = _mm256_fmadd_ps(x0, _mm256_maskload_ps(y+i+6, m), xsum6);
+ xsum7 = _mm256_fmadd_ps(x0, _mm256_maskload_ps(y+i+7, m), xsum7);
+ }
+ /* 8 horizontal adds. */
+ /* Compute [0 4] [1 5] [2 6] [3 7] */
+ xsum0 = _mm256_add_ps(_mm256_permute2f128_ps(xsum0, xsum4, 2<<4), _mm256_permute2f128_ps(xsum0, xsum4, 1 | (3<<4)));
+ xsum1 = _mm256_add_ps(_mm256_permute2f128_ps(xsum1, xsum5, 2<<4), _mm256_permute2f128_ps(xsum1, xsum5, 1 | (3<<4)));
+ xsum2 = _mm256_add_ps(_mm256_permute2f128_ps(xsum2, xsum6, 2<<4), _mm256_permute2f128_ps(xsum2, xsum6, 1 | (3<<4)));
+ xsum3 = _mm256_add_ps(_mm256_permute2f128_ps(xsum3, xsum7, 2<<4), _mm256_permute2f128_ps(xsum3, xsum7, 1 | (3<<4)));
+ /* Compute [0 1 4 5] [2 3 6 7] */
+ xsum0 = _mm256_hadd_ps(xsum0, xsum1);
+ xsum1 = _mm256_hadd_ps(xsum2, xsum3);
+ /* Compute [0 1 2 3 4 5 6 7] */
+ xsum0 = _mm256_hadd_ps(xsum0, xsum1);
+ _mm256_storeu_ps(sum, xsum0);
+}
+
+void celt_pitch_xcorr_avx2(const float *_x, const float *_y, float *xcorr, int len, int max_pitch, int arch)
+{
+ int i;
+ celt_assert(max_pitch>0);
+ (void)arch;
+ for (i=0;i<max_pitch-7;i+=8)
+ {
+ xcorr_kernel_avx(_x, _y+i, &xcorr[i], len);
+ }
+ for (;i<max_pitch;i++)
+ {
+ xcorr[i] = celt_inner_prod(_x, _y+i, len, arch);
+ }
+}
+
+#endif
diff --git a/media/libopus/celt/x86/pitch_sse.h b/media/libopus/celt/x86/pitch_sse.h
index 964aef50db..127581f3e1 100644
--- a/media/libopus/celt/x86/pitch_sse.h
+++ b/media/libopus/celt/x86/pitch_sse.h
@@ -131,12 +131,6 @@ extern opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(
#if defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT)
-#define OVERRIDE_DUAL_INNER_PROD
-#define OVERRIDE_COMB_FILTER_CONST
-
-#undef dual_inner_prod
-#undef comb_filter_const
-
void dual_inner_prod_sse(const opus_val16 *x,
const opus_val16 *y01,
const opus_val16 *y02,
@@ -154,13 +148,17 @@ void comb_filter_const_sse(opus_val32 *y,
#if defined(OPUS_X86_PRESUME_SSE)
+#define OVERRIDE_DUAL_INNER_PROD
+#define OVERRIDE_COMB_FILTER_CONST
# define dual_inner_prod(x, y01, y02, N, xy1, xy2, arch) \
((void)(arch),dual_inner_prod_sse(x, y01, y02, N, xy1, xy2))
# define comb_filter_const(y, x, T, N, g10, g11, g12, arch) \
((void)(arch),comb_filter_const_sse(y, x, T, N, g10, g11, g12))
-#else
+#elif defined(OPUS_HAVE_RTCD)
+#define OVERRIDE_DUAL_INNER_PROD
+#define OVERRIDE_COMB_FILTER_CONST
extern void (*const DUAL_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(
const opus_val16 *x,
const opus_val16 *y01,
@@ -187,6 +185,32 @@ extern void (*const COMB_FILTER_CONST_IMPL[OPUS_ARCHMASK + 1])(
#define NON_STATIC_COMB_FILTER_CONST_C
#endif
-#endif
+
+void celt_pitch_xcorr_avx2(const float *_x, const float *_y, float *xcorr, int len, int max_pitch, int arch);
+
+#if defined(OPUS_X86_PRESUME_AVX2)
+
+#define OVERRIDE_PITCH_XCORR
+# define celt_pitch_xcorr celt_pitch_xcorr_avx2
+
+#elif defined(OPUS_HAVE_RTCD) && defined(OPUS_X86_MAY_HAVE_AVX2)
+
+#define OVERRIDE_PITCH_XCORR
+extern void (*const PITCH_XCORR_IMPL[OPUS_ARCHMASK + 1])(
+ const float *_x,
+ const float *_y,
+ float *xcorr,
+ int len,
+ int max_pitch,
+ int arch
+ );
+
+#define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
+ ((*PITCH_XCORR_IMPL[(arch) & OPUS_ARCHMASK])(_x, _y, xcorr, len, max_pitch, arch))
+
+
+#endif /* OPUS_X86_PRESUME_AVX2 || (OPUS_HAVE_RTCD && OPUS_X86_MAY_HAVE_AVX2) */
+
+#endif /* OPUS_X86_MAY_HAVE_SSE && !FIXED_POINT */
#endif
diff --git a/media/libopus/celt/x86/vq_sse.h b/media/libopus/celt/x86/vq_sse.h
index b4efe8f249..444503b630 100644
--- a/media/libopus/celt/x86/vq_sse.h
+++ b/media/libopus/celt/x86/vq_sse.h
@@ -28,16 +28,18 @@
#define VQ_SSE_H
#if defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(FIXED_POINT)
-#define OVERRIDE_OP_PVQ_SEARCH
opus_val16 op_pvq_search_sse2(celt_norm *_X, int *iy, int K, int N, int arch);
#if defined(OPUS_X86_PRESUME_SSE2)
+
+#define OVERRIDE_OP_PVQ_SEARCH
#define op_pvq_search(x, iy, K, N, arch) \
(op_pvq_search_sse2(x, iy, K, N, arch))
-#else
+#elif defined(OPUS_HAVE_RTCD)
+#define OVERRIDE_OP_PVQ_SEARCH
extern opus_val16 (*const OP_PVQ_SEARCH_IMPL[OPUS_ARCHMASK + 1])(
celt_norm *_X, int *iy, int K, int N, int arch);
diff --git a/media/libopus/celt/x86/vq_sse2.c b/media/libopus/celt/x86/vq_sse2.c
index 775042860d..4c4ebf8e2d 100644
--- a/media/libopus/celt/x86/vq_sse2.c
+++ b/media/libopus/celt/x86/vq_sse2.c
@@ -75,7 +75,7 @@ opus_val16 op_pvq_search_sse2(celt_norm *_X, int *iy, int K, int N, int arch)
sums = _mm_add_ps(sums, x4);
/* Clear y and iy in case we don't do the projection. */
_mm_storeu_ps(&y[j], _mm_setzero_ps());
- _mm_storeu_si128((__m128i*)&iy[j], _mm_setzero_si128());
+ _mm_storeu_si128((__m128i*)(void*)&iy[j], _mm_setzero_si128());
_mm_storeu_ps(&X[j], x4);
_mm_storeu_ps(&signy[j], s4);
}
@@ -116,7 +116,7 @@ opus_val16 op_pvq_search_sse2(celt_norm *_X, int *iy, int K, int N, int arch)
rx4 = _mm_mul_ps(x4, rcp4);
iy4 = _mm_cvttps_epi32(rx4);
pulses_sum = _mm_add_epi32(pulses_sum, iy4);
- _mm_storeu_si128((__m128i*)&iy[j], iy4);
+ _mm_storeu_si128((__m128i*)(void*)&iy[j], iy4);
y4 = _mm_cvtepi32_ps(iy4);
xy4 = _mm_add_ps(xy4, _mm_mul_ps(x4, y4));
yy4 = _mm_add_ps(yy4, _mm_mul_ps(y4, y4));
@@ -205,10 +205,10 @@ opus_val16 op_pvq_search_sse2(celt_norm *_X, int *iy, int K, int N, int arch)
{
__m128i y4;
__m128i s4;
- y4 = _mm_loadu_si128((__m128i*)&iy[j]);
+ y4 = _mm_loadu_si128((__m128i*)(void*)&iy[j]);
s4 = _mm_castps_si128(_mm_loadu_ps(&signy[j]));
y4 = _mm_xor_si128(_mm_add_epi32(y4, s4), s4);
- _mm_storeu_si128((__m128i*)&iy[j], y4);
+ _mm_storeu_si128((__m128i*)(void*)&iy[j], y4);
}
RESTORE_STACK;
return yy;
diff --git a/media/libopus/celt/x86/x86_arch_macros.h b/media/libopus/celt/x86/x86_arch_macros.h
new file mode 100644
index 0000000000..975b443e93
--- /dev/null
+++ b/media/libopus/celt/x86/x86_arch_macros.h
@@ -0,0 +1,47 @@
+/* Copyright (c) 2023 Amazon */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef _MSC_VER
+
+# ifdef OPUS_X86_MAY_HAVE_SSE
+# ifndef __SSE__
+# define __SSE__
+# endif
+# endif
+
+# ifdef OPUS_X86_MAY_HAVE_SSE2
+# ifndef __SSE2__
+# define __SSE2__
+# endif
+# endif
+
+# ifdef OPUS_X86_MAY_HAVE_SSE4_1
+# ifndef __SSE4_1__
+# define __SSE4_1__
+# endif
+# endif
+
+#endif
diff --git a/media/libopus/celt/x86/x86_celt_map.c b/media/libopus/celt/x86/x86_celt_map.c
index d39d88edec..ba8eafe6ad 100644
--- a/media/libopus/celt/x86/x86_celt_map.c
+++ b/media/libopus/celt/x86/x86_celt_map.c
@@ -90,6 +90,26 @@ opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(
# else
+#if defined(OPUS_X86_MAY_HAVE_AVX2) && !defined(OPUS_X86_PRESUME_AVX2)
+
+void (*const PITCH_XCORR_IMPL[OPUS_ARCHMASK + 1])(
+ const float *_x,
+ const float *_y,
+ float *xcorr,
+ int len,
+ int max_pitch,
+ int arch
+) = {
+ celt_pitch_xcorr_c, /* non-sse */
+ celt_pitch_xcorr_c,
+ celt_pitch_xcorr_c,
+ celt_pitch_xcorr_c,
+ MAY_HAVE_AVX2(celt_pitch_xcorr)
+};
+
+#endif
+
+
#if defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)
void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])(
diff --git a/media/libopus/celt/x86/x86cpu.c b/media/libopus/celt/x86/x86cpu.c
index 6a1914dee7..2e7c32aeec 100644
--- a/media/libopus/celt/x86/x86cpu.c
+++ b/media/libopus/celt/x86/x86cpu.c
@@ -39,7 +39,7 @@
((defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \
(defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)) || \
(defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) || \
- (defined(OPUS_X86_MAY_HAVE_AVX) && !defined(OPUS_X86_PRESUME_AVX)))
+ (defined(OPUS_X86_MAY_HAVE_AVX2) && !defined(OPUS_X86_PRESUME_AVX2)))
#if defined(_MSC_VER)
@@ -105,7 +105,7 @@ typedef struct CPU_Feature{
int HW_SSE2;
int HW_SSE41;
/* SIMD: 256-bit */
- int HW_AVX;
+ int HW_AVX2;
} CPU_Feature;
static void opus_cpu_feature_check(CPU_Feature *cpu_feature)
@@ -121,13 +121,19 @@ static void opus_cpu_feature_check(CPU_Feature *cpu_feature)
cpu_feature->HW_SSE = (info[3] & (1 << 25)) != 0;
cpu_feature->HW_SSE2 = (info[3] & (1 << 26)) != 0;
cpu_feature->HW_SSE41 = (info[2] & (1 << 19)) != 0;
- cpu_feature->HW_AVX = (info[2] & (1 << 28)) != 0;
+ cpu_feature->HW_AVX2 = (info[2] & (1 << 28)) != 0 && (info[2] & (1 << 12)) != 0;
+ if (cpu_feature->HW_AVX2 && nIds >= 7) {
+ cpuid(info, 7);
+ cpu_feature->HW_AVX2 = cpu_feature->HW_AVX2 && (info[1] & (1 << 5)) != 0;
+ } else {
+ cpu_feature->HW_AVX2 = 0;
+ }
}
else {
cpu_feature->HW_SSE = 0;
cpu_feature->HW_SSE2 = 0;
cpu_feature->HW_SSE41 = 0;
- cpu_feature->HW_AVX = 0;
+ cpu_feature->HW_AVX2 = 0;
}
}
@@ -157,7 +163,7 @@ static int opus_select_arch_impl(void)
}
arch++;
- if (!cpu_feature.HW_AVX)
+ if (!cpu_feature.HW_AVX2)
{
return arch;
}
diff --git a/media/libopus/celt/x86/x86cpu.h b/media/libopus/celt/x86/x86cpu.h
index 04e80489b1..8ae9be8d8f 100644
--- a/media/libopus/celt/x86/x86cpu.h
+++ b/media/libopus/celt/x86/x86cpu.h
@@ -46,28 +46,53 @@
# define MAY_HAVE_SSE4_1(name) name ## _c
# endif
-# if defined(OPUS_X86_MAY_HAVE_AVX)
-# define MAY_HAVE_AVX(name) name ## _avx
+# if defined(OPUS_X86_MAY_HAVE_AVX2)
+# define MAY_HAVE_AVX2(name) name ## _avx2
# else
-# define MAY_HAVE_AVX(name) name ## _c
+# define MAY_HAVE_AVX2(name) name ## _c
# endif
-# if defined(OPUS_HAVE_RTCD)
+# if defined(OPUS_HAVE_RTCD) && \
+ ((defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \
+ (defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)) || \
+ (defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) || \
+ (defined(OPUS_X86_MAY_HAVE_AVX2) && !defined(OPUS_X86_PRESUME_AVX2)))
int opus_select_arch(void);
# endif
+# if defined(OPUS_X86_MAY_HAVE_SSE2)
+# include "opus_defines.h"
+
/*MOVD should not impose any alignment restrictions, but the C standard does,
and UBSan will report errors if we actually make unaligned accesses.
Use this to work around those restrictions (which should hopefully all get
- optimized to a single MOVD instruction).*/
-#define OP_LOADU_EPI32(x) \
- (int)((*(unsigned char *)(x) | *((unsigned char *)(x) + 1) << 8U |\
- *((unsigned char *)(x) + 2) << 16U | (opus_uint32)*((unsigned char *)(x) + 3) << 24U))
+ optimized to a single MOVD instruction).
+ GCC implemented _mm_loadu_si32() since GCC 11; HOWEVER, there is a bug!
+ https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99754 */
+# if !defined(_MSC_VER) && !OPUS_GNUC_PREREQ(11,3) && !(defined(__clang__) && (__clang_major__ >= 8))
+# include <string.h>
+# include <emmintrin.h>
+
+# ifdef _mm_loadu_si32
+# undef _mm_loadu_si32
+# endif
+# define _mm_loadu_si32 WORKAROUND_mm_loadu_si32
+static inline __m128i WORKAROUND_mm_loadu_si32(void const* mem_addr) {
+ int val;
+ memcpy(&val, mem_addr, sizeof(val));
+ return _mm_cvtsi32_si128(val);
+}
+# elif defined(_MSC_VER)
+ /* MSVC needs this for _mm_loadu_si32 */
+# include <immintrin.h>
+# endif
-#define OP_CVTEPI8_EPI32_M32(x) \
- (_mm_cvtepi8_epi32(_mm_cvtsi32_si128(OP_LOADU_EPI32(x))))
+# define OP_CVTEPI8_EPI32_M32(x) \
+ (_mm_cvtepi8_epi32(_mm_loadu_si32(x)))
-#define OP_CVTEPI16_EPI32_M64(x) \
- (_mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i *)(x))))
+# define OP_CVTEPI16_EPI32_M64(x) \
+ (_mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i *)(void*)(x))))
+
+# endif
#endif
diff --git a/media/libopus/include/opus.h b/media/libopus/include/opus.h
index 0c69c627d9..eadeda75a0 100644
--- a/media/libopus/include/opus.h
+++ b/media/libopus/include/opus.h
@@ -103,7 +103,7 @@ extern "C" {
* @endcode
*
* where opus_encoder_get_size() returns the required size for the encoder state. Note that
- * future versions of this code may change the size, so no assuptions should be made about it.
+ * future versions of this code may change the size, so no assumptions should be made about it.
*
* The encoder state is always continuous in memory and only a shallow copy is sufficient
* to copy it (e.g. memcpy())
@@ -357,7 +357,7 @@ OPUS_EXPORT int opus_encoder_ctl(OpusEncoder *st, int request, ...) OPUS_ARG_NON
* error = opus_decoder_init(dec, Fs, channels);
* @endcode
* where opus_decoder_get_size() returns the required size for the decoder state. Note that
- * future versions of this code may change the size, so no assuptions should be made about it.
+ * future versions of this code may change the size, so no assumptions should be made about it.
*
* The decoder state is always continuous in memory and only a shallow copy is sufficient
* to copy it (e.g. memcpy())
@@ -398,6 +398,21 @@ OPUS_EXPORT int opus_encoder_ctl(OpusEncoder *st, int request, ...) OPUS_ARG_NON
*/
typedef struct OpusDecoder OpusDecoder;
+/** Opus DRED decoder.
+ * This contains the complete state of an Opus DRED decoder.
+ * It is position independent and can be freely copied.
+ * @see opus_dred_decoder_create,opus_dred_decoder_init
+ */
+typedef struct OpusDREDDecoder OpusDREDDecoder;
+
+
+/** Opus DRED state.
+ * This contains the complete state of an Opus DRED packet.
+ * It is position independent and can be freely copied.
+ * @see opus_dred_alloc,opus_dred_free
+ */
+typedef struct OpusDRED OpusDRED;
+
/** Gets the size of an <code>OpusDecoder</code> structure.
* @param [in] channels <tt>int</tt>: Number of channels.
* This must be 1 or 2.
@@ -511,6 +526,101 @@ OPUS_EXPORT int opus_decoder_ctl(OpusDecoder *st, int request, ...) OPUS_ARG_NON
*/
OPUS_EXPORT void opus_decoder_destroy(OpusDecoder *st);
+/** Gets the size of an <code>OpusDREDDecoder</code> structure.
+ * @returns The size in bytes.
+ */
+OPUS_EXPORT int opus_dred_decoder_get_size(void);
+
+/** Allocates and initializes an OpusDREDDecoder state.
+ * @param [out] error <tt>int*</tt>: #OPUS_OK Success or @ref opus_errorcodes
+ */
+OPUS_EXPORT OpusDREDDecoder *opus_dred_decoder_create(int *error);
+
+/** Initializes an <code>OpusDREDDecoder</code> state.
+ * @param[in] dec <tt>OpusDREDDecoder*</tt>: State to be initialized.
+ */
+OPUS_EXPORT int opus_dred_decoder_init(OpusDREDDecoder *dec);
+
+/** Frees an <code>OpusDREDDecoder</code> allocated by opus_dred_decoder_create().
+ * @param[in] dec <tt>OpusDREDDecoder*</tt>: State to be freed.
+ */
+OPUS_EXPORT void opus_dred_decoder_destroy(OpusDREDDecoder *dec);
+
+/** Perform a CTL function on an Opus DRED decoder.
+ *
+ * Generally the request and subsequent arguments are generated
+ * by a convenience macro.
+ * @param dred_dec <tt>OpusDREDDecoder*</tt>: DRED Decoder state.
+ * @param request This and all remaining parameters should be replaced by one
+ * of the convenience macros in @ref opus_genericctls or
+ * @ref opus_decoderctls.
+ * @see opus_genericctls
+ * @see opus_decoderctls
+ */
+OPUS_EXPORT int opus_dred_decoder_ctl(OpusDREDDecoder *dred_dec, int request, ...);
+
+/** Gets the size of an <code>OpusDRED</code> structure.
+ * @returns The size in bytes.
+ */
+OPUS_EXPORT int opus_dred_get_size(void);
+
+/** Allocates and initializes a DRED state.
+ * @param [out] error <tt>int*</tt>: #OPUS_OK Success or @ref opus_errorcodes
+ */
+OPUS_EXPORT OpusDRED *opus_dred_alloc(int *error);
+
+/** Frees an <code>OpusDRED</code> allocated by opus_dred_alloc().
+ * @param[in] dec <tt>OpusDRED*</tt>: State to be freed.
+ */
+OPUS_EXPORT void opus_dred_free(OpusDRED *dec);
+
+/** Decode an Opus DRED packet.
+ * @param [in] dred_dec <tt>OpusDREDDecoder*</tt>: DRED Decoder state
+ * @param [in] dred <tt>OpusDRED*</tt>: DRED state
+ * @param [in] data <tt>char*</tt>: Input payload
+ * @param [in] len <tt>opus_int32</tt>: Number of bytes in payload
+ * @param [in] max_dred_samples <tt>opus_int32</tt>: Maximum number of DRED samples that may be needed (if available in the packet).
+ * @param [in] sampling_rate <tt>opus_int32</tt>: Sampling rate used for max_dred_samples argument. Need not match the actual sampling rate of the decoder.
+ * @param [out] dred_end <tt>int*</tt>: Number of non-encoded (silence) samples between the DRED timestamp and the last DRED sample.
+ * @param [in] defer_processing <tt>int</tt>: Flag (0 or 1). If set to one, the CPU-intensive part of the DRED decoding is deferred until opus_dred_process() is called.
+ * @returns Offset (positive) of the first decoded DRED samples, zero if no DRED is present, or @ref opus_errorcodes
+ */
+OPUS_EXPORT int opus_dred_parse(OpusDREDDecoder *dred_dec, OpusDRED *dred, const unsigned char *data, opus_int32 len, opus_int32 max_dred_samples, opus_int32 sampling_rate, int *dred_end, int defer_processing) OPUS_ARG_NONNULL(1);
+
+/** Finish decoding an Opus DRED packet. The function only needs to be called if opus_dred_parse() was called with defer_processing=1.
+ * The source and destination will often be the same DRED state.
+ * @param [in] dred_dec <tt>OpusDREDDecoder*</tt>: DRED Decoder state
+ * @param [in] src <tt>OpusDRED*</tt>: Source DRED state to start the processing from.
+ * @param [out] dst <tt>OpusDRED*</tt>: Destination DRED state to store the updated state after processing.
+ * @returns @ref opus_errorcodes
+ */
+OPUS_EXPORT int opus_dred_process(OpusDREDDecoder *dred_dec, const OpusDRED *src, OpusDRED *dst);
+
+/** Decode audio from an Opus DRED packet with 16-bit integer output.
+ * @param [in] st <tt>OpusDecoder*</tt>: Decoder state
+ * @param [in] dred <tt>OpusDRED*</tt>: DRED state
+ * @param [in] dred_offset <tt>opus_int32</tt>: position of the redundancy to decode (in samples before the beginning of the real audio data in the packet).
+ * @param [out] pcm <tt>opus_int16*</tt>: Output signal (interleaved if 2 channels). length
+ * is frame_size*channels*sizeof(opus_int16)
+ * @param [in] frame_size Number of samples per channel to decode in \a pcm.
+ * frame_size <b>must</b> be a multiple of 2.5 ms.
+ * @returns Number of decoded samples or @ref opus_errorcodes
+ */
+OPUS_EXPORT int opus_decoder_dred_decode(OpusDecoder *st, const OpusDRED *dred, opus_int32 dred_offset, opus_int16 *pcm, opus_int32 frame_size);
+
+/** Decode audio from an Opus DRED packet with floating point output.
+ * @param [in] st <tt>OpusDecoder*</tt>: Decoder state
+ * @param [in] dred <tt>OpusDRED*</tt>: DRED state
+ * @param [in] dred_offset <tt>opus_int32</tt>: position of the redundancy to decode (in samples before the beginning of the real audio data in the packet).
+ * @param [out] pcm <tt>float*</tt>: Output signal (interleaved if 2 channels). length
+ * is frame_size*channels*sizeof(float)
+ * @param [in] frame_size Number of samples per channel to decode in \a pcm.
+ * frame_size <b>must</b> be a multiple of 2.5 ms.
+ * @returns Number of decoded samples or @ref opus_errorcodes
+ */
+OPUS_EXPORT int opus_decoder_dred_decode_float(OpusDecoder *st, const OpusDRED *dred, opus_int32 dred_offset, float *pcm, opus_int32 frame_size);
+
+
/** Parse an opus packet into one or more frames.
* Opus_decode will perform this operation internally so most applications do
* not need to use this function.
@@ -583,6 +693,14 @@ OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_packet_get_nb_frames(const unsigned
*/
OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_packet_get_nb_samples(const unsigned char packet[], opus_int32 len, opus_int32 Fs) OPUS_ARG_NONNULL(1);
+/** Checks whether an Opus packet has LBRR.
+ * @param [in] packet <tt>char*</tt>: Opus packet
+ * @param [in] len <tt>opus_int32</tt>: Length of packet
+ * @returns 1 if LBRR is present, 0 otherwise
+ * @retval OPUS_INVALID_PACKET The compressed data passed is corrupted or of an unsupported type
+ */
+OPUS_EXPORT OPUS_WARN_UNUSED_RESULT int opus_packet_has_lbrr(const unsigned char packet[], opus_int32 len);
+
/** Gets the number of samples of an Opus packet.
* @param [in] dec <tt>OpusDecoder*</tt>: Decoder state
* @param [in] packet <tt>char*</tt>: Opus packet
diff --git a/media/libopus/include/opus_defines.h b/media/libopus/include/opus_defines.h
index 94b9e0d9fc..cd8f4dde86 100644
--- a/media/libopus/include/opus_defines.h
+++ b/media/libopus/include/opus_defines.h
@@ -169,15 +169,32 @@ extern "C" {
#define OPUS_SET_PHASE_INVERSION_DISABLED_REQUEST 4046
#define OPUS_GET_PHASE_INVERSION_DISABLED_REQUEST 4047
#define OPUS_GET_IN_DTX_REQUEST 4049
+#define OPUS_SET_DRED_DURATION_REQUEST 4050
+#define OPUS_GET_DRED_DURATION_REQUEST 4051
+#define OPUS_SET_DNN_BLOB_REQUEST 4052
+/*#define OPUS_GET_DNN_BLOB_REQUEST 4053 */
/** Defines for the presence of extended APIs. */
#define OPUS_HAVE_OPUS_PROJECTION_H
/* Macros to trigger compilation errors when the wrong types are provided to a CTL */
#define __opus_check_int(x) (((void)((x) == (opus_int32)0)), (opus_int32)(x))
+
+#ifdef DISABLE_PTR_CHECK
+/* Disable checks to prevent ubsan from complaining about NULL checks
+ in test_opus_api. */
+#define __opus_check_int_ptr(ptr) (ptr)
+#define __opus_check_uint_ptr(ptr) (ptr)
+#define __opus_check_uint8_ptr(ptr) (ptr)
+#define __opus_check_val16_ptr(ptr) (ptr)
+#define __opus_check_void_ptr(ptr) (ptr)
+#else
#define __opus_check_int_ptr(ptr) ((ptr) + ((ptr) - (opus_int32*)(ptr)))
#define __opus_check_uint_ptr(ptr) ((ptr) + ((ptr) - (opus_uint32*)(ptr)))
+#define __opus_check_uint8_ptr(ptr) ((ptr) + ((ptr) - (opus_uint8*)(ptr)))
#define __opus_check_val16_ptr(ptr) ((ptr) + ((ptr) - (opus_val16*)(ptr)))
+#define __opus_check_void_ptr(x) ((void)((void *)0 == (x)), (x))
+#endif
/** @endcond */
/** @defgroup opus_ctlvalues Pre-defined values for CTL interface
@@ -620,6 +637,18 @@ extern "C" {
* @hideinitializer */
#define OPUS_GET_PREDICTION_DISABLED(x) OPUS_GET_PREDICTION_DISABLED_REQUEST, __opus_check_int_ptr(x)
+/** If non-zero, enables Deep Redundancy (DRED) and uses the specified maximum number of 10-ms redundant frames
+ * @hideinitializer */
+#define OPUS_SET_DRED_DURATION(x) OPUS_SET_DRED_DURATION_REQUEST, __opus_check_int(x)
+/** Gets the encoder's configured Deep Redundancy (DRED) maximum number of frames.
+ * @hideinitializer */
+#define OPUS_GET_DRED_DURATION(x) OPUS_GET_DRED_DURATION_REQUEST, __opus_check_int_ptr(x)
+
+/** Provide external DNN weights from binary object (only when explicitly built without the weights)
+ * @hideinitializer */
+#define OPUS_SET_DNN_BLOB(data, len) OPUS_SET_DNN_BLOB_REQUEST, __opus_check_void_ptr(data), __opus_check_int(len)
+
+
/**@}*/
/** @defgroup opus_genericctls Generic CTLs
diff --git a/media/libopus/include/opus_multistream.h b/media/libopus/include/opus_multistream.h
index babcee6905..824cc55ac5 100644
--- a/media/libopus/include/opus_multistream.h
+++ b/media/libopus/include/opus_multistream.h
@@ -143,7 +143,7 @@ extern "C" {
* <a href="https://www.xiph.org/vorbis/doc/Vorbis_I_spec.html#x1-810004.3.9">Vorbis
* channel ordering</a>. A decoder may wish to apply an additional permutation
* to the mapping the encoder used to achieve a different output channel
- * order (e.g. for outputing in WAV order).
+ * order (e.g. for outputting in WAV order).
*
* Each multistream packet contains an Opus packet for each stream, and all of
* the Opus packets in a single multistream packet must have the same
diff --git a/media/libopus/moz.build b/media/libopus/moz.build
index 6acb9b7e27..44c0ab7c90 100644
--- a/media/libopus/moz.build
+++ b/media/libopus/moz.build
@@ -21,7 +21,7 @@ FINAL_LIBRARY = "gkcodecs"
NoVisibilityFlags()
DEFINES["OPUS_BUILD"] = True
-DEFINES["OPUS_VERSION"] = "c85499757c148fede8604cffa12454206b6138ba"
+DEFINES["OPUS_VERSION"] = "ab4e83598e7fc8b2ce82dc633a0fc0c452b629aa"
DEFINES["USE_ALLOCA"] = True
DEFINES["ENABLE_HARDENING"] = True
diff --git a/media/libopus/moz.yaml b/media/libopus/moz.yaml
index 1b1ae14ec2..ed76d36d1f 100644
--- a/media/libopus/moz.yaml
+++ b/media/libopus/moz.yaml
@@ -20,11 +20,11 @@ origin:
# Human-readable identifier for this version/release
# Generally "version NNN", "tag SSS", "bookmark SSS"
- release: c85499757c148fede8604cffa12454206b6138ba (2023-11-03T15:07:54.000-07:00).
+ release: ab4e83598e7fc8b2ce82dc633a0fc0c452b629aa (2024-03-04T11:53:07.000-05:00).
# Revision to pull in
# Must be a long or short commit SHA (long preferred)
- revision: c85499757c148fede8604cffa12454206b6138ba
+ revision: ab4e83598e7fc8b2ce82dc633a0fc0c452b629aa
# The package's license, where possible using the mnemonic from
# https://spdx.org/licenses/
diff --git a/media/libopus/silk/API.h b/media/libopus/silk/API.h
index 4d90ff9aa3..878965c73a 100644
--- a/media/libopus/silk/API.h
+++ b/media/libopus/silk/API.h
@@ -34,6 +34,10 @@ POSSIBILITY OF SUCH DAMAGE.
#include "entenc.h"
#include "entdec.h"
+#ifdef ENABLE_DEEP_PLC
+#include "lpcnet_private.h"
+#endif
+
#ifdef __cplusplus
extern "C"
{
@@ -88,6 +92,16 @@ opus_int silk_Encode( /* O Returns error co
/* Decoder functions */
/****************************************/
+
+/***********************************************/
+/* Load OSCE models from external data pointer */
+/***********************************************/
+opus_int silk_LoadOSCEModels(
+ void *decState, /* I/O State */
+ const unsigned char *data, /* I pointer to binary blob */
+ int len /* I length of binary blob data */
+);
+
/***********************************************/
/* Get size in bytes of the Silk decoder state */
/***********************************************/
@@ -96,8 +110,12 @@ opus_int silk_Get_Decoder_Size( /* O Returns error co
);
/*************************/
-/* Init or Reset decoder */
+/* Init and Reset decoder */
/*************************/
+opus_int silk_ResetDecoder( /* O Returns error code */
+ void *decState /* I/O State */
+);
+
opus_int silk_InitDecoder( /* O Returns error code */
void *decState /* I/O State */
);
@@ -113,6 +131,9 @@ opus_int silk_Decode( /* O Returns error co
ec_dec *psRangeDec, /* I/O Compressor data structure */
opus_int16 *samplesOut, /* O Decoded output speech vector */
opus_int32 *nSamplesOut, /* O Number of samples decoded */
+#ifdef ENABLE_DEEP_PLC
+ LPCNetPLCState *lpcnet,
+#endif
int arch /* I Run-time architecture */
);
diff --git a/media/libopus/silk/NSQ.c b/media/libopus/silk/NSQ.c
index c99ec5bce5..1caa829bbe 100644
--- a/media/libopus/silk/NSQ.c
+++ b/media/libopus/silk/NSQ.c
@@ -80,7 +80,7 @@ void silk_NSQ_c
SideInfoIndices *psIndices, /* I/O Quantization Indices */
const opus_int16 x16[], /* I Input */
opus_int8 pulses[], /* O Quantized pulse signal */
- const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */
+ const opus_int16 *PredCoef_Q12, /* I Short term prediction coefs */
const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */
const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */
const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */
diff --git a/media/libopus/silk/NSQ_del_dec.c b/media/libopus/silk/NSQ_del_dec.c
index 77f72cec3a..e8dadf1591 100644
--- a/media/libopus/silk/NSQ_del_dec.c
+++ b/media/libopus/silk/NSQ_del_dec.c
@@ -120,7 +120,7 @@ void silk_NSQ_del_dec_c(
SideInfoIndices *psIndices, /* I/O Quantization Indices */
const opus_int16 x16[], /* I Input */
opus_int8 pulses[], /* O Quantized pulse signal */
- const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */
+ const opus_int16 *PredCoef_Q12, /* I Short term prediction coefs */
const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */
const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */
const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */
diff --git a/media/libopus/silk/PLC.c b/media/libopus/silk/PLC.c
index 4667440db2..b35bf750a0 100644
--- a/media/libopus/silk/PLC.c
+++ b/media/libopus/silk/PLC.c
@@ -33,6 +33,10 @@ POSSIBILITY OF SUCH DAMAGE.
#include "stack_alloc.h"
#include "PLC.h"
+#ifdef ENABLE_DEEP_PLC
+#include "lpcnet.h"
+#endif
+
#define NB_ATT 2
static const opus_int16 HARM_ATT_Q15[NB_ATT] = { 32440, 31130 }; /* 0.99, 0.95 */
static const opus_int16 PLC_RAND_ATTENUATE_V_Q15[NB_ATT] = { 31130, 26214 }; /* 0.95, 0.8 */
@@ -47,6 +51,9 @@ static OPUS_INLINE void silk_PLC_conceal(
silk_decoder_state *psDec, /* I/O Decoder state */
silk_decoder_control *psDecCtrl, /* I/O Decoder control */
opus_int16 frame[], /* O LPC residual signal */
+#ifdef ENABLE_DEEP_PLC
+ LPCNetPLCState *lpcnet,
+#endif
int arch /* I Run-time architecture */
);
@@ -67,6 +74,9 @@ void silk_PLC(
silk_decoder_control *psDecCtrl, /* I/O Decoder control */
opus_int16 frame[], /* I/O signal */
opus_int lost, /* I Loss flag */
+#ifdef ENABLE_DEEP_PLC
+ LPCNetPLCState *lpcnet,
+#endif
int arch /* I Run-time architecture */
)
{
@@ -80,7 +90,11 @@ void silk_PLC(
/****************************/
/* Generate Signal */
/****************************/
- silk_PLC_conceal( psDec, psDecCtrl, frame, arch );
+ silk_PLC_conceal( psDec, psDecCtrl, frame,
+#ifdef ENABLE_DEEP_PLC
+ lpcnet,
+#endif
+ arch );
psDec->lossCnt++;
} else {
@@ -88,6 +102,14 @@ void silk_PLC(
/* Update state */
/****************************/
silk_PLC_update( psDec, psDecCtrl );
+#ifdef ENABLE_DEEP_PLC
+ if ( lpcnet != NULL && psDec->sPLC.fs_kHz == 16 ) {
+ int k;
+ for( k = 0; k < psDec->nb_subfr; k += 2 ) {
+ lpcnet_plc_update( lpcnet, frame + k * psDec->subfr_length );
+ }
+ }
+#endif
}
}
@@ -195,6 +217,9 @@ static OPUS_INLINE void silk_PLC_conceal(
silk_decoder_state *psDec, /* I/O Decoder state */
silk_decoder_control *psDecCtrl, /* I/O Decoder control */
opus_int16 frame[], /* O LPC residual signal */
+#ifdef ENABLE_DEEP_PLC
+ LPCNetPLCState *lpcnet,
+#endif
int arch /* I Run-time architecture */
)
{
@@ -371,6 +396,24 @@ static OPUS_INLINE void silk_PLC_conceal(
/* Scale with Gain */
frame[ i ] = (opus_int16)silk_SAT16( silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( sLPC_Q14_ptr[ MAX_LPC_ORDER + i ], prevGain_Q10[ 1 ] ), 8 ) ) );
}
+#ifdef ENABLE_DEEP_PLC
+ if ( lpcnet != NULL && lpcnet->loaded && psDec->sPLC.fs_kHz == 16 ) {
+ int run_deep_plc = psDec->sPLC.enable_deep_plc || lpcnet->fec_fill_pos != 0;
+ if( run_deep_plc ) {
+ for( k = 0; k < psDec->nb_subfr; k += 2 ) {
+ lpcnet_plc_conceal( lpcnet, frame + k * psDec->subfr_length );
+ }
+ /* We *should* be able to copy only from psDec->frame_length-MAX_LPC_ORDER, i.e. the last MAX_LPC_ORDER samples. */
+ for( i = 0; i < psDec->frame_length; i++ ) {
+ sLPC_Q14_ptr[ MAX_LPC_ORDER + i ] = (int)floor(.5 + frame[ i ] * (float)(1 << 24) / prevGain_Q10[ 1 ] );
+ }
+ } else {
+ for( k = 0; k < psDec->nb_subfr; k += 2 ) {
+ lpcnet_plc_update( lpcnet, frame + k * psDec->subfr_length );
+ }
+ }
+ }
+#endif
/* Save LPC state */
silk_memcpy( psDec->sLPC_Q14_buf, &sLPC_Q14_ptr[ psDec->frame_length ], MAX_LPC_ORDER * sizeof( opus_int32 ) );
@@ -431,12 +474,16 @@ void silk_PLC_glue_frames(
slope_Q16 = silk_DIV32_16( ( (opus_int32)1 << 16 ) - gain_Q16, length );
/* Make slope 4x steeper to avoid missing onsets after DTX */
slope_Q16 = silk_LSHIFT( slope_Q16, 2 );
-
- for( i = 0; i < length; i++ ) {
- frame[ i ] = silk_SMULWB( gain_Q16, frame[ i ] );
- gain_Q16 += slope_Q16;
- if( gain_Q16 > (opus_int32)1 << 16 ) {
- break;
+#ifdef ENABLE_DEEP_PLC
+ if ( psDec->sPLC.fs_kHz != 16 )
+#endif
+ {
+ for( i = 0; i < length; i++ ) {
+ frame[ i ] = silk_SMULWB( gain_Q16, frame[ i ] );
+ gain_Q16 += slope_Q16;
+ if( gain_Q16 > (opus_int32)1 << 16 ) {
+ break;
+ }
}
}
}
diff --git a/media/libopus/silk/PLC.h b/media/libopus/silk/PLC.h
index 6438f51633..1bebb78638 100644
--- a/media/libopus/silk/PLC.h
+++ b/media/libopus/silk/PLC.h
@@ -49,6 +49,9 @@ void silk_PLC(
silk_decoder_control *psDecCtrl, /* I/O Decoder control */
opus_int16 frame[], /* I/O signal */
opus_int lost, /* I Loss flag */
+#ifdef ENABLE_DEEP_PLC
+ LPCNetPLCState *lpcnet,
+#endif
int arch /* I Run-time architecture */
);
diff --git a/media/libopus/silk/arm/NSQ_del_dec_arm.h b/media/libopus/silk/arm/NSQ_del_dec_arm.h
index 9e76e16927..0c4fcfccb4 100644
--- a/media/libopus/silk/arm/NSQ_del_dec_arm.h
+++ b/media/libopus/silk/arm/NSQ_del_dec_arm.h
@@ -34,7 +34,7 @@ POSSIBILITY OF SUCH DAMAGE.
void silk_NSQ_del_dec_neon(
const silk_encoder_state *psEncC, silk_nsq_state *NSQ,
SideInfoIndices *psIndices, const opus_int16 x16[], opus_int8 pulses[],
- const opus_int16 PredCoef_Q12[2 * MAX_LPC_ORDER],
+ const opus_int16 *PredCoef_Q12,
const opus_int16 LTPCoef_Q14[LTP_ORDER * MAX_NB_SUBFR],
const opus_int16 AR_Q13[MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER],
const opus_int HarmShapeGain_Q14[MAX_NB_SUBFR],
@@ -65,7 +65,7 @@ void silk_NSQ_del_dec_neon(
extern void (*const SILK_NSQ_DEL_DEC_IMPL[OPUS_ARCHMASK + 1])(
const silk_encoder_state *psEncC, silk_nsq_state *NSQ,
SideInfoIndices *psIndices, const opus_int16 x16[], opus_int8 pulses[],
- const opus_int16 PredCoef_Q12[2 * MAX_LPC_ORDER],
+ const opus_int16 *PredCoef_Q12,
const opus_int16 LTPCoef_Q14[LTP_ORDER * MAX_NB_SUBFR],
const opus_int16 AR_Q13[MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER],
const opus_int HarmShapeGain_Q14[MAX_NB_SUBFR],
diff --git a/media/libopus/silk/arm/NSQ_del_dec_neon_intr.c b/media/libopus/silk/arm/NSQ_del_dec_neon_intr.c
index 212410f362..668dde6dc4 100644
--- a/media/libopus/silk/arm/NSQ_del_dec_neon_intr.c
+++ b/media/libopus/silk/arm/NSQ_del_dec_neon_intr.c
@@ -35,6 +35,7 @@ POSSIBILITY OF SUCH DAMAGE.
#endif
#include "main.h"
#include "stack_alloc.h"
+#include "os_support.h"
/* NEON intrinsics optimization now can only parallelize up to 4 delay decision states. */
/* If there are more states, C function is called, and this optimization must be expanded. */
@@ -220,7 +221,7 @@ void silk_NSQ_del_dec_neon(
SideInfoIndices *psIndices, /* I/O Quantization Indices */
const opus_int16 x16[], /* I Input */
opus_int8 pulses[], /* O Quantized pulse signal */
- const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */
+ const opus_int16 *PredCoef_Q12, /* I Short term prediction coefs */
const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */
const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */
const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */
@@ -279,6 +280,7 @@ void silk_NSQ_del_dec_neon(
/* Initialize delayed decision states */
ALLOC( psDelDec, 1, NSQ_del_decs_struct );
+ OPUS_CLEAR(psDelDec, 1);
/* Only RandState and RD_Q10 need to be initialized to 0. */
silk_memset( psDelDec->RandState, 0, sizeof( psDelDec->RandState ) );
vst1q_s32( psDelDec->RD_Q10, vdupq_n_s32( 0 ) );
@@ -587,6 +589,7 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec_neon(
silk_assert( nStatesDelayedDecision > 0 );
silk_assert( ( shapingLPCOrder & 1 ) == 0 ); /* check that order is even */
ALLOC( psSampleState, 2, NSQ_samples_struct );
+ OPUS_CLEAR(psSampleState, 2);
shp_lag_ptr = &NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - lag + HARM_SHAPE_FIR_TAPS / 2 ];
pred_lag_ptr = &sLTP_Q15[ NSQ->sLTP_buf_idx - lag + LTP_ORDER / 2 ];
@@ -711,23 +714,26 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec_neon(
const int rdo_offset = Lambda_Q10/2 - 512;
const uint16x4_t greaterThanRdo = vcgt_s16( q1_Q10_s16x4, vdup_n_s16( rdo_offset ) );
const uint16x4_t lessThanMinusRdo = vclt_s16( q1_Q10_s16x4, vdup_n_s16( -rdo_offset ) );
+ int16x4_t signed_offset = vbsl_s16( greaterThanRdo, vdup_n_s16( -rdo_offset ), vdup_n_s16( 0 ) );
+ signed_offset = vbsl_s16( lessThanMinusRdo, vdup_n_s16( rdo_offset ), signed_offset );
/* If Lambda_Q10 > 32767, then q1_Q0, q1_Q10 and q2_Q10 must change to 32-bit. */
silk_assert( Lambda_Q10 <= 32767 );
q1_Q0_s16x4 = vreinterpret_s16_u16( vclt_s16( q1_Q10_s16x4, vdup_n_s16( 0 ) ) );
- q1_Q0_s16x4 = vbsl_s16( greaterThanRdo, vsub_s16( q1_Q10_s16x4, vdup_n_s16( rdo_offset ) ), q1_Q0_s16x4 );
- q1_Q0_s16x4 = vbsl_s16( lessThanMinusRdo, vadd_s16( q1_Q10_s16x4, vdup_n_s16( rdo_offset ) ), q1_Q0_s16x4 );
+ q1_Q0_s16x4 = vbsl_s16(vorr_u16(greaterThanRdo, lessThanMinusRdo), vadd_s16( q1_Q10_s16x4 , signed_offset), q1_Q0_s16x4);
q1_Q0_s16x4 = vshr_n_s16( q1_Q0_s16x4, 10 );
}
{
const uint16x4_t equal0_u16x4 = vceq_s16( q1_Q0_s16x4, vdup_n_s16( 0 ) );
const uint16x4_t equalMinus1_u16x4 = vceq_s16( q1_Q0_s16x4, vdup_n_s16( -1 ) );
const uint16x4_t lessThanMinus1_u16x4 = vclt_s16( q1_Q0_s16x4, vdup_n_s16( -1 ) );
- int16x4_t tmp1_s16x4, tmp2_s16x4;
+ int16x4_t tmp1_s16x4, tmp2_s16x4, tmp_summand_s16x4;
q1_Q10_s16x4 = vshl_n_s16( q1_Q0_s16x4, 10 );
- tmp1_s16x4 = vadd_s16( q1_Q10_s16x4, vdup_n_s16( offset_Q10 - QUANT_LEVEL_ADJUST_Q10 ) );
- q1_Q10_s16x4 = vadd_s16( q1_Q10_s16x4, vdup_n_s16( offset_Q10 + QUANT_LEVEL_ADJUST_Q10 ) );
+ tmp_summand_s16x4 = vand_s16( vreinterpret_s16_u16(vcge_s16(q1_Q0_s16x4, vdup_n_s16(0))), vdup_n_s16( offset_Q10 - QUANT_LEVEL_ADJUST_Q10 ) );
+ tmp1_s16x4 = vadd_s16( q1_Q10_s16x4, tmp_summand_s16x4 );
+ tmp_summand_s16x4 = vbsl_s16( lessThanMinus1_u16x4, vdup_n_s16( offset_Q10 + QUANT_LEVEL_ADJUST_Q10 ), vdup_n_s16(0) );
+ q1_Q10_s16x4 = vadd_s16( q1_Q10_s16x4, tmp_summand_s16x4);
q1_Q10_s16x4 = vbsl_s16( lessThanMinus1_u16x4, q1_Q10_s16x4, tmp1_s16x4 );
q1_Q10_s16x4 = vbsl_s16( equal0_u16x4, vdup_n_s16( offset_Q10 ), q1_Q10_s16x4 );
q1_Q10_s16x4 = vbsl_s16( equalMinus1_u16x4, vdup_n_s16( offset_Q10 - ( 1024 - QUANT_LEVEL_ADJUST_Q10 ) ), q1_Q10_s16x4 );
@@ -818,6 +824,13 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec_neon(
}
}
+ /* clear unused part of RD_Q10 to avoid overflows */
+ if( nStatesDelayedDecision < NEON_MAX_DEL_DEC_STATES )
+ {
+ OPUS_CLEAR(psSampleState[0].RD_Q10 + nStatesDelayedDecision, NEON_MAX_DEL_DEC_STATES - nStatesDelayedDecision);
+ OPUS_CLEAR(psSampleState[1].RD_Q10 + nStatesDelayedDecision, NEON_MAX_DEL_DEC_STATES - nStatesDelayedDecision);
+ }
+
/* Increase RD values of expired states */
{
uint32x4_t t_u32x4;
@@ -896,7 +909,8 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec_neon(
vst1q_s32( psDelDec->Pred_Q15[ *smpl_buf_idx ], vshlq_n_s32( vld1q_s32( psSampleState[ 0 ].LPC_exc_Q14 ), 1 ) );
vst1q_s32( psDelDec->Shape_Q14[ *smpl_buf_idx ], vld1q_s32( psSampleState[ 0 ].sLTP_shp_Q14 ) );
tmp1_s32x4 = vrshrq_n_s32( tmp1_s32x4, 10 );
- tmp1_s32x4 = vaddq_s32( vld1q_s32( psDelDec->Seed ), tmp1_s32x4 );
+ tmp1_s32x4 = vreinterpretq_s32_u32( vaddq_u32( vreinterpretq_u32_s32(
+ vld1q_s32( psDelDec->Seed ) ), vreinterpretq_u32_s32( tmp1_s32x4 ) ) );
vst1q_s32( psDelDec->Seed, tmp1_s32x4 );
vst1q_s32( psDelDec->RandState[ *smpl_buf_idx ], tmp1_s32x4 );
vst1q_s32( psDelDec->RD_Q10, vld1q_s32( psSampleState[ 0 ].RD_Q10 ) );
diff --git a/media/libopus/silk/arm/NSQ_neon.h b/media/libopus/silk/arm/NSQ_neon.h
index b31d9442d6..f03d8ddd98 100644
--- a/media/libopus/silk/arm/NSQ_neon.h
+++ b/media/libopus/silk/arm/NSQ_neon.h
@@ -73,7 +73,7 @@ static OPUS_INLINE void silk_short_prediction_create_arch_coef_neon(opus_int32 *
#elif defined(OPUS_HAVE_RTCD) && defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
#define silk_short_prediction_create_arch_coef(out, in, order) \
- do { if (arch == OPUS_ARCH_ARM_NEON) { silk_short_prediction_create_arch_coef_neon(out, in, order); } } while (0)
+ do { if (arch >= OPUS_ARCH_ARM_NEON) { silk_short_prediction_create_arch_coef_neon(out, in, order); } } while (0)
#endif
@@ -95,7 +95,7 @@ opus_int32 silk_NSQ_noise_shape_feedback_loop_neon(const opus_int32 *data0, opus
(coef vs. coefRev) so can't use the usual IMPL table implementation */
#undef silk_noise_shape_quantizer_short_prediction
#define silk_noise_shape_quantizer_short_prediction(in, coef, coefRev, order, arch) \
- (arch == OPUS_ARCH_ARM_NEON ? \
+ (arch >= OPUS_ARCH_ARM_NEON ? \
silk_noise_shape_quantizer_short_prediction_neon(in, coefRev, order) : \
silk_noise_shape_quantizer_short_prediction_c(in, coef, order))
diff --git a/media/libopus/silk/arm/arm_silk_map.c b/media/libopus/silk/arm/arm_silk_map.c
index 0b9bfec2ca..a91f79b59f 100644
--- a/media/libopus/silk/arm/arm_silk_map.c
+++ b/media/libopus/silk/arm/arm_silk_map.c
@@ -49,6 +49,7 @@ void (*const SILK_BIQUAD_ALT_STRIDE2_IMPL[OPUS_ARCHMASK + 1])(
silk_biquad_alt_stride2_c, /* EDSP */
silk_biquad_alt_stride2_c, /* Media */
silk_biquad_alt_stride2_neon, /* Neon */
+ silk_biquad_alt_stride2_neon, /* dotprod */
};
opus_int32 (*const SILK_LPC_INVERSE_PRED_GAIN_IMPL[OPUS_ARCHMASK + 1])( /* O Returns inverse prediction gain in energy domain, Q30 */
@@ -59,6 +60,7 @@ opus_int32 (*const SILK_LPC_INVERSE_PRED_GAIN_IMPL[OPUS_ARCHMASK + 1])( /* O R
silk_LPC_inverse_pred_gain_c, /* EDSP */
silk_LPC_inverse_pred_gain_c, /* Media */
silk_LPC_inverse_pred_gain_neon, /* Neon */
+ silk_LPC_inverse_pred_gain_neon, /* dotprod */
};
void (*const SILK_NSQ_DEL_DEC_IMPL[OPUS_ARCHMASK + 1])(
@@ -67,7 +69,7 @@ void (*const SILK_NSQ_DEL_DEC_IMPL[OPUS_ARCHMASK + 1])(
SideInfoIndices *psIndices, /* I/O Quantization Indices */
const opus_int16 x16[], /* I Input */
opus_int8 pulses[], /* O Quantized pulse signal */
- const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */
+ const opus_int16 *PredCoef_Q12, /* I Short term prediction coefs */
const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */
const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */
const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */
@@ -82,6 +84,7 @@ void (*const SILK_NSQ_DEL_DEC_IMPL[OPUS_ARCHMASK + 1])(
silk_NSQ_del_dec_c, /* EDSP */
silk_NSQ_del_dec_c, /* Media */
silk_NSQ_del_dec_neon, /* Neon */
+ silk_NSQ_del_dec_neon, /* dotprod */
};
/*There is no table for silk_noise_shape_quantizer_short_prediction because the
@@ -97,6 +100,7 @@ opus_int32
silk_NSQ_noise_shape_feedback_loop_c, /* EDSP */
silk_NSQ_noise_shape_feedback_loop_c, /* Media */
silk_NSQ_noise_shape_feedback_loop_neon, /* NEON */
+ silk_NSQ_noise_shape_feedback_loop_neon, /* dotprod */
};
# endif
@@ -116,6 +120,7 @@ void (*const SILK_WARPED_AUTOCORRELATION_FIX_IMPL[OPUS_ARCHMASK + 1])(
silk_warped_autocorrelation_FIX_c, /* EDSP */
silk_warped_autocorrelation_FIX_c, /* Media */
silk_warped_autocorrelation_FIX_neon, /* Neon */
+ silk_warped_autocorrelation_FIX_neon, /* dotprod */
};
# endif
diff --git a/media/libopus/silk/control.h b/media/libopus/silk/control.h
index b76ec33cd6..f5633e624e 100644
--- a/media/libopus/silk/control.h
+++ b/media/libopus/silk/control.h
@@ -77,6 +77,9 @@ typedef struct {
/* I: Flag to enable in-band Forward Error Correction (FEC); 0/1 */
opus_int useInBandFEC;
+ /* I: Flag to enable in-band Deep REDundancy (DRED); 0/1 */
+ opus_int useDRED;
+
/* I: Flag to actually code in-band Forward Error Correction (FEC) in the current packet; 0/1 */
opus_int LBRR_coded;
@@ -141,6 +144,14 @@ typedef struct {
/* O: Pitch lag of previous frame (0 if unvoiced), measured in samples at 48 kHz */
opus_int prevPitchLag;
+
+ /* I: Enable Deep PLC */
+ opus_int enable_deep_plc;
+
+#ifdef ENABLE_OSCE
+ /* I: OSCE method */
+ opus_int osce_method;
+#endif
} silk_DecControlStruct;
#ifdef __cplusplus
diff --git a/media/libopus/silk/dec_API.c b/media/libopus/silk/dec_API.c
index 7d5ca7fb9f..c1091d13ed 100644
--- a/media/libopus/silk/dec_API.c
+++ b/media/libopus/silk/dec_API.c
@@ -33,6 +33,11 @@ POSSIBILITY OF SUCH DAMAGE.
#include "stack_alloc.h"
#include "os_support.h"
+#ifdef ENABLE_OSCE
+#include "osce.h"
+#include "osce_structs.h"
+#endif
+
/************************/
/* Decoder Super Struct */
/************************/
@@ -42,12 +47,33 @@ typedef struct {
opus_int nChannelsAPI;
opus_int nChannelsInternal;
opus_int prev_decode_only_middle;
+#ifdef ENABLE_OSCE
+ OSCEModel osce_model;
+#endif
} silk_decoder;
/*********************/
/* Decoder functions */
/*********************/
+
+
+opus_int silk_LoadOSCEModels(void *decState, const unsigned char *data, int len) /* Load OSCE models from a binary blob into the decoder state; returns the loader's result, or SILK_NO_ERROR when ENABLE_OSCE is not defined */
+{
+#ifdef ENABLE_OSCE
+ opus_int ret = SILK_NO_ERROR;
+
+ ret = osce_load_models(&((silk_decoder *)decState)->osce_model, data, len); /* decState is accessed as a silk_decoder; data/len are forwarded unchanged */
+ ((silk_decoder *)decState)->osce_model.loaded = (ret == 0); /* flag the model as loaded only when the loader returned 0 */
+ return ret;
+#else
+ (void) decState; /* arguments are intentionally unused in this configuration */
+ (void) data;
+ (void) len;
+ return SILK_NO_ERROR; /* no-op without ENABLE_OSCE */
+#endif
+}
+
opus_int silk_Get_Decoder_Size( /* O Returns error code */
opus_int *decSizeBytes /* O Number of bytes in SILK decoder state */
)
@@ -60,12 +86,37 @@ opus_int silk_Get_Decoder_Size( /* O Returns error co
}
/* Reset decoder state */
+opus_int silk_ResetDecoder( /* O Returns error code */
+ void *decState /* I/O State */
+)
+{
+ opus_int n, ret = SILK_NO_ERROR;
+ silk_decoder_state *channel_state = ((silk_decoder *)decState)->channel_state; /* decState is accessed as a silk_decoder */
+
+ for( n = 0; n < DECODER_NUM_CHANNELS; n++ ) { /* reset the state of every decoder channel */
+ ret = silk_reset_decoder( &channel_state[ n ] ); /* NOTE(review): ret is overwritten on each iteration, so only the last channel's result reaches the caller */
+ }
+ silk_memset(&((silk_decoder *)decState)->sStereo, 0, sizeof(((silk_decoder *)decState)->sStereo)); /* zero the stereo state */
+ /* Not strictly needed, but it's cleaner that way */
+ ((silk_decoder *)decState)->prev_decode_only_middle = 0;
+
+ return ret; /* this function does not touch the nChannelsAPI/nChannelsInternal fields or any OSCE model */
+}
+
+
opus_int silk_InitDecoder( /* O Returns error code */
void *decState /* I/O State */
)
{
opus_int n, ret = SILK_NO_ERROR;
silk_decoder_state *channel_state = ((silk_decoder *)decState)->channel_state;
+#ifdef ENABLE_OSCE
+ ((silk_decoder *)decState)->osce_model.loaded = 0;
+#endif
+#ifndef USE_WEIGHTS_FILE
+ /* load osce models */
+ silk_LoadOSCEModels(decState, NULL, 0);
+#endif
for( n = 0; n < DECODER_NUM_CHANNELS; n++ ) {
ret = silk_init_decoder( &channel_state[ n ] );
@@ -86,6 +137,9 @@ opus_int silk_Decode( /* O Returns error co
ec_dec *psRangeDec, /* I/O Compressor data structure */
opus_int16 *samplesOut, /* O Decoded output speech vector */
opus_int32 *nSamplesOut, /* O Number of samples decoded */
+#ifdef ENABLE_DEEP_PLC
+ LPCNetPLCState *lpcnet,
+#endif
int arch /* I Run-time architecture */
)
{
@@ -278,6 +332,7 @@ opus_int silk_Decode( /* O Returns error co
has_side = !psDec->prev_decode_only_middle
|| (decControl->nChannelsInternal == 2 && lostFlag == FLAG_DECODE_LBRR && channel_state[1].LBRR_flags[ channel_state[1].nFramesDecoded ] == 1 );
}
+ channel_state[ 0 ].sPLC.enable_deep_plc = decControl->enable_deep_plc;
/* Call decoder for one frame */
for( n = 0; n < decControl->nChannelsInternal; n++ ) {
if( n == 0 || has_side ) {
@@ -297,7 +352,19 @@ opus_int silk_Decode( /* O Returns error co
} else {
condCoding = CODE_CONDITIONALLY;
}
- ret += silk_decode_frame( &channel_state[ n ], psRangeDec, &samplesOut1_tmp[ n ][ 2 ], &nSamplesOutDec, lostFlag, condCoding, arch);
+#ifdef ENABLE_OSCE
+ if ( channel_state[n].osce.method != decControl->osce_method ) {
+ osce_reset( &channel_state[n].osce, decControl->osce_method );
+ }
+#endif
+ ret += silk_decode_frame( &channel_state[ n ], psRangeDec, &samplesOut1_tmp[ n ][ 2 ], &nSamplesOutDec, lostFlag, condCoding,
+#ifdef ENABLE_DEEP_PLC
+ n == 0 ? lpcnet : NULL,
+#endif
+#ifdef ENABLE_OSCE
+ &psDec->osce_model,
+#endif
+ arch);
} else {
silk_memset( &samplesOut1_tmp[ n ][ 2 ], 0, nSamplesOutDec * sizeof( opus_int16 ) );
}
diff --git a/media/libopus/silk/decode_frame.c b/media/libopus/silk/decode_frame.c
index 4f36f854c2..9bc4ca2b0e 100644
--- a/media/libopus/silk/decode_frame.c
+++ b/media/libopus/silk/decode_frame.c
@@ -33,6 +33,10 @@ POSSIBILITY OF SUCH DAMAGE.
#include "stack_alloc.h"
#include "PLC.h"
+#ifdef ENABLE_OSCE
+#include "osce.h"
+#endif
+
/****************/
/* Decode frame */
/****************/
@@ -43,6 +47,12 @@ opus_int silk_decode_frame(
opus_int32 *pN, /* O Pointer to size of output frame */
opus_int lostFlag, /* I 0: no loss, 1 loss, 2 decode fec */
opus_int condCoding, /* I The type of conditional coding to use */
+#ifdef ENABLE_DEEP_PLC
+ LPCNetPLCState *lpcnet,
+#endif
+#ifdef ENABLE_OSCE
+ OSCEModel *osce_model,
+#endif
int arch /* I Run-time architecture */
)
{
@@ -61,6 +71,10 @@ opus_int silk_decode_frame(
( lostFlag == FLAG_DECODE_LBRR && psDec->LBRR_flags[ psDec->nFramesDecoded ] == 1 ) )
{
VARDECL( opus_int16, pulses );
+#ifdef ENABLE_OSCE
+ opus_int32 ec_start;
+ ec_start = ec_tell(psRangeDec);
+#endif
ALLOC( pulses, (L + SHELL_CODEC_FRAME_LENGTH - 1) &
~(SHELL_CODEC_FRAME_LENGTH - 1), opus_int16 );
/*********************************************/
@@ -84,10 +98,29 @@ opus_int silk_decode_frame(
/********************************************************/
silk_decode_core( psDec, psDecCtrl, pOut, pulses, arch );
+ /*************************/
+ /* Update output buffer. */
+ /*************************/
+ celt_assert( psDec->ltp_mem_length >= psDec->frame_length );
+ mv_len = psDec->ltp_mem_length - psDec->frame_length;
+ silk_memmove( psDec->outBuf, &psDec->outBuf[ psDec->frame_length ], mv_len * sizeof(opus_int16) );
+ silk_memcpy( &psDec->outBuf[ mv_len ], pOut, psDec->frame_length * sizeof( opus_int16 ) );
+
+#ifdef ENABLE_OSCE
+ /********************************************************/
+ /* Run SILK enhancer */
+ /********************************************************/
+ osce_enhance_frame( osce_model, psDec, psDecCtrl, pOut, ec_tell(psRangeDec) - ec_start, arch );
+#endif
+
/********************************************************/
/* Update PLC state */
/********************************************************/
- silk_PLC( psDec, psDecCtrl, pOut, 0, arch );
+ silk_PLC( psDec, psDecCtrl, pOut, 0,
+#ifdef ENABLE_DEEP_PLC
+ lpcnet,
+#endif
+ arch );
psDec->lossCnt = 0;
psDec->prevSignalType = psDec->indices.signalType;
@@ -97,16 +130,23 @@ opus_int silk_decode_frame(
psDec->first_frame_after_reset = 0;
} else {
/* Handle packet loss by extrapolation */
- silk_PLC( psDec, psDecCtrl, pOut, 1, arch );
- }
+ silk_PLC( psDec, psDecCtrl, pOut, 1,
+#ifdef ENABLE_DEEP_PLC
+ lpcnet,
+#endif
+ arch );
- /*************************/
- /* Update output buffer. */
- /*************************/
- celt_assert( psDec->ltp_mem_length >= psDec->frame_length );
- mv_len = psDec->ltp_mem_length - psDec->frame_length;
- silk_memmove( psDec->outBuf, &psDec->outBuf[ psDec->frame_length ], mv_len * sizeof(opus_int16) );
- silk_memcpy( &psDec->outBuf[ mv_len ], pOut, psDec->frame_length * sizeof( opus_int16 ) );
+#ifdef ENABLE_OSCE
+ osce_reset( &psDec->osce, psDec->osce.method );
+#endif
+ /*************************/
+ /* Update output buffer. */
+ /*************************/
+ celt_assert( psDec->ltp_mem_length >= psDec->frame_length );
+ mv_len = psDec->ltp_mem_length - psDec->frame_length;
+ silk_memmove( psDec->outBuf, &psDec->outBuf[ psDec->frame_length ], mv_len * sizeof(opus_int16) );
+ silk_memcpy( &psDec->outBuf[ mv_len ], pOut, psDec->frame_length * sizeof( opus_int16 ) );
+ }
/************************************************/
/* Comfort noise generation / estimation */
diff --git a/media/libopus/silk/enc_API.c b/media/libopus/silk/enc_API.c
index 548e07364d..369caddd98 100644
--- a/media/libopus/silk/enc_API.c
+++ b/media/libopus/silk/enc_API.c
@@ -41,6 +41,10 @@ POSSIBILITY OF SUCH DAMAGE.
#include "main_FLP.h"
#endif
+#ifdef ENABLE_DRED
+#include "dred_encoder.h"
+#endif
+
/***************************************/
/* Read control structure from encoder */
/***************************************/
diff --git a/media/libopus/silk/fixed/encode_frame_FIX.c b/media/libopus/silk/fixed/encode_frame_FIX.c
index a02bf87dbb..7c83360ba3 100644
--- a/media/libopus/silk/fixed/encode_frame_FIX.c
+++ b/media/libopus/silk/fixed/encode_frame_FIX.c
@@ -105,8 +105,11 @@ opus_int silk_encode_frame_FIX(
opus_int gain_lock[ MAX_NB_SUBFR ] = {0};
opus_int16 best_gain_mult[ MAX_NB_SUBFR ];
opus_int best_sum[ MAX_NB_SUBFR ];
+ opus_int bits_margin;
SAVE_STACK;
+ /* For CBR, 5 bits below budget is close enough. For VBR, allow up to 25% below the cap if we initially busted the budget. */
+ bits_margin = useCBR ? 5 : maxBits/4;
/* This is totally unnecessary but many compilers (including gcc) are too dumb to realise it */
LastGainIndex_copy2 = nBits_lower = nBits_upper = gainMult_lower = gainMult_upper = 0;
@@ -282,7 +285,7 @@ opus_int silk_encode_frame_FIX(
gainMult_upper = gainMult_Q8;
gainsID_upper = gainsID;
}
- } else if( nBits < maxBits - 5 ) {
+ } else if( nBits < maxBits - bits_margin ) {
found_lower = 1;
nBits_lower = nBits;
gainMult_lower = gainMult_Q8;
@@ -296,7 +299,7 @@ opus_int silk_encode_frame_FIX(
LastGainIndex_copy2 = psEnc->sShape.LastGainIndex;
}
} else {
- /* Within 5 bits of budget: close enough */
+ /* Close enough */
break;
}
@@ -318,17 +321,10 @@ opus_int silk_encode_frame_FIX(
if( ( found_lower & found_upper ) == 0 ) {
/* Adjust gain according to high-rate rate/distortion curve */
if( nBits > maxBits ) {
- if (gainMult_Q8 < 16384) {
- gainMult_Q8 *= 2;
- } else {
- gainMult_Q8 = 32767;
- }
+ gainMult_Q8 = silk_min_32( 1024, gainMult_Q8*3/2 );
} else {
- opus_int32 gain_factor_Q16;
- gain_factor_Q16 = silk_log2lin( silk_LSHIFT( nBits - maxBits, 7 ) / psEnc->sCmn.frame_length + SILK_FIX_CONST( 16, 7 ) );
- gainMult_Q8 = silk_SMULWB( gain_factor_Q16, gainMult_Q8 );
+ gainMult_Q8 = silk_max_32( 64, gainMult_Q8*4/5 );
}
-
} else {
/* Adjust gain by interpolating */
gainMult_Q8 = gainMult_lower + silk_DIV32_16( silk_MUL( gainMult_upper - gainMult_lower, maxBits - nBits_lower ), nBits_upper - nBits_lower );
diff --git a/media/libopus/silk/float/SigProc_FLP.h b/media/libopus/silk/float/SigProc_FLP.h
index 953de8b09e..ff9281b852 100644
--- a/media/libopus/silk/float/SigProc_FLP.h
+++ b/media/libopus/silk/float/SigProc_FLP.h
@@ -30,6 +30,7 @@ POSSIBILITY OF SUCH DAMAGE.
#include "SigProc_FIX.h"
#include "float_cast.h"
+#include "main.h"
#include <math.h>
#ifdef __cplusplus
@@ -73,7 +74,8 @@ void silk_autocorrelation_FLP(
silk_float *results, /* O result (length correlationCount) */
const silk_float *inputData, /* I input data to correlate */
opus_int inputDataSize, /* I length of input */
- opus_int correlationCount /* I number of correlation taps to compute */
+ opus_int correlationCount, /* I number of correlation taps to compute */
+ int arch
);
opus_int silk_pitch_analysis_core_FLP( /* O Voicing estimate: 0 voiced, 1 unvoiced */
@@ -105,7 +107,8 @@ silk_float silk_burg_modified_FLP( /* O returns residual energy
const silk_float minInvGain, /* I minimum inverse prediction gain */
const opus_int subfr_length, /* I input signal subframe length (incl. D preceding samples) */
const opus_int nb_subfr, /* I number of subframes stacked in x */
- const opus_int D /* I order */
+ const opus_int D, /* I order */
+ int arch
);
/* multiply a vector by a constant */
@@ -124,12 +127,17 @@ void silk_scale_copy_vector_FLP(
);
/* inner product of two silk_float arrays, with result as double */
-double silk_inner_product_FLP(
+double silk_inner_product_FLP_c(
const silk_float *data1,
const silk_float *data2,
opus_int dataSize
);
+#ifndef OVERRIDE_inner_product_FLP /* presumably defined by an arch-specific header that supplies its own silk_inner_product_FLP -- TODO confirm */
+#define silk_inner_product_FLP(data1, data2, dataSize, arch) ((void)arch,silk_inner_product_FLP_c(data1, data2, dataSize)) /* default mapping: discards arch and calls the C implementation; NOTE(review): arch is not parenthesised, so pass a simple expression */
+#endif
+
+
/* sum of squares of a silk_float array, with result as double */
double silk_energy_FLP(
const silk_float *data,
diff --git a/media/libopus/silk/float/autocorrelation_FLP.c b/media/libopus/silk/float/autocorrelation_FLP.c
index 8b8a9e659a..4253b26ebc 100644
--- a/media/libopus/silk/float/autocorrelation_FLP.c
+++ b/media/libopus/silk/float/autocorrelation_FLP.c
@@ -37,7 +37,8 @@ void silk_autocorrelation_FLP(
silk_float *results, /* O result (length correlationCount) */
const silk_float *inputData, /* I input data to correlate */
opus_int inputDataSize, /* I length of input */
- opus_int correlationCount /* I number of correlation taps to compute */
+ opus_int correlationCount, /* I number of correlation taps to compute */
+ int arch
)
{
opus_int i;
@@ -47,6 +48,6 @@ void silk_autocorrelation_FLP(
}
for( i = 0; i < correlationCount; i++ ) {
- results[ i ] = (silk_float)silk_inner_product_FLP( inputData, inputData + i, inputDataSize - i );
+ results[ i ] = (silk_float)silk_inner_product_FLP( inputData, inputData + i, inputDataSize - i, arch );
}
}
diff --git a/media/libopus/silk/float/burg_modified_FLP.c b/media/libopus/silk/float/burg_modified_FLP.c
index 756b76a35b..f5bef5ddbe 100644
--- a/media/libopus/silk/float/burg_modified_FLP.c
+++ b/media/libopus/silk/float/burg_modified_FLP.c
@@ -42,7 +42,8 @@ silk_float silk_burg_modified_FLP( /* O returns residual energy
const silk_float minInvGain, /* I minimum inverse prediction gain */
const opus_int subfr_length, /* I input signal subframe length (incl. D preceding samples) */
const opus_int nb_subfr, /* I number of subframes stacked in x */
- const opus_int D /* I order */
+ const opus_int D, /* I order */
+ int arch
)
{
opus_int k, n, s, reached_max_gain;
@@ -60,7 +61,7 @@ silk_float silk_burg_modified_FLP( /* O returns residual energy
for( s = 0; s < nb_subfr; s++ ) {
x_ptr = x + s * subfr_length;
for( n = 1; n < D + 1; n++ ) {
- C_first_row[ n - 1 ] += silk_inner_product_FLP( x_ptr, x_ptr + n, subfr_length - n );
+ C_first_row[ n - 1 ] += silk_inner_product_FLP( x_ptr, x_ptr + n, subfr_length - n, arch );
}
}
silk_memcpy( C_last_row, C_first_row, SILK_MAX_ORDER_LPC * sizeof( double ) );
diff --git a/media/libopus/silk/float/corrMatrix_FLP.c b/media/libopus/silk/float/corrMatrix_FLP.c
index eae6a1cfca..eef6e8aa79 100644
--- a/media/libopus/silk/float/corrMatrix_FLP.c
+++ b/media/libopus/silk/float/corrMatrix_FLP.c
@@ -41,7 +41,8 @@ void silk_corrVector_FLP(
const silk_float *t, /* I Target vector [L] */
const opus_int L, /* I Length of vecors */
const opus_int Order, /* I Max lag for correlation */
- silk_float *Xt /* O X'*t correlation vector [order] */
+ silk_float *Xt, /* O X'*t correlation vector [order] */
+ int arch
)
{
opus_int lag;
@@ -50,7 +51,7 @@ void silk_corrVector_FLP(
ptr1 = &x[ Order - 1 ]; /* Points to first sample of column 0 of X: X[:,0] */
for( lag = 0; lag < Order; lag++ ) {
/* Calculate X[:,lag]'*t */
- Xt[ lag ] = (silk_float)silk_inner_product_FLP( ptr1, t, L );
+ Xt[ lag ] = (silk_float)silk_inner_product_FLP( ptr1, t, L, arch );
ptr1--; /* Next column of X */
}
}
@@ -60,7 +61,8 @@ void silk_corrMatrix_FLP(
const silk_float *x, /* I x vector [ L+order-1 ] used to create X */
const opus_int L, /* I Length of vectors */
const opus_int Order, /* I Max lag for correlation */
- silk_float *XX /* O X'*X correlation matrix [order x order] */
+ silk_float *XX, /* O X'*X correlation matrix [order x order] */
+ int arch
)
{
opus_int j, lag;
@@ -79,7 +81,7 @@ void silk_corrMatrix_FLP(
ptr2 = &x[ Order - 2 ]; /* First sample of column 1 of X */
for( lag = 1; lag < Order; lag++ ) {
/* Calculate X[:,0]'*X[:,lag] */
- energy = silk_inner_product_FLP( ptr1, ptr2, L );
+ energy = silk_inner_product_FLP( ptr1, ptr2, L, arch );
matrix_ptr( XX, lag, 0, Order ) = ( silk_float )energy;
matrix_ptr( XX, 0, lag, Order ) = ( silk_float )energy;
/* Calculate X[:,j]'*X[:,j + lag] */
diff --git a/media/libopus/silk/float/encode_frame_FLP.c b/media/libopus/silk/float/encode_frame_FLP.c
index b029c3f5ca..8a327c5626 100644
--- a/media/libopus/silk/float/encode_frame_FLP.c
+++ b/media/libopus/silk/float/encode_frame_FLP.c
@@ -107,7 +107,10 @@ opus_int silk_encode_frame_FLP(
opus_int gain_lock[ MAX_NB_SUBFR ] = {0};
opus_int16 best_gain_mult[ MAX_NB_SUBFR ];
opus_int best_sum[ MAX_NB_SUBFR ];
+ opus_int bits_margin;
+ /* For CBR, 5 bits below budget is close enough. For VBR, allow up to 25% below the cap if we initially busted the budget. */
+ bits_margin = useCBR ? 5 : maxBits/4;
/* This is totally unnecessary but many compilers (including gcc) are too dumb to realise it */
LastGainIndex_copy2 = nBits_lower = nBits_upper = gainMult_lower = gainMult_upper = 0;
@@ -270,7 +273,7 @@ opus_int silk_encode_frame_FLP(
gainMult_upper = gainMult_Q8;
gainsID_upper = gainsID;
}
- } else if( nBits < maxBits - 5 ) {
+ } else if( nBits < maxBits - bits_margin ) {
found_lower = 1;
nBits_lower = nBits;
gainMult_lower = gainMult_Q8;
@@ -284,7 +287,7 @@ opus_int silk_encode_frame_FLP(
LastGainIndex_copy2 = psEnc->sShape.LastGainIndex;
}
} else {
- /* Within 5 bits of budget: close enough */
+ /* Close enough */
break;
}
@@ -306,15 +309,9 @@ opus_int silk_encode_frame_FLP(
if( ( found_lower & found_upper ) == 0 ) {
/* Adjust gain according to high-rate rate/distortion curve */
if( nBits > maxBits ) {
- if (gainMult_Q8 < 16384) {
- gainMult_Q8 *= 2;
- } else {
- gainMult_Q8 = 32767;
- }
+ gainMult_Q8 = silk_min_32( 1024, gainMult_Q8*3/2 );
} else {
- opus_int32 gain_factor_Q16;
- gain_factor_Q16 = silk_log2lin( silk_LSHIFT( nBits - maxBits, 7 ) / psEnc->sCmn.frame_length + SILK_FIX_CONST( 16, 7 ) );
- gainMult_Q8 = silk_SMULWB( gain_factor_Q16, gainMult_Q8 );
+ gainMult_Q8 = silk_max_32( 64, gainMult_Q8*4/5 );
}
} else {
/* Adjust gain by interpolating */
diff --git a/media/libopus/silk/float/find_LPC_FLP.c b/media/libopus/silk/float/find_LPC_FLP.c
index fa3ffe7f8b..6ccd711dc3 100644
--- a/media/libopus/silk/float/find_LPC_FLP.c
+++ b/media/libopus/silk/float/find_LPC_FLP.c
@@ -38,7 +38,8 @@ void silk_find_LPC_FLP(
silk_encoder_state *psEncC, /* I/O Encoder state */
opus_int16 NLSF_Q15[], /* O NLSFs */
const silk_float x[], /* I Input signal */
- const silk_float minInvGain /* I Inverse of max prediction gain */
+ const silk_float minInvGain, /* I Inverse of max prediction gain */
+ int arch
)
{
opus_int k, subfr_length;
@@ -56,12 +57,12 @@ void silk_find_LPC_FLP(
psEncC->indices.NLSFInterpCoef_Q2 = 4;
/* Burg AR analysis for the full frame */
- res_nrg = silk_burg_modified_FLP( a, x, minInvGain, subfr_length, psEncC->nb_subfr, psEncC->predictLPCOrder );
+ res_nrg = silk_burg_modified_FLP( a, x, minInvGain, subfr_length, psEncC->nb_subfr, psEncC->predictLPCOrder, arch );
if( psEncC->useInterpolatedNLSFs && !psEncC->first_frame_after_reset && psEncC->nb_subfr == MAX_NB_SUBFR ) {
/* Optimal solution for last 10 ms; subtract residual energy here, as that's easier than */
/* adding it to the residual energy of the first 10 ms in each iteration of the search below */
- res_nrg -= silk_burg_modified_FLP( a_tmp, x + ( MAX_NB_SUBFR / 2 ) * subfr_length, minInvGain, subfr_length, MAX_NB_SUBFR / 2, psEncC->predictLPCOrder );
+ res_nrg -= silk_burg_modified_FLP( a_tmp, x + ( MAX_NB_SUBFR / 2 ) * subfr_length, minInvGain, subfr_length, MAX_NB_SUBFR / 2, psEncC->predictLPCOrder, arch );
/* Convert to NLSFs */
silk_A2NLSF_FLP( NLSF_Q15, a_tmp, psEncC->predictLPCOrder );
diff --git a/media/libopus/silk/float/find_LTP_FLP.c b/media/libopus/silk/float/find_LTP_FLP.c
index f97064930e..90aeeac0b7 100644
--- a/media/libopus/silk/float/find_LTP_FLP.c
+++ b/media/libopus/silk/float/find_LTP_FLP.c
@@ -38,7 +38,8 @@ void silk_find_LTP_FLP(
const silk_float r_ptr[], /* I LPC residual */
const opus_int lag[ MAX_NB_SUBFR ], /* I LTP lags */
const opus_int subfr_length, /* I Subframe length */
- const opus_int nb_subfr /* I number of subframes */
+ const opus_int nb_subfr, /* I number of subframes */
+ int arch
)
{
opus_int k;
@@ -50,8 +51,8 @@ void silk_find_LTP_FLP(
XX_ptr = XX;
for( k = 0; k < nb_subfr; k++ ) {
lag_ptr = r_ptr - ( lag[ k ] + LTP_ORDER / 2 );
- silk_corrMatrix_FLP( lag_ptr, subfr_length, LTP_ORDER, XX_ptr );
- silk_corrVector_FLP( lag_ptr, r_ptr, subfr_length, LTP_ORDER, xX_ptr );
+ silk_corrMatrix_FLP( lag_ptr, subfr_length, LTP_ORDER, XX_ptr, arch );
+ silk_corrVector_FLP( lag_ptr, r_ptr, subfr_length, LTP_ORDER, xX_ptr, arch );
xx = ( silk_float )silk_energy_FLP( r_ptr, subfr_length + LTP_ORDER );
temp = 1.0f / silk_max( xx, LTP_CORR_INV_MAX * 0.5f * ( XX_ptr[ 0 ] + XX_ptr[ 24 ] ) + 1.0f );
silk_scale_vector_FLP( XX_ptr, temp, LTP_ORDER * LTP_ORDER );
diff --git a/media/libopus/silk/float/find_pitch_lags_FLP.c b/media/libopus/silk/float/find_pitch_lags_FLP.c
index dedbcd2836..1f6bd5991c 100644
--- a/media/libopus/silk/float/find_pitch_lags_FLP.c
+++ b/media/libopus/silk/float/find_pitch_lags_FLP.c
@@ -82,7 +82,7 @@ void silk_find_pitch_lags_FLP(
silk_apply_sine_window_FLP( Wsig_ptr, x_buf_ptr, 2, psEnc->sCmn.la_pitch );
/* Calculate autocorrelation sequence */
- silk_autocorrelation_FLP( auto_corr, Wsig, psEnc->sCmn.pitch_LPC_win_length, psEnc->sCmn.pitchEstimationLPCOrder + 1 );
+ silk_autocorrelation_FLP( auto_corr, Wsig, psEnc->sCmn.pitch_LPC_win_length, psEnc->sCmn.pitchEstimationLPCOrder + 1, arch );
/* Add white noise, as a fraction of the energy */
auto_corr[ 0 ] += auto_corr[ 0 ] * FIND_PITCH_WHITE_NOISE_FRACTION + 1;
diff --git a/media/libopus/silk/float/find_pred_coefs_FLP.c b/media/libopus/silk/float/find_pred_coefs_FLP.c
index 6f79078893..f3c54cf474 100644
--- a/media/libopus/silk/float/find_pred_coefs_FLP.c
+++ b/media/libopus/silk/float/find_pred_coefs_FLP.c
@@ -63,7 +63,7 @@ void silk_find_pred_coefs_FLP(
celt_assert( psEnc->sCmn.ltp_mem_length - psEnc->sCmn.predictLPCOrder >= psEncCtrl->pitchL[ 0 ] + LTP_ORDER / 2 );
/* LTP analysis */
- silk_find_LTP_FLP( XXLTP, xXLTP, res_pitch, psEncCtrl->pitchL, psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr );
+ silk_find_LTP_FLP( XXLTP, xXLTP, res_pitch, psEncCtrl->pitchL, psEnc->sCmn.subfr_length, psEnc->sCmn.nb_subfr, psEnc->sCmn.arch );
/* Quantize LTP gain parameters */
silk_quant_LTP_gains_FLP( psEncCtrl->LTPCoef, psEnc->sCmn.indices.LTPIndex, &psEnc->sCmn.indices.PERIndex,
@@ -102,7 +102,7 @@ void silk_find_pred_coefs_FLP(
}
/* LPC_in_pre contains the LTP-filtered input for voiced, and the unfiltered input for unvoiced */
- silk_find_LPC_FLP( &psEnc->sCmn, NLSF_Q15, LPC_in_pre, minInvGain );
+ silk_find_LPC_FLP( &psEnc->sCmn, NLSF_Q15, LPC_in_pre, minInvGain, psEnc->sCmn.arch );
/* Quantize LSFs */
silk_process_NLSFs_FLP( &psEnc->sCmn, psEncCtrl->PredCoef, NLSF_Q15, psEnc->sCmn.prev_NLSFq_Q15 );
diff --git a/media/libopus/silk/float/inner_product_FLP.c b/media/libopus/silk/float/inner_product_FLP.c
index cdd39d24ce..88b160ab40 100644
--- a/media/libopus/silk/float/inner_product_FLP.c
+++ b/media/libopus/silk/float/inner_product_FLP.c
@@ -32,7 +32,7 @@ POSSIBILITY OF SUCH DAMAGE.
#include "SigProc_FLP.h"
/* inner product of two silk_float arrays, with result as double */
-double silk_inner_product_FLP(
+double silk_inner_product_FLP_c(
const silk_float *data1,
const silk_float *data2,
opus_int dataSize
diff --git a/media/libopus/silk/float/main_FLP.h b/media/libopus/silk/float/main_FLP.h
index 5dc0ccf4a4..2e4435cc68 100644
--- a/media/libopus/silk/float/main_FLP.h
+++ b/media/libopus/silk/float/main_FLP.h
@@ -138,7 +138,8 @@ void silk_find_LPC_FLP(
silk_encoder_state *psEncC, /* I/O Encoder state */
opus_int16 NLSF_Q15[], /* O NLSFs */
const silk_float x[], /* I Input signal */
- const silk_float minInvGain /* I Prediction gain from LTP (dB) */
+ const silk_float minInvGain, /* I Prediction gain from LTP (dB) */
+ int arch
);
/* LTP analysis */
@@ -148,7 +149,8 @@ void silk_find_LTP_FLP(
const silk_float r_ptr[], /* I LPC residual */
const opus_int lag[ MAX_NB_SUBFR ], /* I LTP lags */
const opus_int subfr_length, /* I Subframe length */
- const opus_int nb_subfr /* I number of subframes */
+ const opus_int nb_subfr, /* I number of subframes */
+ int arch
);
void silk_LTP_analysis_filter_FLP(
@@ -221,7 +223,8 @@ void silk_corrMatrix_FLP(
const silk_float *x, /* I x vector [ L+order-1 ] used to create X */
const opus_int L, /* I Length of vectors */
const opus_int Order, /* I Max lag for correlation */
- silk_float *XX /* O X'*X correlation matrix [order x order] */
+ silk_float *XX, /* O X'*X correlation matrix [order x order] */
+ int arch
);
/* Calculates correlation vector X'*t */
@@ -230,7 +233,8 @@ void silk_corrVector_FLP(
const silk_float *t, /* I Target vector [L] */
const opus_int L, /* I Length of vecors */
const opus_int Order, /* I Max lag for correlation */
- silk_float *Xt /* O X'*t correlation vector [order] */
+ silk_float *Xt, /* O X'*t correlation vector [order] */
+ int arch
);
/* Apply sine window to signal vector. */
diff --git a/media/libopus/silk/float/noise_shape_analysis_FLP.c b/media/libopus/silk/float/noise_shape_analysis_FLP.c
index cb3d8a50b7..0b5ea95218 100644
--- a/media/libopus/silk/float/noise_shape_analysis_FLP.c
+++ b/media/libopus/silk/float/noise_shape_analysis_FLP.c
@@ -255,7 +255,7 @@ void silk_noise_shape_analysis_FLP(
psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder );
} else {
/* Calculate regular auto correlation */
- silk_autocorrelation_FLP( auto_corr, x_windowed, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder + 1 );
+ silk_autocorrelation_FLP( auto_corr, x_windowed, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder + 1, psEnc->sCmn.arch );
}
/* Add white noise, as a fraction of energy */
diff --git a/media/libopus/silk/float/pitch_analysis_core_FLP.c b/media/libopus/silk/float/pitch_analysis_core_FLP.c
index f351bc3718..0530a8831a 100644
--- a/media/libopus/silk/float/pitch_analysis_core_FLP.c
+++ b/media/libopus/silk/float/pitch_analysis_core_FLP.c
@@ -291,7 +291,7 @@ opus_int silk_pitch_analysis_core_FLP( /* O Voicing estimate: 0 voiced,
for( j = 0; j < length_d_comp; j++ ) {
d = d_comp[ j ];
basis_ptr = target_ptr - d;
- cross_corr = silk_inner_product_FLP( basis_ptr, target_ptr, sf_length_8kHz );
+ cross_corr = silk_inner_product_FLP( basis_ptr, target_ptr, sf_length_8kHz, arch );
if( cross_corr > 0.0f ) {
energy = silk_energy_FLP( basis_ptr, sf_length_8kHz );
C[ k ][ d ] = (silk_float)( 2 * cross_corr / ( energy + energy_tmp ) );
diff --git a/media/libopus/silk/float/warped_autocorrelation_FLP.c b/media/libopus/silk/float/warped_autocorrelation_FLP.c
index 09186e73d4..116dab923f 100644
--- a/media/libopus/silk/float/warped_autocorrelation_FLP.c
+++ b/media/libopus/silk/float/warped_autocorrelation_FLP.c
@@ -54,11 +54,13 @@ void silk_warped_autocorrelation_FLP(
/* Loop over allpass sections */
for( i = 0; i < order; i += 2 ) {
/* Output of allpass section */
- tmp2 = state[ i ] + warping * ( state[ i + 1 ] - tmp1 );
+ /* We deliberately use two multiplications instead of factoring the expression to
+ reduce the length of the dependency chain (tmp1->tmp2->tmp1... ). */
+ tmp2 = state[ i ] + warping * state[ i + 1 ] - warping * tmp1;
state[ i ] = tmp1;
C[ i ] += state[ 0 ] * tmp1;
/* Output of allpass section */
- tmp1 = state[ i + 1 ] + warping * ( state[ i + 2 ] - tmp2 );
+ tmp1 = state[ i + 1 ] + warping * state[ i + 2 ] - warping * tmp2;
state[ i + 1 ] = tmp2;
C[ i + 1 ] += state[ 0 ] * tmp2;
}
diff --git a/media/libopus/silk/float/x86/inner_product_FLP_avx2.c b/media/libopus/silk/float/x86/inner_product_FLP_avx2.c
new file mode 100644
index 0000000000..4a2daaf595
--- /dev/null
+++ b/media/libopus/silk/float/x86/inner_product_FLP_avx2.c
@@ -0,0 +1,85 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+ 2023 Amazon
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "SigProc_FLP.h"
+#include <immintrin.h>
+
+
+/* Inner product of two silk_float arrays, with the result accumulated and returned as double.
+   data1, data2: input vectors (read with unaligned loads); dataSize: number of elements in each.
+   Partial sums are kept in separate vector lanes, so the summation order differs from a
+   plain sequential loop. */
+double silk_inner_product_FLP_avx2(
+ const silk_float *data1,
+ const silk_float *data2,
+ opus_int dataSize
+)
+{
+ opus_int i;
+ __m256d accum1, accum2;
+ double result;
+
+ /* Main loop: 8 elements per iteration, as two independent 4-wide double-precision FMAs */
+ result = 0.0;
+ accum1 = accum2 = _mm256_setzero_pd();
+ for( i = 0; i < dataSize - 7; i += 8 ) {
+ __m128 x1f, x2f;
+ __m256d x1d, x2d;
+ x1f = _mm_loadu_ps( &data1[ i ] );
+ x2f = _mm_loadu_ps( &data2[ i ] );
+ x1d = _mm256_cvtps_pd( x1f ); /* widen 4 floats to 4 doubles */
+ x2d = _mm256_cvtps_pd( x2f );
+ accum1 = _mm256_fmadd_pd( x1d, x2d, accum1 );
+ x1f = _mm_loadu_ps( &data1[ i + 4 ] );
+ x2f = _mm_loadu_ps( &data2[ i + 4 ] );
+ x1d = _mm256_cvtps_pd( x1f );
+ x2d = _mm256_cvtps_pd( x2f );
+ accum2 = _mm256_fmadd_pd( x1d, x2d, accum2 ); /* second accumulator keeps the two FMA chains independent */
+ }
+ for( ; i < dataSize - 3; i += 4 ) { /* fewer than 8 elements remain, so this runs at most once */
+ __m128 x1f, x2f;
+ __m256d x1d, x2d;
+ x1f = _mm_loadu_ps( &data1[ i ] );
+ x2f = _mm_loadu_ps( &data2[ i ] );
+ x1d = _mm256_cvtps_pd( x1f );
+ x2d = _mm256_cvtps_pd( x2f );
+ accum1 = _mm256_fmadd_pd( x1d, x2d, accum1 );
+ }
+ accum1 = _mm256_add_pd(accum1, accum2); /* merge the two accumulators */
+ accum1 = _mm256_add_pd(accum1, _mm256_permute2f128_pd(accum1, accum1, 1)); /* add the swapped 128-bit halves */
+ accum1 = _mm256_hadd_pd(accum1,accum1); /* add adjacent pairs: every element now holds the total */
+ result = _mm256_cvtsd_f64(accum1); /* take the lowest element */
+
+ /* Scalar tail: add the remaining (at most 3) products */
+ for( ; i < dataSize; i++ ) {
+ result += data1[ i ] * (double)data2[ i ];
+ }
+
+ return result;
+}
diff --git a/media/libopus/silk/init_decoder.c b/media/libopus/silk/init_decoder.c
index 16c03dcd1c..01bc4b7a12 100644
--- a/media/libopus/silk/init_decoder.c
+++ b/media/libopus/silk/init_decoder.c
@@ -31,15 +31,21 @@ POSSIBILITY OF SUCH DAMAGE.
#include "main.h"
+#ifdef ENABLE_OSCE
+#include "osce.h"
+#endif
+
+#include "structs.h"
+
/************************/
-/* Init Decoder State */
+/* Reset Decoder State */
/************************/
-opus_int silk_init_decoder(
+opus_int silk_reset_decoder(
silk_decoder_state *psDec /* I/O Decoder state pointer */
)
{
/* Clear the entire encoder state, except anything copied */
- silk_memset( psDec, 0, sizeof( silk_decoder_state ) );
+ silk_memset( &psDec->SILK_DECODER_STATE_RESET_START, 0, sizeof( silk_decoder_state ) - ((char*) &psDec->SILK_DECODER_STATE_RESET_START - (char*)psDec) );
/* Used to deactivate LSF interpolation */
psDec->first_frame_after_reset = 1;
@@ -52,6 +58,27 @@ opus_int silk_init_decoder(
/* Reset PLC state */
silk_PLC_Reset( psDec );
+#ifdef ENABLE_OSCE
+ /* Reset OSCE state and method */
+ osce_reset(&psDec->osce, OSCE_DEFAULT_METHOD);
+#endif
+
+ return 0;
+}
+
+
+/************************/
+/* Init Decoder State */
+/************************/
+opus_int silk_init_decoder(
+ silk_decoder_state *psDec /* I/O Decoder state pointer */
+)
+{
+ /* Clear the entire decoder state, including the leading part that silk_reset_decoder() does not clear */
+ silk_memset( psDec, 0, sizeof( silk_decoder_state ) );
+
+ silk_reset_decoder( psDec ); /* then apply the common reset; its return value is not checked here */
+
return(0);
}
diff --git a/media/libopus/silk/init_encoder.c b/media/libopus/silk/init_encoder.c
index 65995c33fa..10d41287fe 100644
--- a/media/libopus/silk/init_encoder.c
+++ b/media/libopus/silk/init_encoder.c
@@ -36,6 +36,10 @@ POSSIBILITY OF SUCH DAMAGE.
#include "tuning_parameters.h"
#include "cpu_support.h"
+#ifdef ENABLE_DRED
+#include "dred_encoder.h"
+#endif
+
/*********************************/
/* Initialize Silk Encoder state */
/*********************************/
diff --git a/media/libopus/silk/main.h b/media/libopus/silk/main.h
index a5f568758f..cd576d8cc1 100644
--- a/media/libopus/silk/main.h
+++ b/media/libopus/silk/main.h
@@ -252,7 +252,7 @@ void silk_NSQ_c(
SideInfoIndices *psIndices, /* I/O Quantization Indices */
const opus_int16 x16[], /* I Input */
opus_int8 pulses[], /* O Quantized pulse signal */
- const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */
+ const opus_int16 *PredCoef_Q12, /* I Short term prediction coefs */
const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */
const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */
const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */
@@ -278,7 +278,7 @@ void silk_NSQ_del_dec_c(
SideInfoIndices *psIndices, /* I/O Quantization Indices */
const opus_int16 x16[], /* I Input */
opus_int8 pulses[], /* O Quantized pulse signal */
- const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */
+ const opus_int16 *PredCoef_Q12, /* I Short term prediction coefs */
const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */
const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */
const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */
@@ -389,6 +389,10 @@ void silk_NLSF_decode(
/****************************************************/
/* Decoder Functions */
/****************************************************/
+opus_int silk_reset_decoder(
+ silk_decoder_state *psDec /* I/O Decoder state pointer */
+);
+
opus_int silk_init_decoder(
silk_decoder_state *psDec /* I/O Decoder state pointer */
);
@@ -410,6 +414,12 @@ opus_int silk_decode_frame(
opus_int32 *pN, /* O Pointer to size of output frame */
opus_int lostFlag, /* I 0: no loss, 1 loss, 2 decode fec */
opus_int condCoding, /* I The type of conditional coding to use */
+#ifdef ENABLE_DEEP_PLC
+ LPCNetPLCState *lpcnet,
+#endif
+#ifdef ENABLE_OSCE
+ OSCEModel *osce_model,
+#endif
int arch /* I Run-time architecture */
);
diff --git a/media/libopus/silk/mips/NSQ_del_dec_mipsr1.h b/media/libopus/silk/mips/NSQ_del_dec_mipsr1.h
index cd70713a8f..85bfb637ef 100644
--- a/media/libopus/silk/mips/NSQ_del_dec_mipsr1.h
+++ b/media/libopus/silk/mips/NSQ_del_dec_mipsr1.h
@@ -25,8 +25,8 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
-#ifndef __NSQ_DEL_DEC_MIPSR1_H__
-#define __NSQ_DEL_DEC_MIPSR1_H__
+#ifndef NSQ_DEL_DEC_MIPSR1_H__
+#define NSQ_DEL_DEC_MIPSR1_H__
#ifdef HAVE_CONFIG_H
#include "config.h"
@@ -407,4 +407,4 @@ static inline void silk_noise_shape_quantizer_del_dec(
}
}
-#endif /* __NSQ_DEL_DEC_MIPSR1_H__ */
+#endif /* NSQ_DEL_DEC_MIPSR1_H__ */
diff --git a/media/libopus/silk/mips/macros_mipsr1.h b/media/libopus/silk/mips/macros_mipsr1.h
index 12ed981a6e..af408802c3 100644
--- a/media/libopus/silk/mips/macros_mipsr1.h
+++ b/media/libopus/silk/mips/macros_mipsr1.h
@@ -26,8 +26,8 @@ POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
-#ifndef __SILK_MACROS_MIPSR1_H__
-#define __SILK_MACROS_MIPSR1_H__
+#ifndef SILK_MACROS_MIPSR1_H__
+#define SILK_MACROS_MIPSR1_H__
#define mips_clz(x) __builtin_clz(x)
@@ -89,4 +89,4 @@ static inline opus_int32 silk_CLZ32(opus_int32 in32)
return re32;
}
-#endif /* __SILK_MACROS_MIPSR1_H__ */
+#endif /* SILK_MACROS_MIPSR1_H__ */
diff --git a/media/libopus/silk/structs.h b/media/libopus/silk/structs.h
index 3380c757b2..38243be1ea 100644
--- a/media/libopus/silk/structs.h
+++ b/media/libopus/silk/structs.h
@@ -34,6 +34,21 @@ POSSIBILITY OF SUCH DAMAGE.
#include "entenc.h"
#include "entdec.h"
+#ifdef ENABLE_DEEP_PLC
+#include "lpcnet.h"
+#include "lpcnet_private.h"
+#endif
+
+#ifdef ENABLE_DRED
+#include "dred_encoder.h"
+#include "dred_decoder.h"
+#endif
+
+#ifdef ENABLE_OSCE
+#include "osce_config.h"
+#include "osce_structs.h"
+#endif
+
#ifdef __cplusplus
extern "C"
{
@@ -228,6 +243,14 @@ typedef struct {
} silk_encoder_state;
+#ifdef ENABLE_OSCE
+typedef struct { /* OSCE data; only compiled in when ENABLE_OSCE is defined */
+ OSCEFeatureState features;
+ OSCEState state;
+ int method; /* presumably the OSCE method selected through osce_reset() - TODO confirm */
+} silk_OSCE_struct;
+#endif
+
/* Struct for Packet Loss Concealment */
typedef struct {
opus_int32 pitchL_Q8; /* Pitch lag to use for voiced concealment */
@@ -243,6 +266,7 @@ typedef struct {
opus_int fs_kHz;
opus_int nb_subfr;
opus_int subfr_length;
+ opus_int enable_deep_plc;
} silk_PLC_struct;
/* Struct for CNG */
@@ -259,6 +283,10 @@ typedef struct {
/* Decoder state */
/********************************/
typedef struct {
+#ifdef ENABLE_OSCE
+ silk_OSCE_struct osce;
+#endif
+#define SILK_DECODER_STATE_RESET_START prev_gain_Q16
opus_int32 prev_gain_Q16;
opus_int32 exc_Q14[ MAX_FRAME_LENGTH ];
opus_int32 sLPC_Q14_buf[ MAX_LPC_ORDER ];
diff --git a/media/libopus/silk/x86/NSQ_del_dec_avx2.c b/media/libopus/silk/x86/NSQ_del_dec_avx2.c
new file mode 100644
index 0000000000..43485871a4
--- /dev/null
+++ b/media/libopus/silk/x86/NSQ_del_dec_avx2.c
@@ -0,0 +1,1075 @@
+/***********************************************************************
+Copyright (c) 2021 Google Inc.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifdef OPUS_CHECK_ASM
+#include <string.h>
+#endif
+
+#include "opus_defines.h"
+#include <immintrin.h>
+
+#include "main.h"
+#include "stack_alloc.h"
+#include "NSQ.h"
+#include "celt/x86/x86cpu.h"
+
+/* Asserts the preconditions of this implementation and returns nonzero only when
+   nStatesDelayedDecision is 3 or 4, the cases for which the optimization is enabled. */
+static OPUS_INLINE int verify_assumptions(const silk_encoder_state *psEncC)
+{
+ /* This optimization is based on the assumptions below. */
+ /* They are fundamental, hence asserts are used. Should */
+ /* any assert trigger, all related code must be revisited */
+ /* to make sure it still functions the same as the C */
+ /* implementation. */
+ silk_assert(MAX_DEL_DEC_STATES <= 4 &&
+ MAX_FRAME_LENGTH % 4 == 0 &&
+ MAX_SUB_FRAME_LENGTH % 4 == 0 &&
+ LTP_MEM_LENGTH_MS % 4 == 0 );
+ silk_assert(psEncC->fs_kHz == 8 ||
+ psEncC->fs_kHz == 12 ||
+ psEncC->fs_kHz == 16 );
+ silk_assert(psEncC->nb_subfr <= MAX_NB_SUBFR &&
+ psEncC->nb_subfr > 0 );
+ silk_assert(psEncC->nStatesDelayedDecision <= MAX_DEL_DEC_STATES &&
+ psEncC->nStatesDelayedDecision > 0 );
+ silk_assert(psEncC->ltp_mem_length == psEncC->fs_kHz * LTP_MEM_LENGTH_MS);
+
+ /* Regressions were observed on certain AMD Zen CPUs when */
+ /* nStatesDelayedDecision is 1 or 2. Ideally we would detect */
+ /* these CPUs and enable this optimization on all others; */
+ /* however, the current Opus framework offers no good way to do so. */
+ return psEncC->nStatesDelayedDecision == 3 ||
+ psEncC->nStatesDelayedDecision == 4;
+}
+
+/* Stand-ins for GCC/Clang names that MSVC does not define */
+#ifdef _MSC_VER
+#include <Intsafe.h>
+#define __m128i_u __m128i
+/* Stores the wrapped sum a+b in *res and returns nonzero if the signed addition overflowed. */
+static inline int __builtin_sadd_overflow(opus_int32 a, opus_int32 b, opus_int32* res)
+{
+ /* Add as unsigned so the wrap-around is well defined; signed overflow is undefined behaviour in C. */
+ *res = (opus_int32)((unsigned int)a + (unsigned int)b);
+ /* Overflow happened iff the sign of the sum differs from the sign of both operands. */
+ return ((*res ^ a) & (*res ^ b)) < 0;
+}
+/* Index of the lowest set bit of x; returns 32 when x is 0. */
+static inline int __builtin_ctz(unsigned int x)
+{
+ DWORD res = 0;
+ return _BitScanForward(&res, x) ? (int)res : 32;
+}
+#endif
+
+static OPUS_INLINE __m128i silk_cvtepi64_epi32_high(__m256i num)
+{ /* Gathers 32-bit elements 1, 3, 5 and 7 of num (the upper half of each 64-bit lane) into a 128-bit vector. */
+ return _mm256_castsi256_si128(_mm256_permutevar8x32_epi32(num, _mm256_set_epi32(0, 0, 0, 0, 7, 5, 3, 1)));
+}
+
+static OPUS_INLINE opus_int16 silk_sat16(opus_int32 num)
+{ /* Clamps num to [silk_int16_MIN, silk_int16_MAX] before it is narrowed to opus_int16 on return. */
+ num = num > silk_int16_MAX ? silk_int16_MAX : num;
+ num = num < silk_int16_MIN ? silk_int16_MIN : num;
+ return num;
+}
+
+static OPUS_INLINE opus_int32 silk_sar_round_32(opus_int32 a, int bits)
+{ /* Right shift of a by `bits` with rounding; bits must be in 1..30. */
+ silk_assert(bits > 0 && bits < 31);
+ a += 1 << (bits-1); /* add half of the final unit so the shift rounds instead of truncating */
+ return a >> bits;
+}
+
+static OPUS_INLINE opus_int64 silk_sar_round_smulww(opus_int32 a, opus_int32 b, int bits)
+{ /* Rounded right shift by `bits` of the product (a * b) >> 16. */
+ silk_assert(bits > 0 && bits < 63);
+#ifdef OPUS_CHECK_ASM
+ return silk_RSHIFT_ROUND(silk_SMULWW(a, b), bits);
+#else
+ /* This code is more correct, but it won't overflow like the C code in some rare cases. */
+ silk_assert(bits > 0 && bits < 63); /* NOTE(review): repeats the assert above; since 16 is added below, bits + 16 must stay under 64 for the shifts - confirm callers */
+ opus_int64 t = ((opus_int64)a) * ((opus_int64)b); /* full 64-bit product, nothing dropped yet */
+ bits += 16; /* fold the ">> 16" of the multiply into the final shift */
+ t += 1ull << (bits-1); /* rounding offset: half of the final unit */
+ return t >> bits;
+#endif
+}
+
+static OPUS_INLINE opus_int32 silk_add_sat32(opus_int32 a, opus_int32 b)
+{ /* Returns a + b, saturated to silk_int32_MAX / silk_int32_MIN when the addition overflows. */
+ opus_int32 sum;
+ if (__builtin_sadd_overflow(a, b, &sum))
+ {
+ return a >= 0 ? silk_int32_MAX : silk_int32_MIN; /* the sign of a gives the overflow direction */
+ }
+ return sum;
+}
+
+static OPUS_INLINE __m128i silk_mm_srai_round_epi32(__m128i a, int bits)
+{ /* Per 32-bit element: adds 1 << (bits - 1), then shifts right arithmetically by `bits` (rounded shift); bits must be in 1..30. */
+ silk_assert(bits > 0 && bits < 31);
+ return _mm_srai_epi32(_mm_add_epi32(a, _mm_set1_epi32(1 << (bits - 1))), bits);
+}
+
+/* Add/subtract with saturated output, per signed 32-bit element.
+   On overflow the element becomes 0x7FFFFFFF when a >= 0 and 0x80000000 when a < 0. */
+static OPUS_INLINE __m128i silk_mm_add_sat_epi32(__m128i a, __m128i b)
+{
+ __m128i r = _mm_add_epi32(a, b);
+ __m128i OF = _mm_and_si128(_mm_xor_si128(a, r), _mm_xor_si128(b, r)); /* OF = (sum ^ a) & (sum ^ b) */
+ __m128i SAT = _mm_add_epi32(_mm_srli_epi32(a, 31), _mm_set1_epi32(0x7FFFFFFF)); /* SAT = (a >> 31) + 0x7FFFFFFF */
+ return _mm_blendv_epi8(r, SAT, _mm_srai_epi32(OF, 31)); /* spread OF's sign bit over the element to select SAT where overflow occurred */
+}
+static OPUS_INLINE __m128i silk_mm_sub_sat_epi32(__m128i a, __m128i b)
+{ /* Per signed 32-bit element: a - b, replaced by the saturation value chosen from the sign of a when the subtraction overflows. */
+ __m128i r = _mm_sub_epi32(a, b);
+ __m128i OF = _mm_andnot_si128(_mm_xor_si128(b, r), _mm_xor_si128(a, r)); /* OF = (sum ^ a) & (sum ^ ~b) = (sum ^ a) & ~(sum ^ b) */
+ __m128i SAT = _mm_add_epi32(_mm_srli_epi32(a, 31), _mm_set1_epi32(0x7FFFFFFF)); /* SAT = (a >> 31) + 0x7FFFFFFF */
+ return _mm_blendv_epi8(r, SAT, _mm_srai_epi32(OF, 31)); /* spread OF's sign bit over the element to select SAT where overflow occurred */
+}
+static OPUS_INLINE __m256i silk_mm256_sub_sat_epi32(__m256i a, __m256i b)
+{ /* 256-bit (eight element) form of the saturating 32-bit subtract a - b. */
+ __m256i r = _mm256_sub_epi32(a, b);
+ __m256i OF = _mm256_andnot_si256(_mm256_xor_si256(b, r), _mm256_xor_si256(a, r)); /* OF = (sum ^ a) & (sum ^ ~b) = (sum ^ a) & ~(sum ^ b) */
+ __m256i SAT = _mm256_add_epi32(_mm256_srli_epi32(a, 31), _mm256_set1_epi32(0x7FFFFFFF)); /* SAT = (a >> 31) + 0x7FFFFFFF */
+ return _mm256_blendv_epi8(r, SAT, _mm256_srai_epi32(OF, 31)); /* spread OF's sign bit over the element to select SAT where overflow occurred */
+}
+
+static OPUS_INLINE __m128i silk_mm_limit_epi32(__m128i num, opus_int32 limit1, opus_int32 limit2)
+{ /* Clamps every signed 32-bit element of num to the range spanned by limit1 and limit2, given in either order. */
+ opus_int32 lo = limit1 < limit2 ? limit1 : limit2;
+ opus_int32 hi = limit1 > limit2 ? limit1 : limit2;
+
+ num = _mm_min_epi32(num, _mm_set1_epi32(hi));
+ num = _mm_max_epi32(num, _mm_set1_epi32(lo));
+ return num;
+}
+
+/* cond < 0 ? -num : num, per 32-bit element.
+   cond is OR-ed with 1 first so that it is never zero: _mm_sign_epi32 would zero the element for a zero cond. */
+static OPUS_INLINE __m128i silk_mm_sign_epi32(__m128i num, __m128i cond)
+{
+ return _mm_sign_epi32(num, _mm_or_si128(cond, _mm_set1_epi32(1)));
+}
+static OPUS_INLINE __m256i silk_mm256_sign_epi32(__m256i num, __m256i cond)
+{ /* 256-bit form of cond < 0 ? -num : num; cond is OR-ed with 1 so a zero cond leaves num unchanged instead of zeroing it. */
+ return _mm256_sign_epi32(num, _mm256_or_si256(cond, _mm256_set1_epi32(1)));
+}
+
+/* (a32 * b32) >> 16, per element: each element of a is sign-extended to 64 bits and multiplied by b;
+   the product is shifted left by 16 so that the wanted 32 bits land in the upper half of each 64-bit lane. */
+static OPUS_INLINE __m128i silk_mm_smulww_epi32(__m128i a, opus_int32 b)
+{
+ return silk_cvtepi64_epi32_high(_mm256_slli_epi64(_mm256_mul_epi32(_mm256_cvtepi32_epi64(a), _mm256_set1_epi32(b)), 16));
+}
+
+/* (a32 * (opus_int32)((opus_int16)(b32))) >> 16; the output has to be a 32-bit int.
+   b is pre-shifted left by 16, so the upper half of each 64-bit product is already the wanted result. */
+static OPUS_INLINE __m128i silk_mm_smulwb_epi32(__m128i a, opus_int32 b)
+{
+ return silk_cvtepi64_epi32_high(_mm256_mul_epi32(_mm256_cvtepi32_epi64(a), _mm256_set1_epi32(silk_LSHIFT(b, 16))));
+}
+
+/* (opus_int32)((opus_int16)(a32)) * (opus_int32)((opus_int16)(b32)); the output has to be a 32-bit int.
+   Only the low 64 bits of each 128-bit half of the result carry products (from elements 0-1 and 4-5 of the inputs);
+   the byte shuffle zeroes the rest. */
+static OPUS_INLINE __m256i silk_mm256_smulbb_epi32(__m256i a, __m256i b)
+{
+ const char FF = (char)0xFF; /* a selector byte with the top bit set makes the byte shuffle write zero */
+ __m256i msk = _mm256_set_epi8(
+ FF, FF, FF, FF, FF, FF, FF, FF, 13, 12, 9, 8, 5, 4, 1, 0,
+ FF, FF, FF, FF, FF, FF, FF, FF, 13, 12, 9, 8, 5, 4, 1, 0);
+ __m256i lo = _mm256_mullo_epi16(a, b); /* low 16 bits of each 16x16 product */
+ __m256i hi = _mm256_mulhi_epi16(a, b); /* high 16 bits of each 16x16 product */
+ lo = _mm256_shuffle_epi8(lo, msk); /* keep the results from the low 16 bits of each 32-bit element, packed together */
+ hi = _mm256_shuffle_epi8(hi, msk);
+ return _mm256_unpacklo_epi16(lo, hi); /* interleave low and high halves back into 32-bit products */
+}
+
+static OPUS_INLINE __m256i silk_mm256_reverse_epi32(__m256i v)
+{ /* Reverses the order of the eight 32-bit elements of v. */
+ v = _mm256_shuffle_epi32(v, 0x1B); /* reverse the four elements inside each 128-bit half */
+ v = _mm256_permute4x64_epi64(v, 0x4E); /* swap the two 128-bit halves */
+ return v;
+}
+
+static OPUS_INLINE opus_int32 silk_mm256_hsum_epi32(__m256i v)
+{ /* Returns the sum of the eight 32-bit elements of v, using wrapping 32-bit adds. */
+ __m128i sum = _mm_add_epi32(_mm256_extracti128_si256(v, 1), _mm256_extracti128_si256(v, 0));
+ sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0x4E)); /* add the swapped 64-bit halves */
+ sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0xB1)); /* add neighbouring elements */
+ return _mm_cvtsi128_si32(sum);
+}
+
+static OPUS_INLINE __m128i silk_mm_hmin_epi32(__m128i num)
+{ /* Returns a vector whose four elements all hold the minimum of the four signed 32-bit elements of num. */
+ num = _mm_min_epi32(num, _mm_shuffle_epi32(num, 0x4E)); /* 0123 -> 2301 */
+ num = _mm_min_epi32(num, _mm_shuffle_epi32(num, 0xB1)); /* 0123 -> 1032 */
+ return num;
+}
+
+static OPUS_INLINE __m128i silk_mm_hmax_epi32(__m128i num)
+{ /* Returns a vector whose four elements all hold the maximum of the four signed 32-bit elements of num. */
+ num = _mm_max_epi32(num, _mm_shuffle_epi32(num, 0x4E)); /* 0123 -> 2301 */
+ num = _mm_max_epi32(num, _mm_shuffle_epi32(num, 0xB1)); /* 0123 -> 1032 */
+ return num;
+}
+
+static OPUS_INLINE __m128i silk_mm_mask_hmin_epi32(__m128i num, __m128i mask)
+{ /* Horizontal minimum of num in which the bytes selected by mask are first replaced with silk_int32_MAX. */
+ num = _mm_blendv_epi8(num, _mm_set1_epi32(silk_int32_MAX), mask); /* masked elements can no longer win the minimum */
+ return silk_mm_hmin_epi32(num);
+}
+
+static OPUS_INLINE __m128i silk_mm_mask_hmax_epi32(__m128i num, __m128i mask)
+{ /* Horizontal maximum of num in which the bytes selected by mask are first replaced with silk_int32_MIN. */
+ num = _mm_blendv_epi8(num, _mm_set1_epi32(silk_int32_MIN), mask); /* masked elements can no longer win the maximum */
+ return silk_mm_hmax_epi32(num);
+}
+
+static OPUS_INLINE __m128i silk_mm256_rand_epi32(__m128i seed)
+{ /* Per 32-bit element: seed * RAND_MULTIPLIER + RAND_INCREMENT. Despite the "mm256" in the name it works on a 128-bit vector. */
+ seed = _mm_mullo_epi32(seed, _mm_set1_epi32(RAND_MULTIPLIER));
+ seed = _mm_add_epi32(seed, _mm_set1_epi32(RAND_INCREMENT));
+ return seed;
+}
+
+/* Index (0..3) of the lowest 32-bit lane in which a and b are equal.
+ * The caller must guarantee that at least one lane matches. */
+static OPUS_INLINE opus_int32 silk_index_of_first_equal_epi32(__m128i a, __m128i b)
+{
+ const __m128i equal_lanes = _mm_cmpeq_epi32(a, b);
+ /* movemask yields one bit per byte; keep the bit of the lowest byte of each dword (bits 0, 4, 8, 12) */
+ unsigned int lane_bits = _mm_movemask_epi8(equal_lanes) & 0x1111;
+ silk_assert(lane_bits != 0);
+ /* Bit position / 4 == lane index */
+ return __builtin_ctz(lane_bits) >> 2;
+}
+
+/* Build a _mm_shuffle_epi8 control vector that copies 32-bit lane `index` (0..3)
+ * of the shuffled operand into all four lanes of the result. */
+static OPUS_INLINE __m128i silk_index_to_selector(opus_int32 index)
+{
+ /* Both bounds matter: a negative index would set the high bit of the control bytes,
+ * and _mm_shuffle_epi8 then writes zero instead of selecting a source byte. */
+ silk_assert(index >= 0 && index < 4);
+ index <<= 2; /* lane index -> offset of the lane's first byte */
+ return _mm_set_epi8(
+ index + 3, index + 2, index + 1, index + 0,
+ index + 3, index + 2, index + 1, index + 0,
+ index + 3, index + 2, index + 1, index + 0,
+ index + 3, index + 2, index + 1, index + 0);
+}
+
+/* Return the 32-bit value of the lane of num chosen by selector, a byte-shuffle
+ * control as produced by silk_index_to_selector(). */
+static opus_int32 silk_select_winner(__m128i num, __m128i selector)
+{
+ /* After the shuffle the chosen lane is present in lane 0, which is what gets extracted */
+ const __m128i chosen = _mm_shuffle_epi8(num, selector);
+ return _mm_cvtsi128_si32(chosen);
+}
+
+typedef struct /* One delayed sample for all candidate states; each __m128i holds one 32-bit lane per state */
+{
+ __m128i RandState; /* Copy of NSQ_del_dec_struct.Seed taken after this sample was processed */
+ __m128i Q_Q10; /* Selected quantization level, Q10 */
+ __m128i Xq_Q14; /* Quantized output sample, Q14 */
+ __m128i Pred_Q15; /* Excitation plus LTP prediction, shifted left by one; later written into sLTP_Q15[] */
+ __m128i Shape_Q14; /* Shaping state sample; later written into NSQ->sLTP_shp_Q14[] */
+} NSQ_del_dec_sample_struct;
+
+typedef struct /* Working state of the delayed-decision search; each __m128i holds one 32-bit lane per candidate state */
+{
+ __m128i sLPC_Q14[MAX_SUB_FRAME_LENGTH + NSQ_LPC_BUF_LENGTH]; /* NSQ_LPC_BUF_LENGTH history entries followed by the samples of the current subframe */
+ __m128i LF_AR_Q14; /* Initialised from and finally stored to NSQ->sLF_AR_shp_Q14 */
+ __m128i Seed; /* Running dither seed */
+ __m128i SeedInit; /* Seed value at the start of the frame; the winner's lane is written to psIndices->Seed */
+ __m128i RD_Q10; /* Accumulated rate-distortion cost */
+ __m128i Diff_Q14; /* Initialised from and finally stored to NSQ->sDiff_shp_Q14 */
+ __m128i sAR2_Q14[MAX_SHAPE_LPC_ORDER]; /* Initialised from and finally stored to NSQ->sAR2_Q14[] */
+ NSQ_del_dec_sample_struct Samples[DECISION_DELAY]; /* Circular buffer of the most recent samples (indexed modulo DECISION_DELAY) */
+} NSQ_del_dec_struct;
+
+static OPUS_INLINE void silk_nsq_del_dec_scale_states_avx2( /* Forward declaration; the definition follows further down in this file */
+ const silk_encoder_state *psEncC, /* I Encoder State */
+ silk_nsq_state *NSQ, /* I/O NSQ state */
+ NSQ_del_dec_struct *psDelDec, /* I/O Delayed decision states */
+ const opus_int16 x16[], /* I Input */
+ opus_int32 x_sc_Q10[MAX_SUB_FRAME_LENGTH], /* O Input scaled with 1/Gain in Q10 */
+ const opus_int16 sLTP[], /* I Re-whitened LTP state in Q0 */
+ opus_int32 sLTP_Q15[], /* O LTP state matching scaled input */
+ opus_int subfr, /* I Subframe number (index into Gains_Q16[] and pitchL[]) */
+ const opus_int LTP_scale_Q14, /* I LTP state scaling */
+ const opus_int32 Gains_Q16[MAX_NB_SUBFR], /* I Quantization step sizes, one per subframe */
+ const opus_int pitchL[MAX_NB_SUBFR], /* I Pitch lag */
+ const opus_int signal_type, /* I Signal type */
+ const opus_int decisionDelay /* I Decision delay */
+);
+
+/*******************************************/
+/* LPC analysis filter (forward declaration) */
+/* NB! State is kept internally and the */
+/* filter always starts with zero state; */
+/* the first `order` output samples are set to zero */
+/*******************************************/
+static OPUS_INLINE void silk_LPC_analysis_filter_avx2(
+ opus_int16 *out, /* O Output signal */
+ const opus_int16 *in, /* I Input signal */
+ const opus_int16 *B, /* I MA prediction coefficients, Q12 [order] */
+ const opus_int32 len, /* I Signal length */
+ const opus_int32 order /* I Filter order; the definition asserts 10 or 16 */
+);
+
+/******************************************/
+/* Noise shape quantizer for one subframe (forward declaration) */
+/******************************************/
+static OPUS_INLINE void silk_noise_shape_quantizer_del_dec_avx2(
+ silk_nsq_state *NSQ, /* I/O NSQ state */
+ NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states (the caller passes the address of a single struct) */
+ opus_int signalType, /* I Signal type */
+ const opus_int32 x_Q10[], /* I Input scaled with 1/Gain in Q10 */
+ opus_int8 pulses[], /* O Quantized pulse signal */
+ opus_int16 xq[], /* O Quantized output signal */
+ opus_int32 sLTP_Q15[], /* I/O LTP filter state */
+ opus_int32 delayedGain_Q10[DECISION_DELAY], /* I/O Gain delay buffer */
+ const opus_int16 a_Q12[], /* I Short term prediction coefs */
+ const opus_int16 b_Q14[], /* I Long term prediction coefs */
+ const opus_int16 AR_shp_Q13[], /* I Noise shaping coefs */
+ opus_int lag, /* I Pitch lag */
+ opus_int32 HarmShapeFIRPacked_Q14, /* I Harmonic shaping gain: gain/4 in the low 16 bits, gain/2 in the high 16 bits */
+ opus_int Tilt_Q14, /* I Spectral tilt */
+ opus_int32 LF_shp_Q14, /* I Low frequency shaping coefs for this subframe */
+ opus_int32 Gain_Q16, /* I Quantization step size for this subframe */
+ opus_int Lambda_Q10, /* I Rate/distortion tradeoff */
+ opus_int offset_Q10, /* I Quantization offset */
+ opus_int length, /* I Input length */
+ opus_int subfr, /* I Subframe number */
+ opus_int shapingLPCOrder, /* I Shaping LPC filter order */
+ opus_int predictLPCOrder, /* I Prediction filter order */
+ opus_int warping_Q16, /* I Warping factor used by the noise shape feedback sections */
+ __m128i MaskDelDec, /* I Mask of states in decision tree */
+ opus_int *smpl_buf_idx, /* I/O Index to newest samples in buffers */
+ opus_int decisionDelay /* I Decision delay */
+);
+/* AVX2 delayed-decision noise shaping quantizer for one frame.
+ * Up to four candidate states are tracked in parallel, one per 32-bit lane of every
+ * __m128i in NSQ_del_dec_struct. When verify_assumptions() rejects the encoder
+ * configuration the call is forwarded to silk_NSQ_del_dec_c(). With OPUS_CHECK_ASM
+ * defined, the C version is additionally run on copies of the state and its results
+ * are asserted to be identical to the ones produced here.
+ */
+void silk_NSQ_del_dec_avx2(
+ const silk_encoder_state *psEncC, /* I Encoder State */
+ silk_nsq_state *NSQ, /* I/O NSQ state */
+ SideInfoIndices *psIndices, /* I/O Quantization Indices */
+ const opus_int16 x16[], /* I Input */
+ opus_int8 pulses[], /* O Quantized pulse signal */
+ const opus_int16 *PredCoef_Q12, /* I Short term prediction coefs (sets of MAX_LPC_ORDER entries) */
+ const opus_int16 LTPCoef_Q14[LTP_ORDER * MAX_NB_SUBFR], /* I Long term prediction coefs */
+ const opus_int16 AR_Q13[MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER], /* I Noise shaping coefs */
+ const opus_int HarmShapeGain_Q14[MAX_NB_SUBFR], /* I Long term shaping coefs */
+ const opus_int Tilt_Q14[MAX_NB_SUBFR], /* I Spectral tilt */
+ const opus_int32 LF_shp_Q14[MAX_NB_SUBFR], /* I Low frequency shaping coefs */
+ const opus_int32 Gains_Q16[MAX_NB_SUBFR], /* I Quantization step sizes */
+ const opus_int32 pitchL[MAX_NB_SUBFR], /* I Pitch lags */
+ const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */
+ const opus_int LTP_scale_Q14 /* I LTP state scaling */
+)
+{
+#ifdef OPUS_CHECK_ASM
+ silk_nsq_state NSQ_c;
+ SideInfoIndices psIndices_c;
+ opus_int8 pulses_c[MAX_FRAME_LENGTH];
+ const opus_int8 *const pulses_a = pulses; /* pulses is advanced per subframe below; keep the start for the final comparison */
+
+ silk_memcpy(&NSQ_c, NSQ, sizeof(NSQ_c));
+ silk_memcpy(&psIndices_c, psIndices, sizeof(psIndices_c));
+ silk_memcpy(pulses_c, pulses, sizeof(pulses_c));
+ silk_NSQ_del_dec_c(psEncC, &NSQ_c, &psIndices_c, x16, pulses_c, PredCoef_Q12, LTPCoef_Q14, AR_Q13, HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16,
+ pitchL, Lambda_Q10, LTP_scale_Q14);
+#endif
+
+ if (!verify_assumptions(psEncC)) /* configuration not handled by this code path: use the C implementation */
+ {
+ silk_NSQ_del_dec_c(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, AR_Q13, HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14);
+ return;
+ }
+
+ opus_int i, k, lag, start_idx, LSF_interpolation_flag, Winner_ind, subfr;
+ opus_int last_smple_idx, smpl_buf_idx, decisionDelay;
+ const opus_int16 *A_Q12, *B_Q14, *AR_shp_Q13;
+ opus_int16 *pxq;
+ VARDECL(opus_int32, sLTP_Q15);
+ VARDECL(opus_int16, sLTP);
+ opus_int32 HarmShapeFIRPacked_Q14;
+ opus_int offset_Q10;
+ opus_int32 Gain_Q10;
+ opus_int32 x_sc_Q10[MAX_SUB_FRAME_LENGTH];
+ opus_int32 delayedGain_Q10[DECISION_DELAY];
+ NSQ_del_dec_struct psDelDec = {0};
+ NSQ_del_dec_sample_struct *psSample;
+ __m128i RDmin_Q10, MaskDelDec, Winner_selector;
+ SAVE_STACK;
+
+ MaskDelDec = _mm_cvtepi8_epi32(_mm_cvtsi32_si128(0xFFFFFF00ul << ((psEncC->nStatesDelayedDecision - 1) << 3))); /* lanes >= nStatesDelayedDecision become all-ones, i.e. are excluded from the min/max searches */
+
+ /* Set unvoiced lag to the previous one, overwrite later for voiced */
+ lag = NSQ->lagPrev;
+
+ silk_assert(NSQ->prev_gain_Q16 != 0);
+ psDelDec.Seed = _mm_and_si128(
+ _mm_add_epi32(_mm_set_epi32(3, 2, 1, 0), _mm_set1_epi32(psIndices->Seed)),
+ _mm_set1_epi32(3)); /* per lane: (lane index + psIndices->Seed) & 3 */
+ psDelDec.SeedInit = psDelDec.Seed;
+ psDelDec.RD_Q10 = _mm_setzero_si128();
+ psDelDec.LF_AR_Q14 = _mm_set1_epi32(NSQ->sLF_AR_shp_Q14);
+ psDelDec.Diff_Q14 = _mm_set1_epi32(NSQ->sDiff_shp_Q14);
+ psDelDec.Samples[0].Shape_Q14 = _mm_set1_epi32(NSQ->sLTP_shp_Q14[psEncC->ltp_mem_length - 1]);
+ for (i = 0; i < NSQ_LPC_BUF_LENGTH; i++)
+ {
+ psDelDec.sLPC_Q14[i] = _mm_set1_epi32(NSQ->sLPC_Q14[i]);
+ }
+ for (i = 0; i < MAX_SHAPE_LPC_ORDER; i++)
+ {
+ psDelDec.sAR2_Q14[i] = _mm_set1_epi32(NSQ->sAR2_Q14[i]);
+ }
+
+ offset_Q10 = silk_Quantization_Offsets_Q10[psIndices->signalType >> 1][psIndices->quantOffsetType];
+ smpl_buf_idx = 0; /* index of oldest samples */
+
+ decisionDelay = silk_min_int(DECISION_DELAY, psEncC->subfr_length);
+
+ /* For voiced frames limit the decision delay to lower than the pitch lag */
+ if (psIndices->signalType == TYPE_VOICED)
+ {
+ for (k = 0; k < psEncC->nb_subfr; k++)
+ {
+ decisionDelay = silk_min_int(decisionDelay, pitchL[k] - LTP_ORDER / 2 - 1);
+ }
+ }
+ else
+ {
+ if (lag > 0)
+ {
+ decisionDelay = silk_min_int(decisionDelay, lag - LTP_ORDER / 2 - 1);
+ }
+ }
+
+ if (psIndices->NLSFInterpCoef_Q2 == 4)
+ {
+ LSF_interpolation_flag = 0;
+ }
+ else
+ {
+ LSF_interpolation_flag = 1;
+ }
+
+ ALLOC(sLTP_Q15, psEncC->ltp_mem_length + psEncC->frame_length, opus_int32);
+ ALLOC(sLTP, psEncC->ltp_mem_length + psEncC->frame_length, opus_int16);
+ /* Set up pointers to start of sub frame */
+ pxq = &NSQ->xq[psEncC->ltp_mem_length];
+ NSQ->sLTP_shp_buf_idx = psEncC->ltp_mem_length;
+ NSQ->sLTP_buf_idx = psEncC->ltp_mem_length;
+ subfr = 0;
+ for (k = 0; k < psEncC->nb_subfr; k++)
+ {
+ A_Q12 = &PredCoef_Q12[((k >> 1) | (1 ^ LSF_interpolation_flag)) * MAX_LPC_ORDER];
+ B_Q14 = &LTPCoef_Q14[k * LTP_ORDER];
+ AR_shp_Q13 = &AR_Q13[k * MAX_SHAPE_LPC_ORDER];
+
+ /* Noise shape parameters: HarmShapeGain/4 goes into the low 16 bits, HarmShapeGain/2 into the high 16 bits */
+ silk_assert(HarmShapeGain_Q14[k] >= 0);
+ HarmShapeFIRPacked_Q14 = silk_RSHIFT( HarmShapeGain_Q14[ k ], 2 );
+ HarmShapeFIRPacked_Q14 |= silk_LSHIFT( (opus_int32)silk_RSHIFT( HarmShapeGain_Q14[ k ], 1 ), 16 );
+
+ NSQ->rewhite_flag = 0;
+ if (psIndices->signalType == TYPE_VOICED)
+ {
+ /* Voiced */
+ lag = pitchL[k];
+
+ /* Re-whitening */
+ if ((k & (3 ^ (LSF_interpolation_flag << 1))) == 0)
+ {
+ if (k == 2)
+ {
+ /* RESET DELAYED DECISIONS */
+ /* Find winner */
+ RDmin_Q10 = silk_mm_mask_hmin_epi32(psDelDec.RD_Q10, MaskDelDec);
+ Winner_ind = silk_index_of_first_equal_epi32(RDmin_Q10, psDelDec.RD_Q10);
+ Winner_selector = silk_index_to_selector(Winner_ind);
+ psDelDec.RD_Q10 = _mm_add_epi32(
+ psDelDec.RD_Q10,
+ _mm_blendv_epi8(
+ _mm_set1_epi32(silk_int32_MAX >> 4),
+ _mm_setzero_si128(),
+ _mm_cvtepi8_epi32(_mm_cvtsi32_si128(0xFFU << (unsigned)(Winner_ind << 3))))); /* adds 0 to the winner lane and silk_int32_MAX >> 4 to every other lane */
+
+ /* Copy final part of signals from winner state to output and long-term filter states */
+ last_smple_idx = smpl_buf_idx + decisionDelay;
+ for (i = 0; i < decisionDelay; i++)
+ {
+ last_smple_idx = (last_smple_idx + DECISION_DELAY - 1) % DECISION_DELAY;
+ psSample = &psDelDec.Samples[last_smple_idx];
+ pulses[i - decisionDelay] =
+ (opus_int8)silk_sar_round_32(silk_select_winner(psSample->Q_Q10, Winner_selector), 10);
+ pxq[i - decisionDelay] =
+ silk_sat16((opus_int32)silk_sar_round_smulww(silk_select_winner(psSample->Xq_Q14, Winner_selector), Gains_Q16[1], 14));
+ NSQ->sLTP_shp_Q14[NSQ->sLTP_shp_buf_idx - decisionDelay + i] =
+ silk_select_winner(psSample->Shape_Q14, Winner_selector);
+ }
+
+ subfr = 0;
+ }
+
+ /* Rewhiten with new A coefs */
+ start_idx = psEncC->ltp_mem_length - lag - psEncC->predictLPCOrder - LTP_ORDER / 2;
+ silk_assert(start_idx > 0);
+
+ silk_LPC_analysis_filter_avx2(&sLTP[start_idx], &NSQ->xq[start_idx + k * psEncC->subfr_length],
+ A_Q12, psEncC->ltp_mem_length - start_idx, psEncC->predictLPCOrder);
+
+ NSQ->sLTP_buf_idx = psEncC->ltp_mem_length;
+ NSQ->rewhite_flag = 1;
+ }
+ }
+
+ silk_nsq_del_dec_scale_states_avx2(psEncC, NSQ, &psDelDec, x16, x_sc_Q10, sLTP, sLTP_Q15, k,
+ LTP_scale_Q14, Gains_Q16, pitchL, psIndices->signalType, decisionDelay);
+
+ silk_noise_shape_quantizer_del_dec_avx2(NSQ, &psDelDec, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15,
+ delayedGain_Q10, A_Q12, B_Q14, AR_shp_Q13, lag, HarmShapeFIRPacked_Q14, Tilt_Q14[k], LF_shp_Q14[k],
+ Gains_Q16[k], Lambda_Q10, offset_Q10, psEncC->subfr_length, subfr++, psEncC->shapingLPCOrder,
+ psEncC->predictLPCOrder, psEncC->warping_Q16, MaskDelDec, &smpl_buf_idx, decisionDelay);
+
+ x16 += psEncC->subfr_length;
+ pulses += psEncC->subfr_length;
+ pxq += psEncC->subfr_length;
+ }
+
+ /* Find winner: the active state with the lowest accumulated RD cost */
+ RDmin_Q10 = silk_mm_mask_hmin_epi32(psDelDec.RD_Q10, MaskDelDec);
+ Winner_selector = silk_index_to_selector(silk_index_of_first_equal_epi32(RDmin_Q10, psDelDec.RD_Q10));
+
+ /* Copy final part of signals from winner state to output and long-term filter states */
+ psIndices->Seed = silk_select_winner(psDelDec.SeedInit, Winner_selector);
+ last_smple_idx = smpl_buf_idx + decisionDelay;
+ Gain_Q10 = Gains_Q16[psEncC->nb_subfr - 1] >> 6;
+ for (i = 0; i < decisionDelay; i++)
+ {
+ last_smple_idx = (last_smple_idx + DECISION_DELAY - 1) % DECISION_DELAY;
+ psSample = &psDelDec.Samples[last_smple_idx];
+
+ pulses[i - decisionDelay] =
+ (opus_int8)silk_sar_round_32(silk_select_winner(psSample->Q_Q10, Winner_selector), 10);
+ pxq[i - decisionDelay] =
+ silk_sat16((opus_int32)silk_sar_round_smulww(silk_select_winner(psSample->Xq_Q14, Winner_selector), Gain_Q10, 8));
+ NSQ->sLTP_shp_Q14[NSQ->sLTP_shp_buf_idx - decisionDelay + i] =
+ silk_select_winner(psSample->Shape_Q14, Winner_selector);
+ }
+ for (i = 0; i < NSQ_LPC_BUF_LENGTH; i++)
+ {
+ NSQ->sLPC_Q14[i] = silk_select_winner(psDelDec.sLPC_Q14[i], Winner_selector);
+ }
+ for (i = 0; i < MAX_SHAPE_LPC_ORDER; i++)
+ {
+ NSQ->sAR2_Q14[i] = silk_select_winner(psDelDec.sAR2_Q14[i], Winner_selector);
+ }
+
+ /* Update states from the winner's lane */
+ NSQ->sLF_AR_shp_Q14 = silk_select_winner(psDelDec.LF_AR_Q14, Winner_selector);
+ NSQ->sDiff_shp_Q14 = silk_select_winner(psDelDec.Diff_Q14, Winner_selector);
+ NSQ->lagPrev = pitchL[psEncC->nb_subfr - 1];
+
+ /* Save quantized speech signal: move the last ltp_mem_length samples to the start of the buffers */
+ silk_memmove(NSQ->xq, &NSQ->xq[psEncC->frame_length], psEncC->ltp_mem_length * sizeof(opus_int16));
+ silk_memmove(NSQ->sLTP_shp_Q14, &NSQ->sLTP_shp_Q14[psEncC->frame_length], psEncC->ltp_mem_length * sizeof(opus_int32));
+
+#ifdef OPUS_CHECK_ASM
+ silk_assert(!memcmp(&NSQ_c, NSQ, sizeof(NSQ_c)));
+ silk_assert(!memcmp(&psIndices_c, psIndices, sizeof(psIndices_c)));
+ silk_assert(!memcmp(pulses_c, pulses_a, sizeof(pulses_c)));
+#endif
+
+ RESTORE_STACK;
+}
+
+/* Short-term prediction for four states at once.
+ * buf32 points at the newest history vector (one state per 32-bit lane); tap k reads buf32[-k]
+ * and is weighted with coef16[k]. Ten taps are always evaluated, sixteen when order == 16.
+ * The accumulated high dwords are returned through silk_cvtepi64_epi32_high(). */
+static OPUS_INLINE __m128i silk_noise_shape_quantizer_short_prediction_x4(const __m128i *buf32, const opus_int16 *coef16, opus_int order)
+{
+ __m256i acc, history, coef, product;
+ opus_int tap, num_taps;
+ silk_assert(order == 10 || order == 16);
+
+ num_taps = (order == 16) ? 16 : 10;
+
+ /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
+ acc = _mm256_set1_epi32(order >> 1);
+ for (tap = 0; tap < num_taps; tap++)
+ {
+ /* Sign-extend every state to 64 bits and multiply by (coef << 16): the high dword
+ * of each 64-bit product then equals (state * coef) >> 16 */
+ history = _mm256_cvtepi32_epi64(buf32[-tap]);
+ coef = _mm256_set1_epi32(silk_LSHIFT(coef16[tap], 16));
+ product = _mm256_mul_epi32(history, coef);
+ /* 32-bit additions: the high dwords accumulate without carries from the low dwords */
+ acc = _mm256_add_epi32(acc, product);
+ }
+ return silk_cvtepi64_epi32_high(acc);
+}
+
+/******************************************/
+/* Noise shape quantizer for one subframe */
+/******************************************
+ * For every input sample two quantization candidates are evaluated for each of the
+ * (up to four) delayed-decision states kept in the 32-bit lanes of psDelDec. The 256-bit
+ * "SS_" vectors hold the lower-cost candidate of each state in their low 128 bits and
+ * the other candidate in their high 128 bits. The output for sample i - decisionDelay is
+ * taken from the state with the lowest accumulated RD cost.
+ */
+static OPUS_INLINE void silk_noise_shape_quantizer_del_dec_avx2(
+ silk_nsq_state *NSQ, /* I/O NSQ state */
+ NSQ_del_dec_struct *psDelDec, /* I/O Delayed decision states */
+ opus_int signalType, /* I Signal type */
+ const opus_int32 x_Q10[], /* I Input scaled with 1/Gain in Q10 */
+ opus_int8 pulses[], /* O Quantized pulse signal */
+ opus_int16 xq[], /* O Quantized output signal */
+ opus_int32 sLTP_Q15[], /* I/O LTP filter state */
+ opus_int32 delayedGain_Q10[DECISION_DELAY], /* I/O Gain delay buffer */
+ const opus_int16 a_Q12[], /* I Short term prediction coefs */
+ const opus_int16 b_Q14[], /* I Long term prediction coefs */
+ const opus_int16 AR_shp_Q13[], /* I Noise shaping coefs */
+ opus_int lag, /* I Pitch lag */
+ opus_int32 HarmShapeFIRPacked_Q14, /* I Harmonic shaping gain: gain/4 in the low 16 bits, gain/2 in the high 16 bits */
+ opus_int Tilt_Q14, /* I Spectral tilt */
+ opus_int32 LF_shp_Q14, /* I Low frequency shaping coefs for this subframe */
+ opus_int32 Gain_Q16, /* I Quantization step size for this subframe */
+ opus_int Lambda_Q10, /* I Rate/distortion tradeoff */
+ opus_int offset_Q10, /* I Quantization offset */
+ opus_int length, /* I Input length */
+ opus_int subfr, /* I Subframe number */
+ opus_int shapingLPCOrder, /* I Shaping LPC filter order */
+ opus_int predictLPCOrder, /* I Prediction filter order */
+ opus_int warping_Q16, /* I Warping factor used by the noise shape feedback sections */
+ __m128i MaskDelDec, /* I Mask of states in decision tree */
+ opus_int *smpl_buf_idx, /* I/O Index to newest samples in buffers */
+ opus_int decisionDelay /* I Decision delay */
+)
+{
+ int i;
+ opus_int32 *shp_lag_ptr = &NSQ->sLTP_shp_Q14[NSQ->sLTP_shp_buf_idx - lag + HARM_SHAPE_FIR_TAPS / 2];
+ opus_int32 *pred_lag_ptr = &sLTP_Q15[NSQ->sLTP_buf_idx - lag + LTP_ORDER / 2];
+ opus_int32 Gain_Q10 = Gain_Q16 >> 6;
+
+ for (i = 0; i < length; i++)
+ {
+ /* Perform common calculations used in all states */
+ /* NSQ_sample_struct */
+ /* Low 128 bits => 1st set */
+ /* High 128 bits => 2nd set */
+ int j;
+ __m256i SS_Q_Q10;
+ __m256i SS_RD_Q10;
+ __m256i SS_xq_Q14;
+ __m256i SS_LF_AR_Q14;
+ __m256i SS_Diff_Q14;
+ __m256i SS_sLTP_shp_Q14;
+ __m256i SS_LPC_exc_Q14;
+ __m256i exc_Q14;
+ __m256i q_Q10, rr_Q10, rd_Q10;
+ __m256i mask;
+ __m128i LPC_pred_Q14, n_AR_Q14;
+ __m128i RDmin_Q10, RDmax_Q10;
+ __m128i n_LF_Q14;
+ __m128i r_Q10, q1_Q0, q1_Q10, q2_Q10;
+ __m128i Winner_rand_state, Winner_selector;
+ __m128i tmp0, tmp1;
+ NSQ_del_dec_sample_struct *psLastSample, *psSample;
+ opus_int32 RDmin_ind, RDmax_ind, last_smple_idx;
+ opus_int32 LTP_pred_Q14, n_LTP_Q14;
+
+ /* Long-term prediction (scalar: identical for all states) */
+ if (signalType == TYPE_VOICED)
+ {
+ /* Unrolled loop */
+ /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
+ LTP_pred_Q14 = 2;
+ LTP_pred_Q14 += silk_SMULWB(pred_lag_ptr[-0], b_Q14[0]);
+ LTP_pred_Q14 += silk_SMULWB(pred_lag_ptr[-1], b_Q14[1]);
+ LTP_pred_Q14 += silk_SMULWB(pred_lag_ptr[-2], b_Q14[2]);
+ LTP_pred_Q14 += silk_SMULWB(pred_lag_ptr[-3], b_Q14[3]);
+ LTP_pred_Q14 += silk_SMULWB(pred_lag_ptr[-4], b_Q14[4]);
+ LTP_pred_Q14 = silk_LSHIFT(LTP_pred_Q14, 1); /* Q13 -> Q14 */
+ pred_lag_ptr++;
+ }
+ else
+ {
+ LTP_pred_Q14 = 0;
+ }
+
+ /* Long-term shaping (scalar: identical for all states) */
+ if (lag > 0)
+ {
+ /* Symmetric, packed FIR coefficients */
+ n_LTP_Q14 = silk_add_sat32(shp_lag_ptr[0], shp_lag_ptr[-2]);
+ n_LTP_Q14 = silk_SMULWB(n_LTP_Q14, HarmShapeFIRPacked_Q14);
+ n_LTP_Q14 = n_LTP_Q14 + silk_SMULWT(shp_lag_ptr[-1], HarmShapeFIRPacked_Q14);
+ n_LTP_Q14 = LTP_pred_Q14 - (silk_LSHIFT(n_LTP_Q14, 2)); /* Q12 -> Q14 */
+ shp_lag_ptr++;
+ }
+ else
+ {
+ n_LTP_Q14 = 0;
+ }
+
+ /* BEGIN Updating Delayed Decision States */
+
+ /* Generate dither */
+ psDelDec->Seed = silk_mm256_rand_epi32(psDelDec->Seed);
+
+ /* Short-term prediction */
+ LPC_pred_Q14 = silk_noise_shape_quantizer_short_prediction_x4(&psDelDec->sLPC_Q14[NSQ_LPC_BUF_LENGTH - 1 + i], a_Q12, predictLPCOrder);
+ LPC_pred_Q14 = _mm_slli_epi32(LPC_pred_Q14, 4); /* Q10 -> Q14 */
+
+ /* Noise shape feedback */
+ silk_assert(shapingLPCOrder > 0);
+ silk_assert((shapingLPCOrder & 1) == 0); /* check that order is even */
+ /* Output of lowpass section */
+ tmp0 = _mm_add_epi32(psDelDec->Diff_Q14, silk_mm_smulwb_epi32(psDelDec->sAR2_Q14[0], warping_Q16));
+ n_AR_Q14 = _mm_set1_epi32(shapingLPCOrder >> 1);
+ for (j = 0; j < shapingLPCOrder - 1; j++)
+ {
+ /* Output of allpass section */
+ tmp1 = psDelDec->sAR2_Q14[j];
+ psDelDec->sAR2_Q14[j] = tmp0;
+ n_AR_Q14 = _mm_add_epi32(n_AR_Q14, silk_mm_smulwb_epi32(tmp0, AR_shp_Q13[j]));
+ tmp0 = _mm_add_epi32(tmp1, silk_mm_smulwb_epi32(_mm_sub_epi32(psDelDec->sAR2_Q14[j + 1], tmp0), warping_Q16));
+ }
+ psDelDec->sAR2_Q14[shapingLPCOrder - 1] = tmp0;
+ n_AR_Q14 = _mm_add_epi32(n_AR_Q14, silk_mm_smulwb_epi32(tmp0, AR_shp_Q13[shapingLPCOrder - 1]));
+
+ n_AR_Q14 = _mm_slli_epi32(n_AR_Q14, 1); /* Q11 -> Q12 */
+ n_AR_Q14 = _mm_add_epi32(n_AR_Q14, silk_mm_smulwb_epi32(psDelDec->LF_AR_Q14, Tilt_Q14)); /* Q12 */
+ n_AR_Q14 = _mm_slli_epi32(n_AR_Q14, 2); /* Q12 -> Q14 */
+
+ tmp0 = silk_mm_smulwb_epi32(psDelDec->Samples[*smpl_buf_idx].Shape_Q14, LF_shp_Q14); /* Q12 */
+ tmp1 = silk_mm_smulwb_epi32(psDelDec->LF_AR_Q14, LF_shp_Q14 >> 16); /* Q12 */
+ n_LF_Q14 = _mm_add_epi32(tmp0, tmp1); /* Q12 */
+ n_LF_Q14 = _mm_slli_epi32(n_LF_Q14, 2); /* Q12 -> Q14 */
+
+ /* Input minus prediction plus noise feedback */
+ /* r = x[ i ] - LTP_pred - LPC_pred + n_AR + n_Tilt + n_LF + n_LTP */
+ tmp0 = silk_mm_add_sat_epi32(n_AR_Q14, n_LF_Q14); /* Q14 */
+ tmp1 = _mm_add_epi32(_mm_set1_epi32(n_LTP_Q14), LPC_pred_Q14); /* Q14: both operands are Q14 */
+ tmp0 = silk_mm_sub_sat_epi32(tmp1, tmp0); /* Q14 */
+ tmp0 = silk_mm_srai_round_epi32(tmp0, 4); /* Q10 */
+
+ r_Q10 = _mm_sub_epi32(_mm_set1_epi32(x_Q10[i]), tmp0); /* residual error Q10 */
+
+ /* Flip sign depending on dither */
+ r_Q10 = silk_mm_sign_epi32(r_Q10, psDelDec->Seed);
+ r_Q10 = silk_mm_limit_epi32(r_Q10, -(31 << 10), 30 << 10);
+
+ /* Find two quantization level candidates and measure their rate-distortion */
+ q1_Q10 = _mm_sub_epi32(r_Q10, _mm_set1_epi32(offset_Q10));
+ q1_Q0 = _mm_srai_epi32(q1_Q10, 10);
+ if (Lambda_Q10 > 2048)
+ {
+ /* For aggressive RDO, the bias becomes more than one pulse. */
+ tmp0 = _mm_sub_epi32(_mm_abs_epi32(q1_Q10), _mm_set1_epi32(Lambda_Q10 / 2 - 512)); /* rdo_offset */
+ q1_Q0 = _mm_srai_epi32(q1_Q10, 31);
+ tmp1 = _mm_cmpgt_epi32(tmp0, _mm_setzero_si128());
+ tmp0 = _mm_srai_epi32(silk_mm_sign_epi32(tmp0, q1_Q10), 10);
+ q1_Q0 = _mm_blendv_epi8(q1_Q0, tmp0, tmp1);
+ }
+
+ tmp0 = _mm_sign_epi32(_mm_set1_epi32(QUANT_LEVEL_ADJUST_Q10), q1_Q0);
+ q1_Q10 = _mm_sub_epi32(_mm_slli_epi32(q1_Q0, 10), tmp0);
+ q1_Q10 = _mm_add_epi32(q1_Q10, _mm_set1_epi32(offset_Q10));
+
+ /* check if q1_Q0 is 0 or -1: q1_Q0 plus its sign bit is zero exactly for these two values.
+ * For them the second candidate lies 1024 - QUANT_LEVEL_ADJUST_Q10 above the first,
+ * for all other levels the distance is 1024. */
+ tmp0 = _mm_add_epi32(_mm_srli_epi32(q1_Q0, 31), q1_Q0);
+ tmp1 = _mm_cmpeq_epi32(tmp0, _mm_setzero_si128());
+ tmp0 = _mm_blendv_epi8(_mm_set1_epi32(1024), _mm_set1_epi32(1024 - QUANT_LEVEL_ADJUST_Q10), tmp1);
+ q2_Q10 = _mm_add_epi32(q1_Q10, tmp0);
+ q_Q10 = _mm256_set_m128i(q2_Q10, q1_Q10); /* low 128 bits: q1 candidates, high 128 bits: q2 candidates */
+
+ rr_Q10 = _mm256_sub_epi32(_mm256_broadcastsi128_si256(r_Q10), q_Q10);
+ rd_Q10 = _mm256_abs_epi32(q_Q10);
+ rr_Q10 = silk_mm256_smulbb_epi32(rr_Q10, rr_Q10);
+ rd_Q10 = silk_mm256_smulbb_epi32(rd_Q10, _mm256_set1_epi32(Lambda_Q10));
+ rd_Q10 = _mm256_add_epi32(rd_Q10, rr_Q10);
+ rd_Q10 = _mm256_srai_epi32(rd_Q10, 10);
+
+ mask = _mm256_broadcastsi128_si256(_mm_cmplt_epi32(_mm256_extracti128_si256(rd_Q10, 0), _mm256_extracti128_si256(rd_Q10, 1))); /* all-ones where the q1 candidate has the lower cost */
+ SS_RD_Q10 = _mm256_add_epi32(
+ _mm256_broadcastsi128_si256(psDelDec->RD_Q10),
+ _mm256_blendv_epi8(
+ _mm256_permute2x128_si256(rd_Q10, rd_Q10, 0x1),
+ rd_Q10,
+ mask)); /* halves are swapped where q2 is not worse, so the low half always holds the lower cost */
+ SS_Q_Q10 = _mm256_blendv_epi8(
+ _mm256_permute2x128_si256(q_Q10, q_Q10, 0x1),
+ q_Q10,
+ mask);
+
+ /* Update states for best and second best quantization */
+
+ /* Quantized excitation */
+ exc_Q14 = silk_mm256_sign_epi32(_mm256_slli_epi32(SS_Q_Q10, 4), _mm256_broadcastsi128_si256(psDelDec->Seed));
+
+ /* Add predictions */
+ exc_Q14 = _mm256_add_epi32(exc_Q14, _mm256_set1_epi32(LTP_pred_Q14));
+ SS_LPC_exc_Q14 = _mm256_slli_epi32(exc_Q14, 1);
+ SS_xq_Q14 = _mm256_add_epi32(exc_Q14, _mm256_broadcastsi128_si256(LPC_pred_Q14));
+
+ /* Update states */
+ SS_Diff_Q14 = _mm256_sub_epi32(SS_xq_Q14, _mm256_set1_epi32(silk_LSHIFT(x_Q10[i], 4)));
+ SS_LF_AR_Q14 = _mm256_sub_epi32(SS_Diff_Q14, _mm256_broadcastsi128_si256(n_AR_Q14));
+ SS_sLTP_shp_Q14 = silk_mm256_sub_sat_epi32(SS_LF_AR_Q14, _mm256_broadcastsi128_si256(n_LF_Q14));
+
+ /* END Updating Delayed Decision States */
+
+ *smpl_buf_idx = (*smpl_buf_idx + DECISION_DELAY - 1) % DECISION_DELAY;
+ last_smple_idx = (*smpl_buf_idx + decisionDelay) % DECISION_DELAY;
+ psLastSample = &psDelDec->Samples[last_smple_idx];
+
+ /* Find winner among the first set (low 128 bits) */
+ RDmin_Q10 = silk_mm_mask_hmin_epi32(_mm256_castsi256_si128(SS_RD_Q10), MaskDelDec);
+ Winner_selector = silk_index_to_selector(silk_index_of_first_equal_epi32(RDmin_Q10, _mm256_castsi256_si128(SS_RD_Q10)));
+
+ /* Increase RD values of expired states: every state whose RandState at the delayed
+ * sample differs from the winner's gets silk_int32_MAX >> 4 added to its cost */
+ Winner_rand_state = _mm_shuffle_epi8(psLastSample->RandState, Winner_selector);
+
+ SS_RD_Q10 = _mm256_blendv_epi8(
+ _mm256_add_epi32(SS_RD_Q10, _mm256_set1_epi32(silk_int32_MAX >> 4)),
+ SS_RD_Q10,
+ _mm256_broadcastsi128_si256(_mm_cmpeq_epi32(psLastSample->RandState, Winner_rand_state)));
+
+ /* find worst in first set */
+ RDmax_Q10 = silk_mm_mask_hmax_epi32(_mm256_extracti128_si256(SS_RD_Q10, 0), MaskDelDec);
+ /* find best in second set */
+ RDmin_Q10 = silk_mm_mask_hmin_epi32(_mm256_extracti128_si256(SS_RD_Q10, 1), MaskDelDec);
+
+ /* Replace a state if best from second set outperforms worst in first set */
+ tmp0 = _mm_cmplt_epi32(RDmin_Q10, RDmax_Q10);
+ if (!_mm_test_all_zeros(tmp0, tmp0))
+ {
+ int t;
+ RDmax_ind = silk_index_of_first_equal_epi32(RDmax_Q10, _mm256_extracti128_si256(SS_RD_Q10, 0));
+ RDmin_ind = silk_index_of_first_equal_epi32(RDmin_Q10, _mm256_extracti128_si256(SS_RD_Q10, 1));
+ tmp1 = _mm_cvtepi8_epi32(_mm_cvtsi32_si128(0xFFU << (unsigned)(RDmax_ind << 3))); /* all-ones in lane RDmax_ind only */
+ tmp0 = _mm_blendv_epi8(
+ _mm_set_epi8(0xF, 0xE, 0xD, 0xC, 0xB, 0xA, 0x9, 0x8, 0x7, 0x6, 0x5, 0x4, 0x3, 0x2, 0x1, 0x0),
+ silk_index_to_selector(RDmin_ind),
+ tmp1); /* identity byte shuffle, except that lane RDmax_ind reads lane RDmin_ind */
+ for (t = i; t < MAX_SUB_FRAME_LENGTH + NSQ_LPC_BUF_LENGTH; t++)
+ {
+ psDelDec->sLPC_Q14[t] = _mm_shuffle_epi8(psDelDec->sLPC_Q14[t], tmp0);
+ }
+ psDelDec->Seed = _mm_shuffle_epi8(psDelDec->Seed, tmp0);
+ psDelDec->SeedInit = _mm_shuffle_epi8(psDelDec->SeedInit, tmp0);
+ for (t = 0; t < MAX_SHAPE_LPC_ORDER; t++)
+ {
+ psDelDec->sAR2_Q14[t] = _mm_shuffle_epi8(psDelDec->sAR2_Q14[t], tmp0);
+ }
+ for (t = 0; t < DECISION_DELAY; t++)
+ {
+ psDelDec->Samples[t].RandState = _mm_shuffle_epi8(psDelDec->Samples[t].RandState, tmp0);
+ psDelDec->Samples[t].Q_Q10 = _mm_shuffle_epi8(psDelDec->Samples[t].Q_Q10, tmp0);
+ psDelDec->Samples[t].Xq_Q14 = _mm_shuffle_epi8(psDelDec->Samples[t].Xq_Q14, tmp0);
+ psDelDec->Samples[t].Pred_Q15 = _mm_shuffle_epi8(psDelDec->Samples[t].Pred_Q15, tmp0);
+ psDelDec->Samples[t].Shape_Q14 = _mm_shuffle_epi8(psDelDec->Samples[t].Shape_Q14, tmp0);
+ }
+ mask = _mm256_castsi128_si256(_mm_blendv_epi8(_mm_set_epi32(0x3, 0x2, 0x1, 0x0), _mm_set1_epi32(RDmin_ind + 4), tmp1)); /* lane RDmax_ind takes element RDmin_ind of the high half (second set) */
+ SS_Q_Q10 = _mm256_permutevar8x32_epi32(SS_Q_Q10, mask);
+ SS_RD_Q10 = _mm256_permutevar8x32_epi32(SS_RD_Q10, mask);
+ SS_xq_Q14 = _mm256_permutevar8x32_epi32(SS_xq_Q14, mask);
+ SS_LF_AR_Q14 = _mm256_permutevar8x32_epi32(SS_LF_AR_Q14, mask);
+ SS_Diff_Q14 = _mm256_permutevar8x32_epi32(SS_Diff_Q14, mask);
+ SS_sLTP_shp_Q14 = _mm256_permutevar8x32_epi32(SS_sLTP_shp_Q14, mask);
+ SS_LPC_exc_Q14 = _mm256_permutevar8x32_epi32(SS_LPC_exc_Q14, mask);
+ }
+
+ /* Write samples from winner to output and long-term filter states */
+ if (subfr > 0 || i >= decisionDelay)
+ {
+ pulses[i - decisionDelay] =
+ (opus_int8)silk_sar_round_32(silk_select_winner(psLastSample->Q_Q10, Winner_selector), 10);
+ xq[i - decisionDelay] =
+ silk_sat16((opus_int32)silk_sar_round_smulww(silk_select_winner(psLastSample->Xq_Q14, Winner_selector), delayedGain_Q10[last_smple_idx], 8));
+ NSQ->sLTP_shp_Q14[NSQ->sLTP_shp_buf_idx - decisionDelay] =
+ silk_select_winner(psLastSample->Shape_Q14, Winner_selector);
+ sLTP_Q15[NSQ->sLTP_buf_idx - decisionDelay] =
+ silk_select_winner(psLastSample->Pred_Q15, Winner_selector);
+ }
+ NSQ->sLTP_shp_buf_idx++;
+ NSQ->sLTP_buf_idx++;
+
+ /* Update states: only the low 128 bits (first set) of the SS_ vectors are kept */
+ psSample = &psDelDec->Samples[*smpl_buf_idx];
+ psDelDec->Seed = _mm_add_epi32(psDelDec->Seed, silk_mm_srai_round_epi32(_mm256_castsi256_si128(SS_Q_Q10), 10));
+ psDelDec->LF_AR_Q14 = _mm256_castsi256_si128(SS_LF_AR_Q14);
+ psDelDec->Diff_Q14 = _mm256_castsi256_si128(SS_Diff_Q14);
+ psDelDec->sLPC_Q14[i + NSQ_LPC_BUF_LENGTH] = _mm256_castsi256_si128(SS_xq_Q14);
+ psDelDec->RD_Q10 = _mm256_castsi256_si128(SS_RD_Q10);
+ psSample->Xq_Q14 = _mm256_castsi256_si128(SS_xq_Q14);
+ psSample->Q_Q10 = _mm256_castsi256_si128(SS_Q_Q10);
+ psSample->Pred_Q15 = _mm256_castsi256_si128(SS_LPC_exc_Q14);
+ psSample->Shape_Q14 = _mm256_castsi256_si128(SS_sLTP_shp_Q14);
+ psSample->RandState = psDelDec->Seed;
+ delayedGain_Q10[*smpl_buf_idx] = Gain_Q10;
+ }
+ /* Update LPC states: move the last NSQ_LPC_BUF_LENGTH entries to the start of the buffer */
+ for (i = 0; i < NSQ_LPC_BUF_LENGTH; i++)
+ {
+ psDelDec->sLPC_Q14[i] = (&psDelDec->sLPC_Q14[length])[i];
+ }
+}
+/* Prepare the states for one subframe: scale the input by the inverse gain into x_sc_Q10,
+ * rescale the LTP state into sLTP_Q15 after re-whitening, and, when the gain differs from
+ * the previous subframe's, rescale every gain-dependent filter state by prev_gain / gain.
+ */
+static OPUS_INLINE void silk_nsq_del_dec_scale_states_avx2(
+ const silk_encoder_state *psEncC, /* I Encoder State */
+ silk_nsq_state *NSQ, /* I/O NSQ state */
+ NSQ_del_dec_struct *psDelDec, /* I/O Delayed decision states */
+ const opus_int16 x16[], /* I Input */
+ opus_int32 x_sc_Q10[MAX_SUB_FRAME_LENGTH], /* O Input scaled with 1/Gain in Q10 */
+ const opus_int16 sLTP[], /* I Re-whitened LTP state in Q0 */
+ opus_int32 sLTP_Q15[], /* O LTP state matching scaled input */
+ opus_int subfr, /* I Subframe number */
+ const opus_int LTP_scale_Q14, /* I LTP state scaling */
+ const opus_int32 Gains_Q16[MAX_NB_SUBFR], /* I Quantization step sizes, one per subframe */
+ const opus_int pitchL[MAX_NB_SUBFR], /* I Pitch lag */
+ const opus_int signal_type, /* I Signal type */
+ const opus_int decisionDelay /* I Decision delay */
+)
+{
+ int i;
+ opus_int lag;
+ opus_int32 gain_adj_Q16, inv_gain_Q31, inv_gain_Q26;
+ NSQ_del_dec_sample_struct *psSample;
+
+ lag = pitchL[subfr];
+ inv_gain_Q31 = silk_INVERSE32_varQ(silk_max(Gains_Q16[subfr], 1), 47);
+ silk_assert(inv_gain_Q31 != 0);
+
+ /* Scale input, four samples per iteration (reads and writes whole groups of four) */
+ inv_gain_Q26 = silk_sar_round_32(inv_gain_Q31, 5);
+ for (i = 0; i < psEncC->subfr_length; i+=4)
+ {
+ __m256i x = _mm256_cvtepi16_epi64(_mm_loadu_si64(&x16[i]));
+ x = _mm256_slli_epi64(_mm256_mul_epi32(x, _mm256_set1_epi32(inv_gain_Q26)), 16);
+ _mm_storeu_si128((__m128i_u*)&x_sc_Q10[i], silk_cvtepi64_epi32_high(x));
+ }
+
+ /* After rewhitening the LTP state is un-scaled, so scale with inv_gain_Q31 */
+ if (NSQ->rewhite_flag)
+ {
+ if (subfr == 0)
+ {
+ /* Do LTP downscaling */
+ inv_gain_Q31 = silk_LSHIFT(silk_SMULWB(inv_gain_Q31, LTP_scale_Q14), 2);
+ }
+ for (i = NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2; i < NSQ->sLTP_buf_idx; i++)
+ {
+ silk_assert(i < MAX_FRAME_LENGTH);
+ sLTP_Q15[i] = silk_SMULWB(inv_gain_Q31, sLTP[i]);
+ }
+ }
+
+ /* Adjust for changing gain */
+ if (Gains_Q16[subfr] != NSQ->prev_gain_Q16)
+ {
+ gain_adj_Q16 = silk_DIV32_varQ(NSQ->prev_gain_Q16, Gains_Q16[subfr], 16);
+
+ /* Scale long-term shaping state, four entries per iteration */
+ for (i = NSQ->sLTP_shp_buf_idx - psEncC->ltp_mem_length; i < NSQ->sLTP_shp_buf_idx; i+=4)
+ {
+ __m128i_u* p = (__m128i_u*)&NSQ->sLTP_shp_Q14[i];
+ *p = silk_mm_smulww_epi32(*p, gain_adj_Q16);
+ }
+
+ /* Scale long-term prediction state */
+ if (signal_type == TYPE_VOICED && NSQ->rewhite_flag == 0)
+ {
+ for (i = NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2; i < NSQ->sLTP_buf_idx - decisionDelay; i++)
+ {
+ sLTP_Q15[i] = ((opus_int64)sLTP_Q15[i]) * ((opus_int64)gain_adj_Q16) >> 16;
+ }
+ }
+
+ /* Scale scalar states */
+ psDelDec->LF_AR_Q14 = silk_mm_smulww_epi32(psDelDec->LF_AR_Q14, gain_adj_Q16);
+ psDelDec->Diff_Q14 = silk_mm_smulww_epi32(psDelDec->Diff_Q14, gain_adj_Q16);
+
+ /* Scale short-term prediction and shaping states */
+ for (i = 0; i < NSQ_LPC_BUF_LENGTH; i++)
+ {
+ psDelDec->sLPC_Q14[i] = silk_mm_smulww_epi32(psDelDec->sLPC_Q14[i], gain_adj_Q16);
+ }
+ for (i = 0; i < DECISION_DELAY; i++)
+ {
+ psSample = &psDelDec->Samples[i];
+ psSample->Pred_Q15 = silk_mm_smulww_epi32(psSample->Pred_Q15, gain_adj_Q16);
+ psSample->Shape_Q14 = silk_mm_smulww_epi32(psSample->Shape_Q14, gain_adj_Q16);
+ }
+ for (i = 0; i < MAX_SHAPE_LPC_ORDER; i++)
+ {
+ psDelDec->sAR2_Q14[i] = silk_mm_smulww_epi32(psDelDec->sAR2_Q14[i], gain_adj_Q16);
+ }
+
+ /* Remember this subframe's gain for the comparison in the next call */
+ NSQ->prev_gain_Q16 = Gains_Q16[subfr];
+ }
+}
+
+static OPUS_INLINE void silk_LPC_analysis_filter_avx2(
+ opus_int16 *out, /* O Output signal */
+ const opus_int16 *in, /* I Input signal */
+ const opus_int16 *B, /* I MA prediction coefficients, Q12 [order] */
+ const opus_int32 len, /* I Signal length */
+ const opus_int32 order /* I Filter order */
+)
+{
+ int i;
+ opus_int32 out32_Q12, out32;
+ silk_assert(order == 10 || order == 16);
+
+ for(i = order; i < len; i++ )
+ {
+ const opus_int16 *in_ptr = &in[ i ];
+ /* Allowing wrap around so that two wraps can cancel each other. The rare
+ cases where the result wraps around can only be triggered by invalid streams*/
+
+ __m256i in_v = _mm256_cvtepi16_epi32(_mm_loadu_si128((__m128i_u*)&in_ptr[-8]));
+ __m256i B_v = _mm256_cvtepi16_epi32(_mm_loadu_si128((__m128i_u*)& B[0]));
+ __m256i sum = _mm256_mullo_epi32(in_v, silk_mm256_reverse_epi32(B_v));
+ if (order > 10)
+ {
+ in_v = _mm256_cvtepi16_epi32(_mm_loadu_si128((__m128i_u*)&in_ptr[-16]));
+ B_v = _mm256_cvtepi16_epi32(_mm_loadu_si128((__m128i_u*)&B [8]));
+ B_v = silk_mm256_reverse_epi32(B_v);
+ }
+ else
+ {
+ in_v = _mm256_cvtepi16_epi32(_mm_loadu_si32(&in_ptr[-10]));
+ B_v = _mm256_cvtepi16_epi32(_mm_loadu_si32(&B [8]));
+ B_v = _mm256_shuffle_epi32(B_v, 0x01);
+ }
+ sum = _mm256_add_epi32(sum, _mm256_mullo_epi32(in_v, B_v));
+
+ out32_Q12 = silk_mm256_hsum_epi32(sum);
+
+ /* Subtract prediction */
+ out32_Q12 = silk_SUB32_ovflw( silk_LSHIFT( (opus_int32)*in_ptr, 12 ), out32_Q12 );
+
+ /* Scale to Q0 */
+ out32 = silk_sar_round_32(out32_Q12, 12);
+
+ /* Saturate output */
+ out[ i ] = silk_sat16(out32);
+ }
+
+ /* Set first d output samples to zero */
+ silk_memset( out, 0, order * sizeof( opus_int16 ) );
+}
diff --git a/media/libopus/silk/x86/NSQ_del_dec_sse4_1.c b/media/libopus/silk/x86/NSQ_del_dec_sse4_1.c
index 3521cae703..5937682d2a 100644
--- a/media/libopus/silk/x86/NSQ_del_dec_sse4_1.c
+++ b/media/libopus/silk/x86/NSQ_del_dec_sse4_1.c
@@ -119,7 +119,7 @@ void silk_NSQ_del_dec_sse4_1(
SideInfoIndices *psIndices, /* I/O Quantization Indices */
const opus_int16 x16[], /* I Input */
opus_int8 pulses[], /* O Quantized pulse signal */
- const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */
+ const opus_int16 *PredCoef_Q12, /* I Short term prediction coefs */
const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */
const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */
const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */
@@ -428,7 +428,7 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec_sse4_1(
LTP_pred_Q14 = 2;
{
__m128i tmpa, tmpb, pred_lag_ptr_tmp;
- pred_lag_ptr_tmp = _mm_loadu_si128( (__m128i *)(&pred_lag_ptr[ -3 ] ) );
+ pred_lag_ptr_tmp = _mm_loadu_si128( (__m128i *)(void*)(&pred_lag_ptr[ -3 ] ) );
pred_lag_ptr_tmp = _mm_shuffle_epi32( pred_lag_ptr_tmp, 0x1B );
tmpa = _mm_mul_epi32( pred_lag_ptr_tmp, b_Q12_0123 );
tmpa = _mm_srli_si128( tmpa, 2 );
@@ -483,7 +483,7 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec_sse4_1(
tmpb = _mm_setzero_si128();
/* step 1 */
- psLPC_Q14_tmp = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[ -3 ] ) ); /* -3, -2 , -1, 0 */
+ psLPC_Q14_tmp = _mm_loadu_si128( (__m128i *)(void*)(&psLPC_Q14[ -3 ] ) ); /* -3, -2 , -1, 0 */
psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, 0x1B ); /* 0, -1, -2, -3 */
tmpa = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_0123 ); /* 0, -1, -2, -3 * 0123 -> 0*0, 2*-2 */
@@ -497,7 +497,7 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec_sse4_1(
tmpb = _mm_add_epi32( tmpb, psLPC_Q14_tmp );
/* step 2 */
- psLPC_Q14_tmp = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[ -7 ] ) );
+ psLPC_Q14_tmp = _mm_loadu_si128( (__m128i *)(void*)(&psLPC_Q14[ -7 ] ) );
psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, 0x1B );
tmpa = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_4567 );
tmpa = _mm_srli_epi64( tmpa, 16 );
@@ -512,7 +512,7 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec_sse4_1(
if ( opus_likely( predictLPCOrder == 16 ) )
{
/* step 3 */
- psLPC_Q14_tmp = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[ -11 ] ) );
+ psLPC_Q14_tmp = _mm_loadu_si128( (__m128i *)(void*)(&psLPC_Q14[ -11 ] ) );
psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, 0x1B );
tmpa = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_89AB );
tmpa = _mm_srli_epi64( tmpa, 16 );
@@ -525,7 +525,7 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec_sse4_1(
tmpb = _mm_add_epi32( tmpb, psLPC_Q14_tmp );
/* step 4 */
- psLPC_Q14_tmp = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[ -15 ] ) );
+ psLPC_Q14_tmp = _mm_loadu_si128( (__m128i *)(void*)(&psLPC_Q14[ -15 ] ) );
psLPC_Q14_tmp = _mm_shuffle_epi32( psLPC_Q14_tmp, 0x1B );
tmpa = _mm_mul_epi32( psLPC_Q14_tmp, a_Q12_CDEF );
tmpa = _mm_srli_epi64( tmpa, 16 );
@@ -830,7 +830,7 @@ static OPUS_INLINE void silk_nsq_del_dec_scale_states_sse4_1(
xmm_x16_x2x0 = _mm_blend_epi16( xmm_x16_x2x0, xmm_x16_x3x1, 0xCC );
- _mm_storeu_si128( (__m128i *)(&(x_sc_Q10[ i ] ) ), xmm_x16_x2x0 );
+ _mm_storeu_si128( (__m128i *)(void*)(&(x_sc_Q10[ i ] ) ), xmm_x16_x2x0 );
}
for( ; i < psEncC->subfr_length; i++ ) {
@@ -862,7 +862,7 @@ static OPUS_INLINE void silk_nsq_del_dec_scale_states_sse4_1(
for( i = NSQ->sLTP_shp_buf_idx - psEncC->ltp_mem_length; i < NSQ->sLTP_shp_buf_idx - 3; i += 4 )
{
- xmm_sLTP_shp_Q14_x2x0 = _mm_loadu_si128( (__m128i *)(&(NSQ->sLTP_shp_Q14[ i ] ) ) );
+ xmm_sLTP_shp_Q14_x2x0 = _mm_loadu_si128( (__m128i *)(void*)(&(NSQ->sLTP_shp_Q14[ i ] ) ) );
/* equal shift right 4 bytes*/
xmm_sLTP_shp_Q14_x3x1 = _mm_shuffle_epi32( xmm_sLTP_shp_Q14_x2x0, _MM_SHUFFLE( 0, 3, 2, 1 ) );
@@ -874,7 +874,7 @@ static OPUS_INLINE void silk_nsq_del_dec_scale_states_sse4_1(
xmm_sLTP_shp_Q14_x2x0 = _mm_blend_epi16( xmm_sLTP_shp_Q14_x2x0, xmm_sLTP_shp_Q14_x3x1, 0xCC );
- _mm_storeu_si128( (__m128i *)(&(NSQ->sLTP_shp_Q14[ i ] ) ), xmm_sLTP_shp_Q14_x2x0 );
+ _mm_storeu_si128( (__m128i *)(void*)(&(NSQ->sLTP_shp_Q14[ i ] ) ), xmm_sLTP_shp_Q14_x2x0 );
}
for( ; i < NSQ->sLTP_shp_buf_idx; i++ ) {
diff --git a/media/libopus/silk/x86/NSQ_sse4_1.c b/media/libopus/silk/x86/NSQ_sse4_1.c
index d5ae1d3b1c..3c9aca7ba1 100644
--- a/media/libopus/silk/x86/NSQ_sse4_1.c
+++ b/media/libopus/silk/x86/NSQ_sse4_1.c
@@ -77,7 +77,7 @@ void silk_NSQ_sse4_1(
SideInfoIndices *psIndices, /* I/O Quantization Indices */
const opus_int16 x16[], /* I Input */
opus_int8 pulses[], /* O Quantized pulse signal */
- const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */
+ const opus_int16 *PredCoef_Q12, /* I Short term prediction coefs */
const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */
const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */
const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */
@@ -338,21 +338,21 @@ static OPUS_INLINE void silk_noise_shape_quantizer_10_16_sse4_1(
xmm_one = _mm_set_epi8( 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 );
/* load a_Q12[0] - a_Q12[7] */
- a_Q12_01234567 = _mm_loadu_si128( (__m128i *)(&a_Q12[ 0 ] ) );
+ a_Q12_01234567 = _mm_loadu_si128( (__m128i *)(void*)(&a_Q12[ 0 ] ) );
/* load a_Q12[ 8 ] - a_Q12[ 15 ] */
- a_Q12_89ABCDEF = _mm_loadu_si128( (__m128i *)(&a_Q12[ 8 ] ) );
+ a_Q12_89ABCDEF = _mm_loadu_si128( (__m128i *)(void*)(&a_Q12[ 8 ] ) );
a_Q12_01234567 = _mm_shuffle_epi8( a_Q12_01234567, xmm_one );
a_Q12_89ABCDEF = _mm_shuffle_epi8( a_Q12_89ABCDEF, xmm_one );
/* load AR_shp_Q13 */
- AR_shp_Q13_76543210 = _mm_loadu_si128( (__m128i *)(&AR_shp_Q13[0] ) );
+ AR_shp_Q13_76543210 = _mm_loadu_si128( (__m128i *)(void*)(&AR_shp_Q13[0] ) );
/* load psLPC_Q14 */
xmm_one = _mm_set_epi8(15, 14, 11, 10, 7, 6, 3, 2, 13, 12, 9, 8, 5, 4, 1, 0 );
- xmm_tempa = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[-16]) );
- xmm_tempb = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[-12]) );
+ xmm_tempa = _mm_loadu_si128( (__m128i *)(void*)(&psLPC_Q14[-16]) );
+ xmm_tempb = _mm_loadu_si128( (__m128i *)(void*)(&psLPC_Q14[-12]) );
xmm_tempa = _mm_shuffle_epi8( xmm_tempa, xmm_one );
xmm_tempb = _mm_shuffle_epi8( xmm_tempb, xmm_one );
@@ -360,8 +360,8 @@ static OPUS_INLINE void silk_noise_shape_quantizer_10_16_sse4_1(
psLPC_Q14_hi_89ABCDEF = _mm_unpackhi_epi64( xmm_tempa, xmm_tempb );
psLPC_Q14_lo_89ABCDEF = _mm_unpacklo_epi64( xmm_tempa, xmm_tempb );
- xmm_tempa = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[ -8 ]) );
- xmm_tempb = _mm_loadu_si128( (__m128i *)(&psLPC_Q14[ -4 ]) );
+ xmm_tempa = _mm_loadu_si128( (__m128i *)(void*)(&psLPC_Q14[ -8 ]) );
+ xmm_tempb = _mm_loadu_si128( (__m128i *)(void*)(&psLPC_Q14[ -4 ]) );
xmm_tempa = _mm_shuffle_epi8( xmm_tempa, xmm_one );
xmm_tempb = _mm_shuffle_epi8( xmm_tempb, xmm_one );
@@ -370,8 +370,8 @@ static OPUS_INLINE void silk_noise_shape_quantizer_10_16_sse4_1(
psLPC_Q14_lo_01234567 = _mm_unpacklo_epi64( xmm_tempa, xmm_tempb );
/* load sAR2_Q14 */
- xmm_tempa = _mm_loadu_si128( (__m128i *)(&(NSQ->sAR2_Q14[ 0 ]) ) );
- xmm_tempb = _mm_loadu_si128( (__m128i *)(&(NSQ->sAR2_Q14[ 4 ]) ) );
+ xmm_tempa = _mm_loadu_si128( (__m128i *)(void*)(&(NSQ->sAR2_Q14[ 0 ]) ) );
+ xmm_tempb = _mm_loadu_si128( (__m128i *)(void*)(&(NSQ->sAR2_Q14[ 4 ]) ) );
xmm_tempa = _mm_shuffle_epi8( xmm_tempa, xmm_one );
xmm_tempb = _mm_shuffle_epi8( xmm_tempb, xmm_one );
@@ -443,7 +443,7 @@ static OPUS_INLINE void silk_noise_shape_quantizer_10_16_sse4_1(
b_Q14_0123 = _mm_shuffle_epi32( b_Q14_3210, 0x1B );
/* loaded: [0] [-1] [-2] [-3] */
- pred_lag_ptr_0123 = _mm_loadu_si128( (__m128i *)(&pred_lag_ptr[ -3 ] ) );
+ pred_lag_ptr_0123 = _mm_loadu_si128( (__m128i *)(void*)(&pred_lag_ptr[ -3 ] ) );
/* shuffle to [-3] [-2] [-1] [0] and to new xmm */
xmm_tempa = _mm_shuffle_epi32( pred_lag_ptr_0123, 0x1B );
/*64-bit multiply, a[2] * b[-2], a[0] * b[0] */
@@ -595,8 +595,8 @@ static OPUS_INLINE void silk_noise_shape_quantizer_10_16_sse4_1(
/* write back sAR2_Q14 */
xmm_tempa = _mm_unpackhi_epi16( sAR2_Q14_lo_76543210, sAR2_Q14_hi_76543210 );
xmm_tempb = _mm_unpacklo_epi16( sAR2_Q14_lo_76543210, sAR2_Q14_hi_76543210 );
- _mm_storeu_si128( (__m128i *)(&NSQ->sAR2_Q14[ 4 ]), xmm_tempa );
- _mm_storeu_si128( (__m128i *)(&NSQ->sAR2_Q14[ 0 ]), xmm_tempb );
+ _mm_storeu_si128( (__m128i *)(void*)(&NSQ->sAR2_Q14[ 4 ]), xmm_tempa );
+ _mm_storeu_si128( (__m128i *)(void*)(&NSQ->sAR2_Q14[ 0 ]), xmm_tempb );
/* xq[ i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( psLPC_Q14[ i ], Gain_Q10 ), 8 ) ); */
{
@@ -612,8 +612,8 @@ static OPUS_INLINE void silk_noise_shape_quantizer_10_16_sse4_1(
/* process xq */
for (i = 0; i < length - 7; i += 8)
{
- xmm_xq_Q14_3210 = _mm_loadu_si128( (__m128i *)(&(psLPC_Q14[ i + 0 ] ) ) );
- xmm_xq_Q14_7654 = _mm_loadu_si128( (__m128i *)(&(psLPC_Q14[ i + 4 ] ) ) );
+ xmm_xq_Q14_3210 = _mm_loadu_si128( (__m128i *)(void*)(&(psLPC_Q14[ i + 0 ] ) ) );
+ xmm_xq_Q14_7654 = _mm_loadu_si128( (__m128i *)(void*)(&(psLPC_Q14[ i + 4 ] ) ) );
/* equal shift right 4 bytes*/
xmm_xq_Q14_x3x1 = _mm_shuffle_epi32( xmm_xq_Q14_3210, _MM_SHUFFLE( 0, 3, 2, 1 ) );
@@ -644,7 +644,7 @@ static OPUS_INLINE void silk_noise_shape_quantizer_10_16_sse4_1(
xmm_xq_Q14_3210 = _mm_packs_epi32( xmm_xq_Q14_3210, xmm_xq_Q14_7654 );
/* save to xq */
- _mm_storeu_si128( (__m128i *)(&xq[ i ] ), xmm_xq_Q14_3210 );
+ _mm_storeu_si128( (__m128i *)(void*)(&xq[ i ] ), xmm_xq_Q14_3210 );
}
}
for ( ; i < length; i++)
@@ -698,7 +698,7 @@ static OPUS_INLINE void silk_nsq_scale_states_sse4_1(
xmm_x16_x2x0 = _mm_blend_epi16( xmm_x16_x2x0, xmm_x16_x3x1, 0xCC );
- _mm_storeu_si128( (__m128i *)(&(x_sc_Q10[ i ] ) ), xmm_x16_x2x0 );
+ _mm_storeu_si128( (__m128i *)(void*)(&(x_sc_Q10[ i ] ) ), xmm_x16_x2x0 );
}
for( ; i < psEncC->subfr_length; i++ ) {
@@ -729,7 +729,7 @@ static OPUS_INLINE void silk_nsq_scale_states_sse4_1(
for( i = NSQ->sLTP_shp_buf_idx - psEncC->ltp_mem_length; i < NSQ->sLTP_shp_buf_idx - 3; i += 4 )
{
- xmm_sLTP_shp_Q14_x2x0 = _mm_loadu_si128( (__m128i *)(&(NSQ->sLTP_shp_Q14[ i ] ) ) );
+ xmm_sLTP_shp_Q14_x2x0 = _mm_loadu_si128( (__m128i *)(void*)(&(NSQ->sLTP_shp_Q14[ i ] ) ) );
/* equal shift right 4 bytes*/
xmm_sLTP_shp_Q14_x3x1 = _mm_shuffle_epi32( xmm_sLTP_shp_Q14_x2x0, _MM_SHUFFLE( 0, 3, 2, 1 ) );
@@ -741,7 +741,7 @@ static OPUS_INLINE void silk_nsq_scale_states_sse4_1(
xmm_sLTP_shp_Q14_x2x0 = _mm_blend_epi16( xmm_sLTP_shp_Q14_x2x0, xmm_sLTP_shp_Q14_x3x1, 0xCC );
- _mm_storeu_si128( (__m128i *)(&(NSQ->sLTP_shp_Q14[ i ] ) ), xmm_sLTP_shp_Q14_x2x0 );
+ _mm_storeu_si128( (__m128i *)(void*)(&(NSQ->sLTP_shp_Q14[ i ] ) ), xmm_sLTP_shp_Q14_x2x0 );
}
for( ; i < NSQ->sLTP_shp_buf_idx; i++ ) {
diff --git a/media/libopus/silk/x86/VAD_sse4_1.c b/media/libopus/silk/x86/VAD_sse4_1.c
index e7eaf9714a..9e06bc79d0 100644
--- a/media/libopus/silk/x86/VAD_sse4_1.c
+++ b/media/libopus/silk/x86/VAD_sse4_1.c
@@ -144,7 +144,7 @@ opus_int silk_VAD_GetSA_Q8_sse4_1( /* O Return value, 0 if s
for( i = 0; i < dec_subframe_length - 7; i += 8 )
{
- xmm_X = _mm_loadu_si128( (__m128i *)&(X[ X_offset[ b ] + i + dec_subframe_offset ] ) );
+ xmm_X = _mm_loadu_si128( (__m128i *)(void*)&(X[ X_offset[ b ] + i + dec_subframe_offset ] ) );
xmm_X = _mm_srai_epi16( xmm_X, 3 );
xmm_X = _mm_madd_epi16( xmm_X, xmm_X );
xmm_acc = _mm_add_epi32( xmm_acc, xmm_X );
diff --git a/media/libopus/silk/x86/VQ_WMat_EC_sse4_1.c b/media/libopus/silk/x86/VQ_WMat_EC_sse4_1.c
index 2c7d18d05e..df4626b60a 100644
--- a/media/libopus/silk/x86/VQ_WMat_EC_sse4_1.c
+++ b/media/libopus/silk/x86/VQ_WMat_EC_sse4_1.c
@@ -65,7 +65,7 @@ void silk_VQ_WMat_EC_sse4_1(
neg_xX_Q24[ 3 ] = -silk_LSHIFT32( xX_Q17[ 3 ], 7 );
neg_xX_Q24[ 4 ] = -silk_LSHIFT32( xX_Q17[ 4 ], 7 );
- v_XX_31_Q17 = _mm_loadu_si128( (__m128i *)(&XX_Q17[ 1 ] ) );
+ v_XX_31_Q17 = _mm_loadu_si128( (__m128i *)(void*)(&XX_Q17[ 1 ] ) );
v_XX_42_Q17 = _mm_shuffle_epi32( v_XX_31_Q17, _MM_SHUFFLE( 0, 3, 2, 1 ) );
/* Loop over codebook */
diff --git a/media/libopus/silk/x86/main_sse.h b/media/libopus/silk/x86/main_sse.h
index a01d7f6c75..b254d53e7a 100644
--- a/media/libopus/silk/x86/main_sse.h
+++ b/media/libopus/silk/x86/main_sse.h
@@ -88,7 +88,7 @@ void silk_NSQ_sse4_1(
SideInfoIndices *psIndices, /* I/O Quantization Indices */
const opus_int16 x16[], /* I Input */
opus_int8 pulses[], /* O Quantized pulse signal */
- const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */
+ const opus_int16 *PredCoef_Q12, /* I Short term prediction coefs */
const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */
const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */
const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */
@@ -116,7 +116,7 @@ extern void (*const SILK_NSQ_IMPL[OPUS_ARCHMASK + 1])(
SideInfoIndices *psIndices, /* I/O Quantization Indices */
const opus_int16 x16[], /* I Input */
opus_int8 pulses[], /* O Quantized pulse signal */
- const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */
+ const opus_int16 *PredCoef_Q12, /* I Short term prediction coefs */
const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */
const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */
const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */
@@ -142,7 +142,7 @@ void silk_NSQ_del_dec_sse4_1(
SideInfoIndices *psIndices, /* I/O Quantization Indices */
const opus_int16 x16[], /* I Input */
opus_int8 pulses[], /* O Quantized pulse signal */
- const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */
+ const opus_int16 *PredCoef_Q12, /* I Short term prediction coefs */
const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */
const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */
const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */
@@ -154,7 +154,33 @@ void silk_NSQ_del_dec_sse4_1(
const opus_int LTP_scale_Q14 /* I LTP state scaling */
);
-# if defined OPUS_X86_PRESUME_SSE4_1
+/* AVX2 implementation of silk_NSQ_del_dec(). It takes the same parameter list as
+ silk_NSQ_del_dec_sse4_1() declared above; the arch argument of the generic
+ silk_NSQ_del_dec() macro is not passed down to it. */
+void silk_NSQ_del_dec_avx2(
+ const silk_encoder_state *psEncC, /* I Encoder State */
+ silk_nsq_state *NSQ, /* I/O NSQ state */
+ SideInfoIndices *psIndices, /* I/O Quantization Indices */
+ const opus_int16 x16[], /* I Input */
+ opus_int8 pulses[], /* O Quantized pulse signal */
+ const opus_int16 *PredCoef_Q12, /* I Short term prediction coefs */
+ const opus_int16 LTPCoef_Q14[LTP_ORDER * MAX_NB_SUBFR], /* I Long term prediction coefs */
+ const opus_int16 AR_Q13[MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER], /* I Noise shaping coefs */
+ const opus_int HarmShapeGain_Q14[MAX_NB_SUBFR], /* I Long term shaping coefs */
+ const opus_int Tilt_Q14[MAX_NB_SUBFR], /* I Spectral tilt */
+ const opus_int32 LF_shp_Q14[MAX_NB_SUBFR], /* I Low frequency shaping coefs */
+ const opus_int32 Gains_Q16[MAX_NB_SUBFR], /* I Quantization step sizes */
+ const opus_int32 pitchL[MAX_NB_SUBFR], /* I Pitch lags */
+ const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */
+ const opus_int LTP_scale_Q14 /* I LTP state scaling */
+);
+
+# if defined (OPUS_X86_PRESUME_AVX2)
+
+# define OVERRIDE_silk_NSQ_del_dec
+# define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, AR_Q13, \
+ HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \
+ ((void)(arch),silk_NSQ_del_dec_avx2(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, AR_Q13, \
+ HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14))
+
+# elif defined (OPUS_X86_PRESUME_SSE4_1) && !defined(OPUS_X86_MAY_HAVE_AVX2)
# define OVERRIDE_silk_NSQ_del_dec
# define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, AR_Q13, \
@@ -170,7 +196,7 @@ extern void (*const SILK_NSQ_DEL_DEC_IMPL[OPUS_ARCHMASK + 1])(
SideInfoIndices *psIndices, /* I/O Quantization Indices */
const opus_int16 x16[], /* I Input */
opus_int8 pulses[], /* O Quantized pulse signal */
- const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */
+ const opus_int16 *PredCoef_Q12, /* I Short term prediction coefs */
const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */
const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */
const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */
@@ -243,5 +269,31 @@ extern opus_int (*const SILK_VAD_GETSA_Q8_IMPL[OPUS_ARCHMASK + 1])(
# endif
+#ifndef FIXED_POINT
+/* AVX2 implementation of silk_inner_product_FLP() (floating-point builds only). */
+double silk_inner_product_FLP_avx2(
+    const silk_float *data1,
+    const silk_float *data2,
+    opus_int dataSize
+);
+
+#if defined (OPUS_X86_PRESUME_AVX2)
+
+/* AVX2 is presumed at build time: call the AVX2 version directly. arch is
+   evaluated and discarded; it is parenthesized so any expression is accepted. */
+#define OVERRIDE_inner_product_FLP
+#define silk_inner_product_FLP(data1, data2, dataSize, arch) ((void)(arch),silk_inner_product_FLP_avx2(data1, data2, dataSize))
+
+#elif defined(OPUS_HAVE_RTCD) && defined(OPUS_X86_MAY_HAVE_AVX2)
+
+/* Run-time CPU detection: dispatch through a table indexed by
+   (arch & OPUS_ARCHMASK). */
+#define OVERRIDE_inner_product_FLP
+extern double (*const SILK_INNER_PRODUCT_FLP_IMPL[OPUS_ARCHMASK + 1])(
+    const silk_float *data1,
+    const silk_float *data2,
+    opus_int dataSize
+);
+
+#define silk_inner_product_FLP(data1, data2, dataSize, arch) ((void)(arch),(*SILK_INNER_PRODUCT_FLP_IMPL[(arch) & OPUS_ARCHMASK])(data1, data2, dataSize))
+
+#endif
+#endif
+
# endif
#endif
diff --git a/media/libopus/silk/x86/x86_silk_map.c b/media/libopus/silk/x86/x86_silk_map.c
index 70f60078cf..39ad75276c 100644
--- a/media/libopus/silk/x86/x86_silk_map.c
+++ b/media/libopus/silk/x86/x86_silk_map.c
@@ -32,10 +32,13 @@
#include "celt/x86/x86cpu.h"
#include "structs.h"
#include "SigProc_FIX.h"
+#ifndef FIXED_POINT
+#include "SigProc_FLP.h"
+#endif
#include "pitch.h"
#include "main.h"
-#if defined(OPUS_HAVE_RTCD) && !defined(OPUS_X86_PRESUME_SSE4_1)
+#if defined(OPUS_HAVE_RTCD) && !defined(OPUS_X86_PRESUME_AVX2)
#if defined(FIXED_POINT)
@@ -72,7 +75,7 @@ void (*const SILK_NSQ_IMPL[ OPUS_ARCHMASK + 1 ] )(
SideInfoIndices *psIndices, /* I/O Quantization Indices */
const opus_int16 x16[], /* I Input */
opus_int8 pulses[], /* O Quantized pulse signal */
- const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */
+ const opus_int16 *PredCoef_Q12, /* I Short term prediction coefs */
const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */
const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */
const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */
@@ -117,7 +120,7 @@ void (*const SILK_NSQ_DEL_DEC_IMPL[ OPUS_ARCHMASK + 1 ] )(
SideInfoIndices *psIndices, /* I/O Quantization Indices */
const opus_int16 x16[], /* I Input */
opus_int8 pulses[], /* O Quantized pulse signal */
- const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */
+ const opus_int16 *PredCoef_Q12, /* I Short term prediction coefs */
const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */
const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */
const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */
@@ -132,7 +135,7 @@ void (*const SILK_NSQ_DEL_DEC_IMPL[ OPUS_ARCHMASK + 1 ] )(
silk_NSQ_del_dec_c,
silk_NSQ_del_dec_c,
MAY_HAVE_SSE4_1( silk_NSQ_del_dec ), /* sse4.1 */
- MAY_HAVE_SSE4_1( silk_NSQ_del_dec ) /* avx */
+ MAY_HAVE_AVX2( silk_NSQ_del_dec ) /* avx */
};
#if defined(FIXED_POINT)
@@ -156,4 +159,21 @@ void (*const SILK_BURG_MODIFIED_IMPL[ OPUS_ARCHMASK + 1 ] )(
};
#endif
+
+#ifndef FIXED_POINT
+
+/* Table of silk_inner_product_FLP() implementations with OPUS_ARCHMASK + 1
+ entries, one per arch value. Every entry except the last is the plain C
+ version; the last one is chosen by MAY_HAVE_AVX2() (macro defined elsewhere;
+ presumably it expands to the _avx2 or the _c function depending on the
+ build -- TODO confirm). */
+double (*const SILK_INNER_PRODUCT_FLP_IMPL[ OPUS_ARCHMASK + 1 ] )(
+ const silk_float *data1,
+ const silk_float *data2,
+ opus_int dataSize
+) = {
+ silk_inner_product_FLP_c, /* non-sse */
+ silk_inner_product_FLP_c,
+ silk_inner_product_FLP_c,
+ silk_inner_product_FLP_c, /* sse4.1 */
+ MAY_HAVE_AVX2( silk_inner_product_FLP ) /* avx2 */
+};
+
+#endif
+
#endif
diff --git a/media/libopus/sources.mozbuild b/media/libopus/sources.mozbuild
index 891f5b32ae..1a480a0783 100644
--- a/media/libopus/sources.mozbuild
+++ b/media/libopus/sources.mozbuild
@@ -47,6 +47,10 @@ celt_sources_sse4_1 = [
'celt/x86/pitch_sse4_1.c',
]
+celt_sources_avx2 = [
+ 'celt/x86/pitch_avx.c',
+]
+
celt_sources_arm_rtcd = [
'celt/arm/arm_celt_map.c',
'celt/arm/armcpu.c',
@@ -71,6 +75,7 @@ celt_sources_arm_ne10 = [
]
opus_sources = [
+ 'src/extensions.c',
'src/opus.c',
'src/opus_decoder.c',
'src/opus_encoder.c',
@@ -174,6 +179,10 @@ silk_sources_sse4_1 = [
'silk/x86/VQ_WMat_EC_sse4_1.c',
]
+silk_sources_avx2 = [
+ 'silk/x86/NSQ_del_dec_avx2.c',
+]
+
silk_sources_arm_rtcd = [
'silk/arm/arm_silk_map.c',
]
@@ -249,3 +258,7 @@ silk_sources_float = [
'silk/float/wrappers_FLP.c',
]
+silk_sources_float_avx2 = [
+ 'silk/float/x86/inner_product_FLP_avx2.c',
+]
+
diff --git a/media/libopus/src/analysis.c b/media/libopus/src/analysis.c
index 058328f0fd..1f58013812 100644
--- a/media/libopus/src/analysis.c
+++ b/media/libopus/src/analysis.c
@@ -929,9 +929,9 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt
features[23] = info->tonality_slope + 0.069216f;
features[24] = tonal->lowECount - 0.067930f;
- compute_dense(&layer0, layer_out, features);
- compute_gru(&layer1, tonal->rnn_state, layer_out);
- compute_dense(&layer2, frame_probs, tonal->rnn_state);
+ analysis_compute_dense(&layer0, layer_out, features);
+ analysis_compute_gru(&layer1, tonal->rnn_state, layer_out);
+ analysis_compute_dense(&layer2, frame_probs, tonal->rnn_state);
/* Probability of speech or music vs noise */
info->activity_probability = frame_probs[1];
diff --git a/media/libopus/src/extensions.c b/media/libopus/src/extensions.c
new file mode 100644
index 0000000000..bb6c0b0268
--- /dev/null
+++ b/media/libopus/src/extensions.c
@@ -0,0 +1,315 @@
+/* Copyright (c) 2022 Amazon */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+
+#include "opus_types.h"
+#include "opus_defines.h"
+#include "arch.h"
+#include "os_support.h"
+#include "opus_private.h"
+
+
+/* Given an extension payload, advance data to the next extension and return the
+   length of the remaining extensions.
+   data:        in/out cursor; on success it points just past the skipped extension.
+   len:         number of bytes available starting at *data.
+   header_size: out; number of header bytes of the skipped extension (the ID byte
+                plus any explicit length bytes), or 0 when len is 0.
+   Returns the number of bytes left after the skipped extension, or -1 if the
+   extension does not fit in len bytes. */
+opus_int32 skip_extension(const unsigned char **data, opus_int32 len, opus_int32 *header_size)
+{
+   int id, L;
+   /* Define the output on every path, including the empty-input return below,
+      so a caller can never read an indeterminate header size. */
+   *header_size = 0;
+   if (len==0)
+      return 0;
+   id = **data>>1;
+   L = **data&1;
+   if (id == 0 && L == 1)
+   {
+      /* Single padding byte. */
+      *header_size = 1;
+      if (len < 1)
+         return -1;
+      (*data)++;
+      len--;
+      return len;
+   } else if (id > 0 && id < 32)
+   {
+      /* Short extension: the ID byte followed by L (0 or 1) payload bytes. */
+      if (len < 1+L)
+         return -1;
+      *data += 1+L;
+      len -= 1+L;
+      *header_size = 1;
+      return len;
+   } else {
+      if (L==0)
+      {
+         /* No explicit length: the extension runs to the end of the buffer. */
+         *data += len;
+         *header_size = 1;
+         return 0;
+      } else {
+         /* Explicit length: a run of 255-valued bytes terminated by a byte
+            below 255; the length is the sum of all of them. */
+         opus_int32 bytes=0;
+         *header_size = 1;
+         do {
+            (*data)++;
+            len--;
+            if (len == 0)
+               return -1;
+            bytes += **data;
+            (*header_size)++;
+         } while (**data == 255);
+         (*data)++;
+         len--;
+         if (bytes <= len)
+         {
+            len -= bytes;
+            *data += bytes;
+         } else {
+            return -1;
+         }
+         return len;
+      }
+   }
+}
+
+/* Walk the extension area and report how many real extensions it holds.
+   IDs 0 (padding) and 1 (frame separator) are skipped and not counted.
+   Returns the count, or OPUS_INVALID_PACKET if an extension is malformed. */
+opus_int32 opus_packet_extensions_count(const unsigned char *data, opus_int32 len)
+{
+   const unsigned char *cursor = data;
+   opus_int32 remaining;
+   opus_int32 nb_found = 0;
+
+   celt_assert(len >= 0);
+   celt_assert(data != NULL || len == 0);
+
+   for (remaining = len; remaining > 0; )
+   {
+      opus_int32 hdr_bytes;
+      /* Read the ID before skip_extension() moves the cursor. */
+      const int ext_id = *cursor>>1;
+      remaining = skip_extension(&cursor, remaining, &hdr_bytes);
+      if (remaining < 0)
+         return OPUS_INVALID_PACKET;
+      nb_found += (ext_id > 1);
+   }
+   return nb_found;
+}
+
+/* Extract extensions from Opus padding (excluding real padding and separators).
+ On entry *nb_extensions is the capacity of extensions[]; on return it holds the
+ number of entries written. Each entry's data pointer refers into the input
+ buffer (nothing is copied). Returns OPUS_OK, OPUS_INVALID_PACKET if an
+ extension does not fit in the buffer or the frame index reaches 48, or
+ OPUS_BUFFER_TOO_SMALL if there are more extensions than extensions[] can hold. */
+opus_int32 opus_packet_extensions_parse(const unsigned char *data, opus_int32 len, opus_extension_data *extensions, opus_int32 *nb_extensions)
+{
+ const unsigned char *curr_data;
+ opus_int32 curr_len;
+ int curr_frame=0;
+ opus_int32 count=0;
+
+ celt_assert(len >= 0);
+ celt_assert(data != NULL || len == 0);
+ celt_assert(nb_extensions != NULL);
+ celt_assert(extensions != NULL || *nb_extensions == 0);
+
+ curr_data = data;
+ curr_len = len;
+ while (curr_len > 0)
+ {
+ int id;
+ opus_int32 header_size;
+ opus_extension_data curr_ext;
+ id = *curr_data>>1;
+ if (id > 1)
+ {
+ /* Real extension: remember where it starts. Its length and payload
+ pointer are filled in once skip_extension() has reported the header size. */
+ curr_ext.id = id;
+ curr_ext.frame = curr_frame;
+ curr_ext.data = curr_data;
+ } else if (id == 1)
+ {
+ /* Frame separator: L=0 advances by one frame, L=1 advances by the count
+ stored in the following byte. */
+ int L = *curr_data&1;
+ if (L==0)
+ curr_frame++;
+ else {
+ if (curr_len >= 2)
+ curr_frame += curr_data[1];
+ /* Else we're at the end and it doesn't matter. */
+ }
+ if (curr_frame >= 48)
+ {
+ *nb_extensions = count;
+ return OPUS_INVALID_PACKET;
+ }
+ }
+ curr_len = skip_extension(&curr_data, curr_len, &header_size);
+ /* printf("curr_len = %d, header_size = %d\n", curr_len, header_size); */
+ if (curr_len < 0)
+ {
+ *nb_extensions = count;
+ return OPUS_INVALID_PACKET;
+ }
+ celt_assert(curr_data - data == len - curr_len);
+ if (id > 1)
+ {
+ if (count == *nb_extensions)
+ {
+ return OPUS_BUFFER_TOO_SMALL;
+ }
+ /* Payload length = bytes consumed by skip_extension() minus the header
+ bytes; then move the data pointer past the header to the payload. */
+ curr_ext.len = curr_data - curr_ext.data - header_size;
+ curr_ext.data += header_size;
+ extensions[count++] = curr_ext;
+ }
+ }
+ celt_assert(curr_len == 0);
+ *nb_extensions = count;
+ return OPUS_OK;
+}
+
+/* Serialize extensions[] into data[0..len-1] in increasing frame order, emitting
+   a frame separator whenever the frame index changes.
+   data may be NULL, in which case nothing is written and only the size is
+   computed. If pad is nonzero and the output is shorter than len, 0x01 padding
+   bytes are prepended so that exactly len bytes are produced.
+   Returns the number of bytes produced, OPUS_BAD_ARG for an invalid extension
+   (id outside [2,127], frame outside [0,47], negative length, or an extension
+   with id < 32 carrying more than one byte), or OPUS_BUFFER_TOO_SMALL if the
+   output does not fit in len bytes. */
+opus_int32 opus_packet_extensions_generate(unsigned char *data, opus_int32 len, const opus_extension_data *extensions, opus_int32 nb_extensions, int pad)
+{
+   int max_frame=0;
+   opus_int32 i;
+   int frame;
+   int curr_frame = 0;
+   opus_int32 pos = 0;
+   opus_int32 written = 0;
+
+   celt_assert(len >= 0);
+
+   for (i=0;i<nb_extensions;i++)
+   {
+      /* A negative frame would never match the per-frame loop below, so the
+         extension would be dropped without any error; reject it up front. */
+      if (extensions[i].frame < 0)
+         return OPUS_BAD_ARG;
+      max_frame = IMAX(max_frame, extensions[i].frame);
+      if (extensions[i].id < 2 || extensions[i].id > 127)
+         return OPUS_BAD_ARG;
+   }
+   if (max_frame >= 48) return OPUS_BAD_ARG;
+   for (frame=0;frame<=max_frame;frame++)
+   {
+      for (i=0;i<nb_extensions;i++)
+      {
+         if (extensions[i].frame == frame)
+         {
+            /* Insert separator when needed. */
+            if (frame != curr_frame) {
+               int diff = frame - curr_frame;
+               if (len-pos < 2)
+                  return OPUS_BUFFER_TOO_SMALL;
+               if (diff == 1) {
+                  if (data) data[pos] = 0x02;
+                  pos++;
+               } else {
+                  if (data) data[pos] = 0x03;
+                  pos++;
+                  if (data) data[pos] = diff;
+                  pos++;
+               }
+               curr_frame = frame;
+            }
+            if (extensions[i].id < 32)
+            {
+               /* Short extension: the low bit of the ID byte is the payload
+                  length (0 or 1). */
+               if (extensions[i].len < 0 || extensions[i].len > 1)
+                  return OPUS_BAD_ARG;
+               if (len-pos < extensions[i].len+1)
+                  return OPUS_BUFFER_TOO_SMALL;
+               if (data) data[pos] = (extensions[i].id<<1) + extensions[i].len;
+               pos++;
+               if (extensions[i].len > 0) {
+                  if (data) data[pos] = extensions[i].data[0];
+                  pos++;
+               }
+            } else {
+               int last;
+               opus_int32 length_bytes;
+               if (extensions[i].len < 0)
+                  return OPUS_BAD_ARG;
+               /* The last extension written is coded with L=0, i.e. without
+                  length bytes. */
+               last = (written == nb_extensions - 1);
+               length_bytes = 1 + extensions[i].len/255;
+               if (last)
+                  length_bytes = 0;
+               if (len-pos < 1 + length_bytes + extensions[i].len)
+                  return OPUS_BUFFER_TOO_SMALL;
+               if (data) data[pos] = (extensions[i].id<<1) + !last;
+               pos++;
+               if (!last)
+               {
+                  /* Length is coded as len/255 bytes of 255 followed by len%255. */
+                  opus_int32 j;
+                  for (j=0;j<extensions[i].len/255;j++) {
+                     if (data) data[pos] = 255;
+                     pos++;
+                  }
+                  if (data) data[pos] = extensions[i].len % 255;
+                  pos++;
+               }
+               if (data) OPUS_COPY(&data[pos], extensions[i].data, extensions[i].len);
+               pos += extensions[i].len;
+            }
+            written++;
+         }
+      }
+   }
+   /* If we need to pad, just prepend 0x01 bytes. Even better would be to fill the
+      end with zeros, but that requires checking that turning the last extension into
+      an L=1 case still fits. */
+   if (pad && pos < len)
+   {
+      opus_int32 padding = len - pos;
+      if (data) {
+         OPUS_MOVE(data+padding, data, pos);
+         for (i=0;i<padding;i++)
+            data[i] = 0x01;
+      }
+      pos += padding;
+   }
+   return pos;
+}
+
+#if 0
+/* Disabled stand-alone example (never compiled): generates an extension area of
+ up to 32 bytes from four sample extensions with padding enabled, dumps the
+ bytes in hex, then counts and re-parses them and prints each parsed entry. */
+#include <stdio.h>
+int main()
+{
+ opus_extension_data ext[] = {{2, 0, (const unsigned char *)"a", 1},
+ {32, 10, (const unsigned char *)"DRED", 4},
+ {33, 1, (const unsigned char *)"NOT DRED", 8},
+ {3, 4, (const unsigned char *)NULL, 0}
+ };
+ opus_extension_data ext2[10];
+ int i, len;
+ int nb_ext = 10;
+ unsigned char packet[10000];
+ len = opus_packet_extensions_generate(packet, 32, ext, 4, 1);
+ for (i=0;i<len;i++)
+ {
+ printf("%#04x ", packet[i]);
+ if (i%16 == 15)
+ printf("\n");
+ }
+ printf("\n");
+ printf("count = %d\n", opus_packet_extensions_count(packet, len));
+ opus_packet_extensions_parse(packet, len, ext2, &nb_ext);
+ for (i=0;i<nb_ext;i++)
+ {
+ int j;
+ printf("%d %d {", ext2[i].id, ext2[i].frame);
+ for (j=0;j<ext2[i].len;j++) printf("%#04x ", ext2[i].data[j]);
+ printf("} %d\n", ext2[i].len);
+ }
+}
+#endif
diff --git a/media/libopus/src/mapping_matrix.c b/media/libopus/src/mapping_matrix.c
index 31298af057..3f78ab5990 100644
--- a/media/libopus/src/mapping_matrix.c
+++ b/media/libopus/src/mapping_matrix.c
@@ -302,6 +302,287 @@ const opus_int16 mapping_matrix_toa_mixing_data[324] = {
0, 0, 0, 32767
};
+const MappingMatrix mapping_matrix_fourthoa_mixing = { 27, 27, 0 };
+const opus_int16 mapping_matrix_fourthoa_mixing_data[729] = {
+ 9243, 0, 16010, 0, 0, 0, 20669, 0,
+ 0, 0, 0, 0, 24456, 0, 0, 0,
+ 0, 0, 0, 0, 27731, 0, 0, 0,
+ 0, 0, 0, 9243, 0, 10884, 11741, 0,
+ 0, 3995, 17849, 9626, 0, 0, 0, -5727,
+ 14399, 17315, 7625, 0, 0, 0, 0, -11747,
+ 2574, 18637, 15552, 5930, 0, 0, 9243, -14302,
+ -2682, -6677, 13337, 5357, -9464, 2501, -11170, 4770,
+ -5911, 11501, 5858, 5369, 4951, 17901, -19071, -2397,
+ -9281, -9198, 7576, -4294, 7773, -8997, -3399, 0,
+ 0, 9243, 9940, 11991, -3705, -5144, 16647, 7057,
+ -6206, -5941, -2698, -10194, 16781, -1788, -6256, -11772,
+ 4935, 3912, -6062, -13039, 9446, -9758, -3521, -15058,
+ 11089, 565, 0, 0, 9243, -15376, 3720, 2461,
+ -5285, -7989, -8660, 1278, -16087, 15811, -3249, 10500,
+ -7757, -1680, -9890, -8153, 10884, 11022, 2847, 12828,
+ 5137, -2053, 8666, -5684, 14776, 0, 0, 9243,
+ -10577, 10304, -6186, 9139, -15222, 2507, -8902, -5140,
+ -145, 15562, -10596, -7311, -6197, -8753, 8667, -6014,
+ -281, 15033, 938, -11859, 548, -8456, 16735, -3654,
+ 0, 0, 9243, 8974, 4839, -12343, -15472, 6066,
+ -7501, -8343, 5015, 15920, -12374, -4559, -9400, 6271,
+ 4011, 5191, -9932, 14438, 4828, -8768, 1909, 12059,
+ -1565, 4707, -13711, 0, 0, 9243, 15799, 2085,
+ -1534, -3386, 4602, -9808, -447, -17267, -18054, -1167,
+ -13525, -4644, 1313, -5951, 5397, 7485, -7056, 2584,
+ -8120, 8669, 788, 13177, 2109, 18349, 0, 0,
+ 9243, 12371, -10036, 1597, 2760, -17341, 1848, -2239,
+ -10509, -8474, -4577, 11164, 7935, 1441, 17430, -3436,
+ -3713, 15936, 4184, 2647, -11730, 341, -15934, 6462,
+ 6581, 0, 0, 9243, -8963, 2184, 13084, -16381,
+ -2734, -9757, 3991, 6345, -18297, -5912, 7604, -4849,
+ -11100, 2290, -4304, -13305, -7488, 12338, 4805, 8505,
+ -7014, -4779, -1761, -14597, 0, 0, 9243, 1301,
+ -15498, 3799, 690, -2816, 18718, -8223, 889, 255,
+ -1768, 4485, -19951, 13097, -2278, 167, 78, -740,
+ 3324, -6139, 19488, -17925, 4283, -486, 20, 0,
+ 0, 9243, -13470, -6719, 5452, -10257, 12641, -4873,
+ -5116, -10595, 5856, 11389, 1502, 10876, -608, 11765,
+ -13218, 13911, -7373, -2070, -13679, -4154, 5536, -2138,
+ 16643, 451, 0, 0, 9243, 2455, -3679, -15387,
+ -5277, -1261, -8697, 7906, 16112, 8147, 3208, -1690,
+ 7687, 10593, -9796, -15852, -10884, -5616, 2881, 2032,
+ 5246, -12735, -8796, 10928, 14833, 0, 0, 9243,
+ -6849, 2775, -14202, 13586, -2655, -9402, -5505, 10809,
+ -18013, 6231, 5444, -6041, 11288, 4958, -4078, 18799,
+ -9368, -9291, 4535, 7383, 9405, -7391, -2121, -4336,
+ 0, 0, 9243, 6423, -9040, 11548, 10359, -8109,
+ -450, -14580, 6431, 10857, -15475, 3569, 9707, 6416,
+ -9607, 521, 8528, -18391, 11049, 3815, -10423, 6860,
+ 6860, -883, -4221, 0, 0, 9243, 11932, -5968,
+ -8850, -14749, -9946, -6026, 7377, -4472, 5206, 14547,
+ -3406, 10508, 2526, 4411, 14543, 8444, -5822, 347,
+ 12347, -1709, -9158, 105, -16265, -12642, 0, 0,
+ 9243, 13044, -150, 9282, 16910, -274, -10332, -194,
+ -5864, 5428, -420, -12196, 344, -8679, 145, -18554,
+ -12695, -152, -14635, 503, 10389, 358, 5076, 522,
+ -16100, 0, 0, 9243, -8374, -13590, -1221, 1428,
+ 15896, 12005, 2318, -4793, 2590, -3209, -20390, -6256,
+ -2974, 10766, 1202, -876, -6597, 5004, 19896, -1541,
+ 2902, -16788, -3062, 1340, 0, 0, 9243, 9879,
+ 10267, 7300, 10073, 14167, 2416, 10469, -3094, 2899,
+ 17092, 9762, -7400, 7214, -5250, -8238, -3989, 5578,
+ 16392, -1050, -11848, -776, -5034, -15850, -5882, 0,
+ 0, 9243, -4974, -9068, 12221, -8490, 6299, -388,
+ -15478, 8702, -9920, 12723, -2810, 9668, 6905, -13040,
+ 4325, -9456, 16856, -9159, -2909, -10476, 7149, 9387,
+ -7350, 233, 0, 0, 9243, 3627, -13823, -7218,
+ -3656, -7002, 12776, 13935, 2719, 2446, 8352, 9252,
+ -7676, -18413, -6212, -429, -1272, -6335, -13356, -9510,
+ 295, 18926, 9934, 1112, -382, 0, 0, 9243,
+ -6383, -9343, -11326, 10097, 8329, 223, 14780, 6114,
+ -10348, -15590, -4195, 9257, -7445, -9439, -323, 7902,
+ 18117, 12101, -3142, -10944, -5577, 7327, 566, -4133,
+ 0, 0, 9243, 2626, 865, 15769, 5783, 317,
+ -10244, 1905, 16884, 9144, 826, -2420, -1972, -14536,
+ 2413, 16939, 12500, 1482, -4906, -578, 10096, -3476,
+ -14323, 2745, 16105, 0, 0, 9243, -8975, 12086,
+ 5450, -6832, -15149, 7333, 9200, -3550, -362, -13645,
+ -15525, -1391, 9428, -7091, -5442, 3105, -820, -17685,
+ -9175, -9462, 5572, -9191, -12325, -2180, 0, 0,
+ 9243, -114, 11576, -11058, 177, -185, 5875, -17880,
+ 8539, -198, 339, -173, -3411, -16698, 16336, -6369,
+ 193, -430, 408, -75, -10806, -7225, 19670, -13817,
+ 4665, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 32767, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 32767
+};
+
+const MappingMatrix mapping_matrix_fifthoa_mixing = { 38, 38, 0 };
+const opus_int16 mapping_matrix_fifthoa_mixing_data[1444] = {
+ 9243, 0, 16010, 0, 0, 0, 20669, 0,
+ 0, 0, 0, 0, 24456, 0, 0, 0,
+ 0, 0, 0, 0, 27731, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 30657, 0,
+ 0, 0, 0, 0, 0, 0, 9243, 0,
+ -7023, 14387, 0, 0, -4369, -14112, 14455, 0,
+ 0, 0, 10931, -510, -16777, 14031, 0, 0,
+ 0, 0, -5118, 14286, 4343, -18465, 13374, 0,
+ 0, 0, 0, 0, -6494, -12221, 11761, 8513,
+ -19458, 12605, 0, 0, 9243, -14128, 5093, 5547,
+ -10946, -10050, -7197, 3945, -11790, 7142, -9213, 6529,
+ -9701, -2563, -9923, -14846, 16521, 6816, 2764, 14103,
+ 1118, -5537, 2977, -14168, 1228, 4866, 17430, -528,
+ 10639, 2641, 10437, -1037, 11460, 1098, 1296, 15737,
+ 0, 0, 9243, 1128, -14775, 6062, 955, -2329,
+ 16069, -12511, 2477, 579, -2333, 3440, -14197, 18478,
+ -6050, 940, 303, -1604, 4106, -4223, 9829, -22688,
+ 10647, -2604, 334, 145, -927, 3203, -6017, 4507,
+ -3812, 24212, -15600, 5198, -1023, 110, 0, 0,
+ 9243, 1158, 12997, 9277, 1501, 2103, 10097, 16840,
+ 5916, 1402, 3225, 2488, 2929, 19916, 12706, 3585,
+ 1137, 3415, 4698, 2078, -5442, 16634, 18511, 8731,
+ 2095, 850, 3061, 5733, 5225, 960, -11728, 7689,
+ 20588, 14659, 5642, 1187, 0, 0, 9243, -4663,
+ -3081, -15003, 9771, 2007, -9185, 6457, 14199, -14357,
+ -4976, 3554, 6625, 11434, -7231, -11297, 17760, 8291,
+ -6267, -3368, 6712, -10837, -9107, 6524, 6793, -19531,
+ -11338, 7934, 7335, -2205, -9215, -7094, 10659, 6243,
+ -4337, -1250, 0, 0, 9243, -13515, 7679, -3831,
+ 7232, -14496, -3201, -4109, -11731, 8828, 9178, -1901,
+ -10848, -539, -14888, 9626, -10860, 12703, 3824, 12334,
+ -7104, 3496, -6203, 13852, 5461, -2109, -17277, 7837,
+ -4714, 13901, 4097, 3940, 7647, 8546, 8688, -10986,
+ 0, 0, 9243, 8113, -9860, 9657, 10943, -11174,
+ 1426, -13300, 1915, 8178, -17833, 6805, 8309, 8100,
+ -3121, -4742, 2683, -15111, 15688, 2358, -11590, 2807,
+ 2746, 8762, -7430, -2251, -5481, 16370, -4081, -9694,
+ 5872, -11539, -714, -9492, 15177, -6126, 0, 0,
+ 9243, 9933, -9215, -8528, -11831, -12785, -62, 10976,
+ -1811, 5593, 18018, 6100, 9455, -5237, 2758, 8971,
+ 2743, -9659, -13517, 5330, -10737, -4576, -2069, -15491,
+ -8749, -7226, -5237, 9191, -181, -12277, 2815, 10540,
+ -27, 14741, 16703, 3103, 0, 0, 9243, -10067,
+ -8881, -8723, 12265, 12487, -793, 10821, -1762, -6021,
+ -18002, -5072, 9912, -4395, 2587, 9368, -2767, 10021,
+ 12259, -6468, -10113, -5605, -1761, -15590, -9430, 7800,
+ 5092, -8835, 2293, 12314, 1222, 10671, -329, 13745,
+ 17349, 3563, 0, 0, 9243, -6485, 12991, -6743,
+ 6108, -11768, 10080, -12236, 238, -2883, 13115, -13907,
+ 2900, -14460, 511, 2564, 186, -7019, 19094, -11597,
+ -5472, -12058, 744, 6243, -2384, 930, 501, -11778,
+ 21214, -5330, -11746, -5542, 827, 10475, -6418, 1132,
+ 0, 0, 9243, 3862, 5238, -14627, -7891, 2826,
+ -7015, -10701, 13900, 11410, -6831, -1679, -9861, 6359,
+ 12032, -11660, -14041, 11199, 1713, -3895, 657, 14749,
+ -3017, -11445, 8380, 15575, -15236, -346, 7690, -923,
+ 10317, 3498, -13545, 354, 9093, -4476, 0, 0,
+ 9243, -8417, 13183, 3418, -4018, -15498, 10685, 6294,
+ -4132, 1419, -8755, -18818, 3926, 7642, -9001, -3235,
+ 2125, 3506, -13037, -16570, -4337, 6729, -13404, -7991,
+ 59, 443, 5804, 6005, -15011, -9060, -11044, 3679,
+ -15434, -13685, 161, 1185, 0, 0, 9243, -5288,
+ 6773, -13508, 9977, -5002, -4784, -12780, 10790, -12942,
+ 11168, 519, -10890, 1326, 12078, -6274, 13780, -16427,
+ 2186, 5352, -4328, 13671, 2364, -7963, 1080, -12568,
+ 19336, -6557, -8574, 4084, 7277, 10433, -9273, -3178,
+ 1516, 3817, 0, 0, 9243, 9660, 7817, 10093,
+ 13619, 10548, -2942, 11021, 597, 9663, 17594, 1736,
+ -10794, 1814, 771, -8469, 1041, 14155, 7891, -8597,
+ -7498, -8982, 346, -12407, -11848, -6809, 1686, 9181,
+ -8306, -10247, 3538, -10706, -364, -8047, -19188, -8493,
+ 0, 0, 9243, -7163, -1020, 14282, -14289, 1021,
+ -10208, -2036, 10660, -18919, 2410, 6564, 2323, -13088,
+ -1798, 3365, -19498, 3619, 12022, -1858, 9978, 3705,
+ -8969, -643, -5794, -15523, 4123, 15113, -3949, -6265,
+ -3596, 12490, 2946, -2688, 1225, -14570, 0, 0,
+ 9243, -12187, 772, -10354, 17623, -1314, -10262, -1117,
+ -2885, -9937, 2249, 11267, -1763, 9572, -368, 16506,
+ -6510, -1438, -15014, 2402, 10157, 2041, 2458, 2389,
+ -19346, 19860, -1041, 8067, -3704, -10931, 2743, -9286,
+ 606, -13399, -3095, 7924, 0, 0, 9243, 15545,
+ -2367, -3011, -6538, -5139, -9657, 995, -16242, -15706,
+ 2557, -12952, 5226, 2508, 6353, 10156, 13593, 6966,
+ 4795, 8960, 8183, -1735, 11914, -4504, 14149, 11727,
+ -6665, 10460, -3962, 10145, -7648, -1965, -9845, -6764,
+ -6938, -16633, 0, 0, 9243, 3098, 12983, -8841,
+ -3826, 5618, 10053, -16031, 4787, 3283, -8209, 6632,
+ 2856, -18922, 10272, -2055, -2344, 7987, -11939, 5516,
+ -5520, -15739, 14940, -5001, 530, 1465, -6306, 13388,
+ -13243, 2513, -11772, -7170, 16572, -8384, 1426, 168,
+ 0, 0, 9243, -15767, -2008, -1916, 4220, 4422,
+ -9846, 537, -17105, 17650, -1400, 13589, 4481, 1651,
+ 5677, 6701, -9241, -6642, -3252, -7827, 8792, -951,
+ 13182, -2522, 17586, -17005, 3845, -12562, 2213, -11472,
+ -6688, -1394, -8970, -4769, -7316, -11753, 0, 0,
+ 9243, -13344, -3829, 7975, -14863, 7136, -8561, -4265,
+ -7992, -801, 9405, 8912, 7937, -5326, 5057, -17681,
+ 15207, 575, 7717, -11360, 4847, 6789, 4150, 12686,
+ -10050, 16730, -12063, 322, -12920, -3313, -10267, 1980,
+ -6948, 7112, 7972, 8042, 0, 0, 9243, 7791,
+ -1021, 13949, 15180, -1111, -10208, -1989, 9348, 19199,
+ -2561, -7140, 2323, -12782, -1577, 817, 18164, -3673,
+ -12771, 2022, 9978, 3620, -7865, -156, -9155, 11924,
+ -3842, -15336, 4196, 6814, -3596, 12199, 2583, -652,
+ 1936, -17637, 0, 0, 9243, -4810, -15144, -1958,
+ 1315, 10175, 17406, 4142, -1348, 263, -3292, -15632,
+ -17046, -6363, 3374, 605, -227, -748, 5997, 20334,
+ 14481, 8277, -6146, -1717, 5, 27, 712, 1542,
+ -9197, -23572, -10163, -9595, 9425, 3539, -17, -72,
+ 0, 0, 9243, -7366, 8261, 11568, -11901, -8499,
+ -2079, 13347, 5556, -12049, -16247, -2282, -10529, 3584,
+ 7585, -1577, -8464, -18652, -8902, 5913, -8688, -9287,
+ 4156, -2442, -7089, -2993, -14485, -13949, 5422, 8459,
+ 1638, -13285, -2531, -1826, -12132, -9456, 0, 0,
+ 9243, 11716, 698, -10889, -17818, 1143, -10275, -1062,
+ -1305, 12057, -2057, -10855, -1595, 10088, -150, 15043,
+ 2978, 1578, 15225, -2090, 10201, 1943, 1115, 1969,
+ -20211, -17636, 430, -9826, 3391, 10572, 2485, -9826,
+ 248, -12259, -2924, 12131, 0, 0, 9243, 4361,
+ -4594, -14703, -8956, -2798, -7781, 9434, 13769, 12936,
+ 6800, -2400, 9082, 8091, -10453, -11023, -15786, -11136,
+ 3285, 4153, 2658, -14002, -5051, 9489, 7000, 17206,
+ 15024, -2777, -8491, -42, -10626, 141, 13053, 2366,
+ -6662, -2231, 0, 0, 9243, -752, -11933, -10646,
+ 1119, 1254, 6890, 17745, 7875, -1203, -2207, -1251,
+ 2024, -17706, -15532, -5600, 1128, 2691, 2800, 683,
+ -9927, 9661, 19706, 12522, 3889, -978, -2789, -3992,
+ -2440, 206, 12695, 2921, -17173, -18575, -9616, -2657,
+ 0, 0, 9243, 4791, -15001, -2887, -1931, -10037,
+ 16885, 6048, -1020, 46, 4789, 15191, -15922, -9154,
+ 2530, 823, 252, -130, -8608, -19335, 12613, 11651,
+ -4549, -2314, -172, -101, -784, 265, 12975, 21741,
+ -7551, -13101, 6856, 4710, 535, -46, 0, 0,
+ 9243, -12153, -10395, 754, -1281, 17644, 2735, -1095,
+ -10274, 8359, 2200, -12593, 7083, 782, 17650, -1573,
+ 1685, -16282, -2164, -530, -11878, 32, -17359, 3065,
+ 6651, -5212, -3628, 19365, 965, 13180, 8243, -818,
+ 7746, -3645, -14323, 1670, 0, 0, 9243, -6961,
+ -11198, 9081, -8829, 10887, 4833, -14202, 2374, -6524,
+ 16339, -9417, 4737, 12284, -4394, -2691, -2683, 13690,
+ -18539, 2830, -11438, -3692, 4985, 5648, -4628, 514,
+ 6225, -18409, 12672, 5311, 11170, -6928, -3407, -7595,
+ 10737, -3977, 0, 0, 9243, 12099, -10405, 1294,
+ 2187, -17582, 2760, -1880, -10105, -8058, -3760, 12583,
+ 7058, 1346, 17376, -2667, -2829, 15710, 3705, 468,
+ -11880, 50, -17123, 5201, 6230, 4698, 6098, -18716,
+ -1665, -13088, 8285, -1400, 7696, -6196, -13429, 2770,
+ 0, 0, 9243, 8602, 13392, 1722, 2070, 16090,
+ 11359, 3222, -4960, -2638, 4581, 20106, 5099, 4026,
+ -10978, -1778, -1314, -6620, 6988, 18701, -2965, 3745,
+ -16745, -4461, 1300, 584, -3646, -11588, 8350, 11847,
+ -10050, 2372, -20010, -7809, 3608, 887, 0, 0,
+ 9243, 14252, -1958, 7026, 13986, -3899, -9870, -1922,
+ -10736, -3693, -4527, -12333, 4376, -6080, 3475, -18537,
+ -19222, 1355, -10843, 6913, 8869, 3408, 8323, 6804,
+ -5141, -13648, 7800, 2649, 7171, 10505, -6548, 5179,
+ -5505, 13299, 2086, 15579, 0, 0, 9243, 11323,
+ 9021, -6835, -10810, 14267, -489, -8613, -5689, 639,
+ -16117, 6224, -9731, -3757, -8482, 10882, 7873, 1080,
+ -11447, -6791, -10388, 4099, -6025, 18396, -5407, -7536,
+ 14714, 984, 1267, -13940, -1889, 8416, 666, 16762,
+ -10106, -3418, 0, 0, 9243, 871, 4833, 15238,
+ 1855, 588, -7508, 10287, 16162, 2857, 1481, -443,
+ -9392, -7758, 12910, 16506, 3837, 2588, -581, -851,
+ 1928, -14879, -5066, 14950, 16498, 4773, 3842, -425,
+ -1785, -82, 10578, -1435, -15554, -2459, 16520, 16250,
+ 0, 0, 9243, 14762, 5967, 1673, 3450, 12303,
+ -6027, 1394, -15022, -14571, 3402, -4217, -10507, -478,
+ -14813, -5131, -6634, -16293, -82, -15276, -1705, -1731,
+ 358, -5738, 13681, 12503, -8200, -3023, -3290, -7384,
+ 9272, -837, 14328, -1064, 16913, 7915, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 32767, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 32767
+};
+
const MappingMatrix mapping_matrix_foa_demixing = { 6, 6, 0 };
const opus_int16 mapping_matrix_foa_demixing_data[36] = {
16384, 16384, 16384, 16384, 0, 0, 0, 23170,
@@ -376,3 +657,283 @@ const opus_int16 mapping_matrix_toa_demixing_data[324] = {
0, 0, 0, 32767
};
+const MappingMatrix mapping_matrix_fourthoa_demixing = { 27, 27, 0 };
+const opus_int16 mapping_matrix_fourthoa_demixing_data[729] = {
+ 4870, 4484, 4870, 4347, 4440, 4726, 4683, 4821,
+ 4883, 4842, 4603, 4484, 4683, 4698, 4234, 4368,
+ 4603, 4783, 4783, 4820, 4821, 4347, 4820, 4440,
+ 4698, 0, 0, 101, 84, -7818, 4640, -7178,
+ -5492, 4629, 8384, 6547, -4966, 617, -6345, 1061,
+ -3241, 2939, 5549, 6390, -4434, 4994, -2610, 1993,
+ -2873, 1547, -4356, -164, 0, 0, 8797, 5074,
+ -1553, 5383, 1906, 5297, 2722, 1158, -5226, 1311,
+ -7760, -3327, -1940, 1586, -4093, -2951, -214, -6873,
+ 5450, -4875, -7193, -4438, 558, 5593, 5607, 0,
+ 0, -26, 5761, -3723, -1460, 1195, -3065, -6357,
+ -1175, 608, 6965, 2310, 2759, -8023, -7138, 5162,
+ -3624, 5006, -809, 3592, 6209, -4159, -4968, 8150,
+ 2513, -5702, 0, 0, 301, 109, 7161, -2462,
+ -2443, 5044, -7125, -2256, 1967, -9107, 259, -4928,
+ -2592, 6514, 4111, -7236, 8695, 635, 5009, -4025,
+ -1937, 4794, 3420, -3507, -400, 0, 0, -134,
+ 85, 2771, 7842, -3649, -8225, 2866, 2586, -9200,
+ -1945, -1563, 6155, -720, -1061, -3494, -4513, -487,
+ 8389, 7317, 3348, -3721, 3806, 371, -6896, 70,
+ 0, 0, 10919, 2072, -4867, 3472, -4429, 1721,
+ -4066, -5193, 1032, -5253, 9501, -2017, -3971, -5261,
+ -306, -2737, -5137, 5713, 1237, -8, 6387, 364,
+ -5423, 3364, 2888, 0, 0, -48, 8946, 1048,
+ -2691, 602, -4332, -4302, -514, -1730, 2459, -4328,
+ -2156, 3335, -2748, -6029, 4023, 155, 897, 5268,
+ -8380, 7625, 7395, 508, 3945, -8951, 0, 0,
+ 39, 4151, -5965, -3398, -7006, -3534, 2697, -8989,
+ -5237, 2913, 46, -5540, 8196, 5766, 2711, -2520,
+ -3043, -2146, -948, 4965, 1806, 2472, 8988, -1266,
+ 4840, 0, 0, -407, -189, 2179, -1627, 6516,
+ 259, 7196, -9449, -4905, -9766, 561, 4021, 3371,
+ -8650, 5032, 3329, 2534, 641, 2224, -5747, 1047,
+ -4074, 5252, -24, 674, 0, 0, 664, 237,
+ -2837, -4072, -1205, 8252, -5875, -1670, -2743, -3984,
+ 381, 5059, 1765, 2666, -8295, 7403, 1154, -2086,
+ 7622, 7105, 3677, -6943, 1050, -6632, -694, 0,
+ 0, 382, -133, 5699, 7650, 5154, -5713, -1645,
+ -6902, 6181, 4450, 1151, 410, -993, 3829, 2444,
+ -2405, -6618, -9514, 5366, -1896, 5844, -2886, -1524,
+ -7321, -1007, 0, 0, 12767, -2530, 3183, -1409,
+ -4015, -2894, -5155, -1710, 3841, -2107, -10274, 5119,
+ 3979, -4010, 5550, 4822, -746, -2507, -3080, 4289,
+ -3675, 4333, -1416, -1230, -1122, 0, 0, 17,
+ 8048, 2398, -2167, -73, -3606, 3125, 398, 731,
+ -5973, 5705, -1032, 4679, 7305, 3134, 1301, -3858,
+ -89, 2938, 4359, -9155, -4805, -8407, 3673, -8645,
+ 0, 0, 187, 7355, 3145, -6719, -4432, -5939,
+ 2541, -2810, 9723, 778, -1105, 5687, -4174, 2534,
+ -4461, 1017, -244, 5481, -1655, -6765, -3350, -4894,
+ 1592, -2318, 8827, 0, 0, 196, 3588, 9631,
+ 3063, -4564, 6043, 2683, 2595, -2488, -2186, 173,
+ -6059, -8270, -2386, 409, 7441, -8608, 376, -4364,
+ 2321, -280, 97, 8331, -3022, -4721, 0, 0,
+ 117, -748, -10833, 1533, 4200, -2875, -997, -109,
+ -3661, -6119, 4454, 8808, -9189, 8294, 1521, 7265,
+ -2348, -5094, -948, -5400, -3193, 8914, 5763, 1716,
+ -1070, 0, 0, 2497, 399, -5201, -2038, 7843,
+ -376, 7567, -5073, 7616, -5537, 2086, -3453, -5544,
+ -56, -11648, -1314, 3546, -3432, -117, 8694, -4245,
+ 9621, 3098, -2582, -2351, 0, 0, 4386, -3104,
+ -3132, -10512, 566, 5217, 5128, 4967, 1348, 7035,
+ -1470, 91, -125, -3548, 8244, -3029, -10033, 2186,
+ 9745, -6440, -2074, 3638, -1477, -7045, -562, 0,
+ 0, 2154, 8116, -6102, 6570, 12998, -712, -4126,
+ -4996, 30, 1571, -6393, -12794, 425, 5036, 1190,
+ 5763, 5653, 12933, -6671, 5197, -2964, -3316, -6354,
+ -10554, -2652, 0, 0, 12618, -3737, 93, -5901,
+ 4262, -3364, 4444, 3103, -2767, 3403, 4925, -2584,
+ -989, 4977, -3714, -1965, 3076, 326, -2946, -2568,
+ 1026, -2980, 3362, -6132, -5966, 0, 0, 6001,
+ 48, -1979, -7275, 3476, -2096, 10591, 3793, 2629,
+ -447, -14747, -3689, -5525, 8358, 6883, -9703, -4556,
+ 7471, 2965, 4056, 13221, -7327, -3073, -2353, -6720,
+ 0, 0, 621, 11034, -44, -2828, 5978, -1850,
+ -1772, 3894, -7471, -1397, 945, -2028, -2928, -2240,
+ 3172, 2222, 4544, -4243, -5645, 3745, 2573, 3511,
+ -8206, -7286, 5700, 0, 0, 321, 10818, -4982,
+ 7813, -749, 9907, 1360, -1443, 568, -1103, 2305,
+ 6045, 2270, -1063, -1920, -3073, 5893, -3476, -11346,
+ -1657, -588, 2957, -2287, -8527, -8041, 0, 0,
+ 119, -268, 2372, -3040, 4979, -3789, -5630, 10619,
+ 5900, -5109, -4585, -3862, 10467, -3527, -385, -10034,
+ -9991, 4860, 984, 2362, 2311, -6804, 6324, 433,
+ 5291, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 32767, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 32767
+};
+
+const MappingMatrix mapping_matrix_fifthoa_demixing = { 38, 38, 0 };
+const opus_int16 mapping_matrix_fifthoa_demixing_data[1444] = {
+ 3188, 3247, 3268, 3368, 3368, 3138, 3268, 3099,
+ 3211, 3368, 3099, 3247, 3211, 3368, 3368, 3368,
+ 3149, 3268, 3247, 3211, 3099, 3188, 3138, 3149,
+ 3099, 3188, 3368, 3149, 3188, 3247, 3268, 3138,
+ 3211, 3368, 3138, 3149, 0, 0, 118, -47,
+ -5011, 282, 333, -1497, -4584, 2908, 3388, -3647,
+ -2493, 1139, -2882, -1719, 3604, -2543, -4328, 5443,
+ 1286, -5498, -4583, 2510, -1743, -2556, 4168, 1446,
+ -290, 1812, -4074, -2377, 4152, 2847, 4991, 3980,
+ 393, 5072, 0, 0, 5489, -2235, 1507, -5326,
+ 4609, -1096, 2926, -3427, -3301, -3078, 4226, 1730,
+ 4627, 2561, 2966, -592, 143, -677, 4617, -755,
+ -956, -433, -5138, 3037, 157, -1394, -4498, -4984,
+ -3661, -4112, -3756, 4628, -570, 3356, 1605, 1803,
+ 0, 0, -162, 5162, 2132, 2392, 3556, -5141,
+ -1536, 2975, -3001, -3350, -2231, -5230, 1294, -4965,
+ 3494, 5230, -3292, -1359, -2945, -773, 2670, 4867,
+ -660, 3720, -3415, -5112, -3700, -1211, 407, 3013,
+ 763, 591, 2481, -2657, 5210, 784, 0, 0,
+ -156, 338, -4246, 510, 462, 3296, 2846, 3333,
+ -4292, 4574, 1940, -2986, -1275, 3701, 5022, -5250,
+ 5780, -2676, -1180, 1516, -4852, 4877, 342, -3923,
+ -5703, -2920, 379, -657, -361, -3346, 1044, 795,
+ 5257, -4004, 698, 1115, 0, 0, 47, -140,
+ -3292, -1097, 652, 855, -5260, -3691, -4470, 4521,
+ -3863, 1093, -5552, -2016, 3831, 334, -456, -1532,
+ 2068, 1788, 2054, -295, 3668, -2820, 328, -994,
+ 295, -3301, 5770, 4282, -6353, 5632, -1371, 5005,
+ 238, 4041, 0, 0, 6764, -1659, -2730, 5726,
+ 3715, -3216, -933, 531, -52, -345, 3022, -2818,
+ 4005, -1617, -1189, -3748, -3403, -3592, 4040, -3553,
+ -2806, -3444, 6023, -711, -3298, -2503, 2548, 5564,
+ 940, 1848, 1207, 4010, -3488, -358, -2511, -1966,
+ 0, 0, -64, -5039, 1403, -4455, 6240, 2189,
+ -1716, -4348, 4183, 3951, -4042, -3606, 2399, -4563,
+ 4050, -612, -395, 348, -5791, 391, -1440, -735,
+ 1398, 4359, -518, 2969, 6556, 1951, -518, -4993,
+ -925, 998, -569, -2934, 3460, 420, 0, 0,
+ 16, 5482, -4122, 770, 2082, 5020, -3961, 485,
+ -584, -793, 3, 5222, -1416, 3673, 78, 3549,
+ -937, -5723, 1673, -6162, -2540, 3082, -355, 1838,
+ -615, 4601, 2832, -359, -3346, 668, -3393, -1583,
+ -3774, -2206, 5754, -4961, 0, 0, -328, 299,
+ 2470, 317, 525, -4494, 2805, 2617, 2383, -2363,
+ -1037, 4085, 895, -4622, 3218, -6607, -3381, -5933,
+ 1397, 6394, -446, 5694, 14, -4510, 4329, 3690,
+ -334, 0, 2932, -2478, -2944, -577, -599, -230,
+ 1553, -4736, 0, 0, -324, 142, -3252, -867,
+ 1111, -1882, 3378, -6055, 6502, -6840, 4280, -2694,
+ -2876, 4190, 6454, 655, 1061, 626, -2669, -798,
+ 3192, -985, -898, -5482, -548, 2315, -558, 1302,
+ 900, 5747, -1325, 1599, -1384, -5749, 624, 1110,
+ 0, 0, 321, 312, 2188, 1322, 237, 708,
+ -304, 2463, 1500, -1094, -5112, -1010, -6799, 646,
+ 992, 1969, 3423, -3996, 2628, 4451, 3432, -2833,
+ -6101, -330, -3768, -3, -707, 5961, -4037, -3736,
+ 4080, 7254, -4113, 2151, 54, -2150, 0, 0,
+ 7735, 4064, -3884, -5240, 577, 2229, -3947, 2914,
+ 3555, 4011, 774, -3519, 1985, -3701, -3824, 330,
+ -905, 2085, 1155, 2176, 3006, 340, -5533, -3264,
+ -902, 3114, 344, -5060, 1524, 1805, 1926, 2350,
+ 1905, -3203, -2762, -4162, 0, 0, 193, -151,
+ -1434, 6289, 7354, 4234, 169, 2868, -1977, -1375,
+ -4987, 2345, 2742, 599, 939, -4837, 2688, 991,
+ -6907, 716, -1542, -4346, -1833, 1493, 3134, 2903,
+ -7019, -2835, 93, 4395, 621, 870, -2357, -975,
+ -2933, -127, 0, 0, -616, -5968, -3479, -1651,
+ 4932, -2445, -5512, -1451, 691, 739, 479, 4227,
+ -2886, 3853, 8, -501, 188, 1990, 3842, 2270,
+ 1662, -174, 1290, 2456, 67, -3267, -5535, 483,
+ 5721, -1642, 6501, -3432, 1184, -3246, 4101, -4880,
+ 0, 0, -465, 5264, -4812, 682, 1683, -4539,
+ 2916, -1985, 2899, 3324, 1060, -4398, -745, -2137,
+ -3827, 1044, 6225, 3609, -532, 1980, -6001, 564,
+ -209, -1299, 5336, -3605, -1484, 37, 19, -1295,
+ -665, -385, -6773, 3651, 6153, -1291, 0, 0,
+ 193, -415, 5166, -110, 626, 6743, -2860, 1425,
+ 1101, -1341, 80, -4533, 249, 4231, -119, -6009,
+ -2970, 5170, -822, -2610, 4527, 5948, 182, -2589,
+ 837, -5471, 371, -43, 373, -665, -1233, -626,
+ -7353, 2606, 1339, -1398, 0, 0, -533, 147,
+ 2075, -672, 1043, 3503, 4402, -4971, -3287, 3731,
+ -2606, 3817, 1972, -5603, 5114, 1185, -1318, 1906,
+ 3018, -1999, 343, -1943, 207, -6744, 913, -4060,
+ 645, -349, -5667, 4766, 5575, -1733, 1116, 160,
+ 1534, -5690, 0, 0, -137, -36, 1556, 1325,
+ 1553, -2230, 1188, 5296, -5104, 4673, 6295, 498,
+ -4723, 933, 2994, 4067, -4700, 1758, -4116, -1252,
+ 2444, -4092, 1653, -2802, 5069, 1133, 790, -2355,
+ -934, -6304, 1642, 2045, -4259, -3873, -213, 215,
+ 0, 0, -364, 423, 4888, -1316, 118, -950,
+ 4027, 114, 2961, -3136, -3012, -883, -6192, 1340,
+ -3210, -1193, 1376, 3128, 1596, -2994, -3194, 533,
+ 8502, 2487, -1485, 1032, 301, -8007, -577, 887,
+ 297, 7778, 3121, -1901, -94, -6401, 0, 0,
+ 9260, -1845, 668, 2787, -2255, 2699, -2512, -3737,
+ -3675, -3601, -1803, 210, -1701, -1442, -2700, 3457,
+ 2868, 2079, -2113, 3178, 1277, 3578, 5240, -2482,
+ 3324, 1020, -4027, 3835, -3758, -3633, -3170, -1310,
+ 2509, -3110, 713, 174, 0, 0, -399, 4969,
+ -2321, -7744, 6494, -3776, 1478, 758, -1794, -2233,
+ -4059, 4932, 2770, 4761, -3475, 1243, 829, -651,
+ -5358, -436, 2381, 1360, 2561, -3118, 858, -4366,
+ 3933, 3646, -43, -1310, -16, 924, 1197, 1415,
+ -5036, -376, 0, 0, 100, 1410, 1290, 3199,
+ 7091, -3638, -2641, 1118, 45, -441, 794, -974,
+ -5033, 889, 438, -3102, 895, 3555, 4672, 4795,
+ 1129, -2408, -2153, 1742, 159, -2040, 7578, -2006,
+ -5737, 1986, -5568, -6413, 2428, -1387, -2441, 667,
+ 0, 0, -37, -6031, -4434, -904, 3290, 1806,
+ 4736, 2516, -5905, -5927, 1754, -4300, -2468, -2203,
+ -4836, -672, 1444, -1591, -1631, -1789, 4311, -153,
+ -688, -1222, 1058, 3139, 4659, -353, 1543, 1838,
+ 2180, -1448, 2432, 6277, 5304, -1692, 0, 0,
+ -280, 4506, 807, -477, 823, 3550, 1427, -1856,
+ -3003, -3501, -1203, 2679, 933, 778, -4954, -1977,
+ -7458, 4687, 435, 7045, -4053, -3130, 257, -3917,
+ -6165, 1889, 927, 235, 1889, -1097, 1985, 630,
+ -2172, -2130, 7080, 4810, 0, 0, -300, 496,
+ 2808, 279, 667, -7179, -2661, -526, -2832, 1751,
+ 2849, 4829, -906, -4151, -1124, -3062, 8166, 5361,
+ -1656, -6017, 3265, 2551, -864, -432, -6966, 6295,
+ -168, 901, 442, -582, 269, 236, -3574, 799,
+ 472, 565, 0, 0, 805, -2466, 6208, -4592,
+ -170, -6701, -5610, 3678, -4242, 4561, -724, -5534,
+ 2415, 7354, 2761, 2699, -349, 3822, -2372, 1756,
+ -5523, -3445, -588, -5749, -3986, 9804, -3871, 5375,
+ -2308, 5504, -2766, -1651, 1472, 6832, 2705, -5104,
+ 0, 0, -700, -1179, 4402, 400, 1383, 939,
+ -1342, 6013, 2577, -3472, 472, 2883, 1450, -3917,
+ 2849, 5084, 4990, 5392, 342, -4925, -3329, -5372,
+ -2674, -6035, -5072, -836, 179, 2506, 7987, -3647,
+ -8202, -1437, 1891, 2400, 1607, -3611, 0, 0,
+ -4706, -4003, 9928, -379, 5557, 3738, -8789, 685,
+ 1937, -5157, 13388, 7995, -4119, -9909, -5079, 4804,
+ 5586, 774, -5430, 299, -9943, 3264, -3690, -3901,
+ -1133, -6199, 3182, 1544, 5467, 3686, -2639, 4068,
+ 1163, -185, -1299, -506, 0, 0, 843, 1005,
+ -1059, 467, -1279, -2259, 6057, -1694, -5885, 5342,
+ -5160, -3748, -1382, 4420, -697, -2000, -3808, 3100,
+ 2685, -4073, 531, 318, -7822, 2414, 2901, 3399,
+ -1340, 8449, 3685, 463, -3341, 2423, 2304, -2723,
+ 84, -2622, 0, 0, 12088, -265, 2562, -435,
+ -4348, -2426, 3538, 1552, 1279, 883, -4166, 2634,
+ -6130, 2994, 3729, -1570, -601, -1753, -5124, -2788,
+ -2096, -1920, -2649, 2793, -1079, -1952, 2983, -1530,
+ 2499, 1769, 1492, -6757, -2108, 2841, 1466, 2597,
+ 0, 0, -3830, -4093, 2448, 12720, 7737, -665,
+ -832, -9257, 2971, -2400, 791, 1873, 1072, -587,
+ -7440, 8055, 1531, -4736, 616, -1782, -2982, 9663,
+ -5057, -5926, 1610, -4489, 7033, -8658, 6010, -5673,
+ 5648, 812, -271, -1802, -4500, 4392, 0, 0,
+ -888, -327, 3373, -1084, 7959, 2430, 1898, -2360,
+ -1820, -1377, -1090, -4436, -3422, -1106, -3230, 3876,
+ -41, -5128, 6375, -1848, -3824, 5844, 617, -1957,
+ 4232, 1345, -1439, -83, 3046, -214, 5458, -5566,
+ -4387, -3738, -5740, 8657, 0, 0, 6978, 6239,
+ -3686, -981, -2854, 78, 5859, -357, 4618, 7391,
+ -138, 971, -5799, 2135, 4478, -7004, -5949, 1668,
+ -6933, -1163, 7010, -5624, 2990, 6192, -8075, 3567,
+ -8308, 2236, -5098, -2120, -4355, -4238, 4955, 10230,
+ 692, -5606, 0, 0, -1348, -7069, -12, -4927,
+ 1211, 651, 1360, 7744, 3404, 5069, -2438, -105,
+ 2332, 1494, -4686, 1336, -3628, -881, 2474, 1736,
+ -26, -257, 2135, -4452, 446, -641, -4704, 2605,
+ -6436, 6662, -4939, 990, -1100, -3782, 5028, 4753,
+ 0, 0, -2875, 6410, 3518, 3950, 1271, 869,
+ -2842, -5837, 1532, -2899, 1140, -597, 1712, -1988,
+ -4819, -4783, 4773, -8796, 2240, -4596, 3565, -4853,
+ -556, -3974, 7366, -4370, 3113, -3548, 3552, -5450,
+ 3869, 2514, 6736, -4570, 6074, 3151, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 32767, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 32767
+};
diff --git a/media/libopus/src/mapping_matrix.h b/media/libopus/src/mapping_matrix.h
index 98bc82df3e..53646cb19f 100644
--- a/media/libopus/src/mapping_matrix.h
+++ b/media/libopus/src/mapping_matrix.h
@@ -117,6 +117,12 @@ extern const opus_int16 mapping_matrix_soa_mixing_data[121];
extern const MappingMatrix mapping_matrix_toa_mixing;
extern const opus_int16 mapping_matrix_toa_mixing_data[324];
+extern const MappingMatrix mapping_matrix_fourthoa_mixing;
+extern const opus_int16 mapping_matrix_fourthoa_mixing_data[729];
+
+extern const MappingMatrix mapping_matrix_fifthoa_mixing;
+extern const opus_int16 mapping_matrix_fifthoa_mixing_data[1444];
+
extern const MappingMatrix mapping_matrix_foa_demixing;
extern const opus_int16 mapping_matrix_foa_demixing_data[36];
@@ -126,6 +132,12 @@ extern const opus_int16 mapping_matrix_soa_demixing_data[121];
extern const MappingMatrix mapping_matrix_toa_demixing;
extern const opus_int16 mapping_matrix_toa_demixing_data[324];
+extern const MappingMatrix mapping_matrix_fourthoa_demixing;
+extern const opus_int16 mapping_matrix_fourthoa_demixing_data[729];
+
+extern const MappingMatrix mapping_matrix_fifthoa_demixing;
+extern const opus_int16 mapping_matrix_fifthoa_demixing_data[1444];
+
#ifdef __cplusplus
}
#endif
diff --git a/media/libopus/src/mlp.c b/media/libopus/src/mlp.c
index 964c6a98f6..e658ccde0d 100644
--- a/media/libopus/src/mlp.c
+++ b/media/libopus/src/mlp.c
@@ -33,35 +33,23 @@
#include "opus_types.h"
#include "opus_defines.h"
#include "arch.h"
-#include "tansig_table.h"
#include "mlp.h"
+#define fmadd(a, b, c) ((a)*(b)+(c))
static OPUS_INLINE float tansig_approx(float x) /* this hunk replaces the table-lookup version (removed lines) with a rational function x*P(x^2)/Q(x^2) */
{
- int i;
- float y, dy;
- float sign=1;
- /* Tests are reversed to catch NaNs */
- if (!(x<8))
- return 1;
- if (!(x>-8))
- return -1;
-#ifndef FIXED_POINT
- /* Another check in case of -ffast-math */
- if (celt_isnan(x))
- return 0;
-#endif
- if (x<0)
- {
- x=-x;
- sign=-1;
- }
- i = (int)floor(.5f+25*x);
- x -= .04f*i;
- y = tansig_table[i];
- dy = 1-y*y;
- y = y + x*dy*(1 - y*x);
- return sign*y;
+ const float N0 = 952.52801514f; /* numerator coefficients: N0 constant term, N1 for x^2, N2 for x^4 */
+ const float N1 = 96.39235687f;
+ const float N2 = 0.60863042f;
+ const float D0 = 952.72399902f; /* denominator coefficients: D0 constant term, D1 for x^2, D2 for x^4 */
+ const float D1 = 413.36801147f;
+ const float D2 = 11.88600922f;
+ float X2, num, den;
+ X2 = x*x;
+ num = fmadd(fmadd(N2, X2, N1), X2, N0); /* N0 + N1*x^2 + N2*x^4, evaluated in Horner form */
+ den = fmadd(fmadd(D2, X2, D1), X2, D0); /* D0 + D1*x^2 + D2*x^4, evaluated in Horner form */
+ num = num*x/den; /* multiplying the even ratio by x makes the result an odd function of x */
+ return MAX32(-1.f, MIN32(1.f, num)); /* limits the result to [-1, 1], assuming MAX32/MIN32 are plain max/min macros (defined outside this hunk; confirm) */
}
static OPUS_INLINE float sigmoid_approx(float x)
@@ -79,7 +67,7 @@ static void gemm_accum(float *out, const opus_int8 *weights, int rows, int cols,
}
}
-void compute_dense(const DenseLayer *layer, float *output, const float *input)
+void analysis_compute_dense(const AnalysisDenseLayer *layer, float *output, const float *input)
{
int i;
int N, M;
@@ -101,7 +89,7 @@ void compute_dense(const DenseLayer *layer, float *output, const float *input)
}
}
-void compute_gru(const GRULayer *gru, float *state, const float *input)
+void analysis_compute_gru(const AnalysisGRULayer *gru, float *state, const float *input)
{
int i;
int N, M;
diff --git a/media/libopus/src/mlp.h b/media/libopus/src/mlp.h
index d7670550fd..e6b1a8f76c 100644
--- a/media/libopus/src/mlp.h
+++ b/media/libopus/src/mlp.h
@@ -24,8 +24,8 @@
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-#ifndef _MLP_H_
-#define _MLP_H_
+#ifndef MLP_H_
+#define MLP_H_
#include "opus_types.h"
@@ -39,7 +39,7 @@ typedef struct {
int nb_inputs;
int nb_neurons;
int sigmoid;
-} DenseLayer;
+} AnalysisDenseLayer;
typedef struct {
const opus_int8 *bias;
@@ -47,14 +47,14 @@ typedef struct {
const opus_int8 *recurrent_weights;
int nb_inputs;
int nb_neurons;
-} GRULayer;
+} AnalysisGRULayer;
-extern const DenseLayer layer0;
-extern const GRULayer layer1;
-extern const DenseLayer layer2;
+extern const AnalysisDenseLayer layer0;
+extern const AnalysisGRULayer layer1;
+extern const AnalysisDenseLayer layer2;
-void compute_dense(const DenseLayer *layer, float *output, const float *input);
+void analysis_compute_dense(const AnalysisDenseLayer *layer, float *output, const float *input);
-void compute_gru(const GRULayer *gru, float *state, const float *input);
+void analysis_compute_gru(const AnalysisGRULayer *gru, float *state, const float *input);
-#endif /* _MLP_H_ */
+#endif /* MLP_H_ */
diff --git a/media/libopus/src/mlp_data.c b/media/libopus/src/mlp_data.c
index ae4178df76..65f7448ea0 100644
--- a/media/libopus/src/mlp_data.c
+++ b/media/libopus/src/mlp_data.c
@@ -651,20 +651,20 @@ static const opus_int8 layer2_bias[2] = {
14, 117
};
-const DenseLayer layer0 = {
+const AnalysisDenseLayer layer0 = {
layer0_bias,
layer0_weights,
25, 32, 0
};
-const GRULayer layer1 = {
+const AnalysisGRULayer layer1 = {
layer1_bias,
layer1_weights,
layer1_recur_weights,
32, 24
};
-const DenseLayer layer2 = {
+const AnalysisDenseLayer layer2 = {
layer2_bias,
layer2_weights,
24, 2, 1
diff --git a/media/libopus/src/opus.c b/media/libopus/src/opus.c
index 538b5ea74e..816a4dd5fa 100644
--- a/media/libopus/src/opus.c
+++ b/media/libopus/src/opus.c
@@ -194,7 +194,8 @@ int opus_packet_get_samples_per_frame(const unsigned char *data,
int opus_packet_parse_impl(const unsigned char *data, opus_int32 len,
int self_delimited, unsigned char *out_toc,
const unsigned char *frames[48], opus_int16 size[48],
- int *payload_offset, opus_int32 *packet_offset)
+ int *payload_offset, opus_int32 *packet_offset,
+ const unsigned char **padding, opus_int32 *padding_len)
{
int i, bytes;
int count;
@@ -337,6 +338,11 @@ int opus_packet_parse_impl(const unsigned char *data, opus_int32 len,
data += size[i];
}
+ if (padding != NULL)
+ {
+ *padding = data;
+ *padding_len = pad;
+ }
if (packet_offset)
*packet_offset = pad+(opus_int32)(data-data0);
@@ -351,6 +357,6 @@ int opus_packet_parse(const unsigned char *data, opus_int32 len,
opus_int16 size[48], int *payload_offset)
{
return opus_packet_parse_impl(data, len, 0, out_toc,
- frames, size, payload_offset, NULL);
+ frames, size, payload_offset, NULL, NULL, NULL);
}
diff --git a/media/libopus/src/opus_decoder.c b/media/libopus/src/opus_decoder.c
index 6520e748ea..b57c809434 100644
--- a/media/libopus/src/opus_decoder.c
+++ b/media/libopus/src/opus_decoder.c
@@ -52,6 +52,15 @@
#include "mathops.h"
#include "cpu_support.h"
+#ifdef ENABLE_DEEP_PLC
+#include "dred_rdovae_dec_data.h"
+#include "dred_rdovae_dec.h"
+#endif
+
+#ifdef ENABLE_OSCE
+#include "osce.h"
+#endif
+
struct OpusDecoder {
int celt_dec_offset;
int silk_dec_offset;
@@ -59,7 +68,11 @@ struct OpusDecoder {
opus_int32 Fs; /** Sampling rate (at the API level) */
silk_DecControlStruct DecControl;
int decode_gain;
+ int complexity;
int arch;
+#ifdef ENABLE_DEEP_PLC
+ LPCNetPLCState lpcnet;
+#endif
/* Everything beyond this point gets cleared on a reset */
#define OPUS_DECODER_RESET_START stream_channels
@@ -135,6 +148,7 @@ int opus_decoder_init(OpusDecoder *st, opus_int32 Fs, int channels)
silk_dec = (char*)st+st->silk_dec_offset;
celt_dec = (CELTDecoder*)((char*)st+st->celt_dec_offset);
st->stream_channels = st->channels = channels;
+ st->complexity = 0;
st->Fs = Fs;
st->DecControl.API_sampleRate = st->Fs;
@@ -152,6 +166,9 @@ int opus_decoder_init(OpusDecoder *st, opus_int32 Fs, int channels)
st->prev_mode = 0;
st->frame_size = Fs/400;
+#ifdef ENABLE_DEEP_PLC
+ lpcnet_plc_init( &st->lpcnet);
+#endif
st->arch = opus_select_arch();
return OPUS_OK;
}
@@ -370,7 +387,7 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data,
pcm_ptr = pcm_silk;
if (st->prev_mode==MODE_CELT_ONLY)
- silk_InitDecoder( silk_dec );
+ silk_ResetDecoder( silk_dec );
/* The SILK PLC cannot produce frames of less than 10 ms */
st->DecControl.payloadSize_ms = IMAX(10, 1000 * audiosize / st->Fs);
@@ -394,14 +411,28 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data,
st->DecControl.internalSampleRate = 16000;
}
}
+ st->DecControl.enable_deep_plc = st->complexity >= 5;
+#ifdef ENABLE_OSCE
+ st->DecControl.osce_method = OSCE_METHOD_NONE;
+#ifndef DISABLE_LACE
+ if (st->complexity >= 6) {st->DecControl.osce_method = OSCE_METHOD_LACE;}
+#endif
+#ifndef DISABLE_NOLACE
+ if (st->complexity >= 7) {st->DecControl.osce_method = OSCE_METHOD_NOLACE;}
+#endif
+#endif
- lost_flag = data == NULL ? 1 : 2 * decode_fec;
+ lost_flag = data == NULL ? 1 : 2 * !!decode_fec;
decoded_samples = 0;
do {
/* Call SILK decoder */
int first_frame = decoded_samples == 0;
silk_ret = silk_Decode( silk_dec, &st->DecControl,
- lost_flag, first_frame, &dec, pcm_ptr, &silk_frame_size, st->arch );
+ lost_flag, first_frame, &dec, pcm_ptr, &silk_frame_size,
+#ifdef ENABLE_DEEP_PLC
+ &st->lpcnet,
+#endif
+ st->arch );
if( silk_ret ) {
if (lost_flag) {
/* PLC failure should not be fatal */
@@ -521,8 +552,12 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data,
if (mode != st->prev_mode && st->prev_mode > 0 && !st->prev_redundancy)
MUST_SUCCEED(celt_decoder_ctl(celt_dec, OPUS_RESET_STATE));
/* Decode CELT */
- celt_ret = celt_decode_with_ec(celt_dec, decode_fec ? NULL : data,
- len, pcm, celt_frame_size, &dec, celt_accum);
+ celt_ret = celt_decode_with_ec_dred(celt_dec, decode_fec ? NULL : data,
+ len, pcm, celt_frame_size, &dec, celt_accum
+#ifdef ENABLE_DEEP_PLC
+ , &st->lpcnet
+#endif
+ );
} else {
unsigned char silence[2] = {0xFF, 0xFF};
if (!celt_accum)
@@ -634,7 +669,7 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data,
int opus_decode_native(OpusDecoder *st, const unsigned char *data,
opus_int32 len, opus_val16 *pcm, int frame_size, int decode_fec,
- int self_delimited, opus_int32 *packet_offset, int soft_clip)
+ int self_delimited, opus_int32 *packet_offset, int soft_clip, const OpusDRED *dred, opus_int32 dred_offset)
{
int i, nb_samples;
int count, offset;
@@ -648,6 +683,35 @@ int opus_decode_native(OpusDecoder *st, const unsigned char *data,
/* For FEC/PLC, frame_size has to be to have a multiple of 2.5 ms */
if ((decode_fec || len==0 || data==NULL) && frame_size%(st->Fs/400)!=0)
return OPUS_BAD_ARG;
+#ifdef ENABLE_DRED
+ if (dred != NULL && dred->process_stage == 2) {
+ int F10;
+ int features_per_frame;
+ int needed_feature_frames;
+ int init_frames;
+ lpcnet_plc_fec_clear(&st->lpcnet);
+ F10 = st->Fs/100;
+ /* if blend==0, the last PLC call was "update" and we need to feed two extra 10-ms frames. */
+ init_frames = (st->lpcnet.blend == 0) ? 2 : 0;
+ features_per_frame = IMAX(1, frame_size/F10);
+ needed_feature_frames = init_frames + features_per_frame;
+ lpcnet_plc_fec_clear(&st->lpcnet);
+ for (i=0;i<needed_feature_frames;i++) {
+ int feature_offset;
+ /* We floor instead of rounding because 5-ms overlap compensates for the missing 0.5 rounding offset. */
+ feature_offset = init_frames - i - 2 + (int)floor(((float)dred_offset + dred->dred_offset*F10/4)/F10);
+ if (feature_offset <= 4*dred->nb_latents-1 && feature_offset >= 0) {
+ lpcnet_plc_fec_add(&st->lpcnet, dred->fec_features+feature_offset*DRED_NUM_FEATURES);
+ } else {
+ if (feature_offset >= 0) lpcnet_plc_fec_add(&st->lpcnet, NULL);
+ }
+
+ }
+ }
+#else
+ (void)dred;
+ (void)dred_offset;
+#endif
if (len==0 || data==NULL)
{
int pcm_count=0;
@@ -672,7 +736,7 @@ int opus_decode_native(OpusDecoder *st, const unsigned char *data,
packet_stream_channels = opus_packet_get_nb_channels(data);
count = opus_packet_parse_impl(data, len, self_delimited, &toc, NULL,
- size, &offset, packet_offset);
+ size, &offset, packet_offset, NULL, NULL);
if (count<0)
return count;
@@ -684,12 +748,12 @@ int opus_decode_native(OpusDecoder *st, const unsigned char *data,
int ret;
/* If no FEC can be present, run the PLC (recursive call) */
if (frame_size < packet_frame_size || packet_mode == MODE_CELT_ONLY || st->mode == MODE_CELT_ONLY)
- return opus_decode_native(st, NULL, 0, pcm, frame_size, 0, 0, NULL, soft_clip);
+ return opus_decode_native(st, NULL, 0, pcm, frame_size, 0, 0, NULL, soft_clip, NULL, 0);
/* Otherwise, run the PLC on everything except the size for which we might have FEC */
duration_copy = st->last_packet_duration;
if (frame_size-packet_frame_size!=0)
{
- ret = opus_decode_native(st, NULL, 0, pcm, frame_size-packet_frame_size, 0, 0, NULL, soft_clip);
+ ret = opus_decode_native(st, NULL, 0, pcm, frame_size-packet_frame_size, 0, 0, NULL, soft_clip, NULL, 0);
if (ret<0)
{
st->last_packet_duration = duration_copy;
@@ -753,7 +817,7 @@ int opus_decode(OpusDecoder *st, const unsigned char *data,
{
if(frame_size<=0)
return OPUS_BAD_ARG;
- return opus_decode_native(st, data, len, pcm, frame_size, decode_fec, 0, NULL, 0);
+ return opus_decode_native(st, data, len, pcm, frame_size, decode_fec, 0, NULL, 0, NULL, 0);
}
#ifndef DISABLE_FLOAT_API
@@ -781,7 +845,7 @@ int opus_decode_float(OpusDecoder *st, const unsigned char *data,
celt_assert(st->channels == 1 || st->channels == 2);
ALLOC(out, frame_size*st->channels, opus_int16);
- ret = opus_decode_native(st, data, len, out, frame_size, decode_fec, 0, NULL, 0);
+ ret = opus_decode_native(st, data, len, out, frame_size, decode_fec, 0, NULL, 0, NULL, 0);
if (ret > 0)
{
for (i=0;i<ret*st->channels;i++)
@@ -819,7 +883,7 @@ int opus_decode(OpusDecoder *st, const unsigned char *data,
celt_assert(st->channels == 1 || st->channels == 2);
ALLOC(out, frame_size*st->channels, float);
- ret = opus_decode_native(st, data, len, out, frame_size, decode_fec, 0, NULL, 1);
+ ret = opus_decode_native(st, data, len, out, frame_size, decode_fec, 0, NULL, 1, NULL, 0);
if (ret > 0)
{
for (i=0;i<ret*st->channels;i++)
@@ -834,7 +898,7 @@ int opus_decode_float(OpusDecoder *st, const unsigned char *data,
{
if(frame_size<=0)
return OPUS_BAD_ARG;
- return opus_decode_native(st, data, len, pcm, frame_size, decode_fec, 0, NULL, 0);
+ return opus_decode_native(st, data, len, pcm, frame_size, decode_fec, 0, NULL, 0, NULL, 0);
}
#endif
@@ -864,6 +928,27 @@ int opus_decoder_ctl(OpusDecoder *st, int request, ...)
*value = st->bandwidth;
}
break;
+ case OPUS_SET_COMPLEXITY_REQUEST:
+ {
+ opus_int32 value = va_arg(ap, opus_int32);
+ if(value<0 || value>10)
+ {
+ goto bad_arg;
+ }
+ st->complexity = value;
+ celt_decoder_ctl(celt_dec, OPUS_SET_COMPLEXITY(value));
+ }
+ break;
+ case OPUS_GET_COMPLEXITY_REQUEST:
+ {
+ opus_int32 *value = va_arg(ap, opus_int32*);
+ if (!value)
+ {
+ goto bad_arg;
+ }
+ *value = st->complexity;
+ }
+ break;
case OPUS_GET_FINAL_RANGE_REQUEST:
{
opus_uint32 *value = va_arg(ap, opus_uint32*);
@@ -881,9 +966,12 @@ int opus_decoder_ctl(OpusDecoder *st, int request, ...)
((char*)&st->OPUS_DECODER_RESET_START - (char*)st));
celt_decoder_ctl(celt_dec, OPUS_RESET_STATE);
- silk_InitDecoder( silk_dec );
+ silk_ResetDecoder( silk_dec );
st->stream_channels = st->channels;
st->frame_size = st->Fs/400;
+#ifdef ENABLE_DEEP_PLC
+ lpcnet_plc_reset( &st->lpcnet );
+#endif
}
break;
case OPUS_GET_SAMPLE_RATE_REQUEST:
@@ -959,6 +1047,20 @@ int opus_decoder_ctl(OpusDecoder *st, int request, ...)
ret = celt_decoder_ctl(celt_dec, OPUS_GET_PHASE_INVERSION_DISABLED(value));
}
break;
+#ifdef USE_WEIGHTS_FILE
+ case OPUS_SET_DNN_BLOB_REQUEST:
+ {
+ const unsigned char *data = va_arg(ap, const unsigned char *);
+ opus_int32 len = va_arg(ap, opus_int32);
+ if(len<0 || data == NULL)
+ {
+ goto bad_arg;
+ }
+ ret = lpcnet_plc_load_model(&st->lpcnet, data, len);
+ ret = silk_LoadOSCEModels(silk_dec, data, len) || ret;
+ }
+ break;
+#endif
default:
/*fprintf(stderr, "unknown opus_decoder_ctl() request: %d", request);*/
ret = OPUS_UNIMPLEMENTED;
@@ -1034,8 +1136,373 @@ int opus_packet_get_nb_samples(const unsigned char packet[], opus_int32 len,
return samples;
}
+/** Reports whether a packet signals SILK in-band FEC (LBRR) data.
+ *
+ * Only the first byte of the first frame is inspected. The bit that is tested
+ * depends on how many 20 ms units (960 samples at 48 kHz) the Opus frame
+ * holds; for stereo packets a second flag bit is tested as well.
+ *
+ * @param packet Opus packet. Its first byte is read before any length check
+ *               is made, so the caller must provide at least one readable byte.
+ * @param len    Length of the packet in bytes.
+ * @return 1 if an LBRR flag is set, 0 if not (including CELT-only packets and
+ *         packets whose first frame is empty), or the non-positive value
+ *         returned by opus_packet_parse() when the packet cannot be parsed.
+ */
+int opus_packet_has_lbrr(const unsigned char packet[], opus_int32 len)
+{
+   int ret;
+   const unsigned char *frames[48];
+   opus_int16 size[48];
+   int packet_mode, packet_frame_size, packet_stream_channels;
+   int nb_frames=1;
+   int lbrr;
+
+   packet_mode = opus_packet_get_mode(packet);
+   if (packet_mode == MODE_CELT_ONLY)
+      return 0;
+   packet_frame_size = opus_packet_get_samples_per_frame(packet, 48000);
+   /* 960 samples at 48 kHz is 20 ms; longer frames hold several such units. */
+   if (packet_frame_size > 960)
+      nb_frames = packet_frame_size/960;
+   packet_stream_channels = opus_packet_get_nb_channels(packet);
+   ret = opus_packet_parse(packet, len, NULL, frames, size, NULL);
+   if (ret <= 0)
+      return ret;
+   /* A zero-length first frame has no header byte: frames[0] then points at
+      the following data or past the end of the packet, so it must not be
+      dereferenced. An empty frame cannot carry LBRR. */
+   if (size[0] == 0)
+      return 0;
+   lbrr = (frames[0][0] >> (7-nb_frames)) & 0x1;
+   if (packet_stream_channels == 2)
+      lbrr = lbrr || ((frames[0][0] >> (6-2*nb_frames)) & 0x1);
+   return lbrr;
+}
+
int opus_decoder_get_nb_samples(const OpusDecoder *dec,
const unsigned char packet[], opus_int32 len)
{
return opus_packet_get_nb_samples(packet, len, dec->Fs);
}
+
+/* State of the standalone DRED decoder object. */
+struct OpusDREDDecoder {
+#ifdef ENABLE_DRED
+ /* RDOVAE decoder model, filled in by init_rdovaedec(). */
+ RDOVAEDec model;
+#endif
+ /* Set to 1 once init_rdovaedec() has succeeded, 0 otherwise. */
+ int loaded;
+ /* Value returned by opus_select_arch() when the decoder was initialized. */
+ int arch;
+ /* 0xD8EDDEC0 after opus_dred_decoder_init(); overwritten with 0xDE57801D
+ by opus_dred_decoder_destroy(). */
+ opus_uint32 magic;
+};
+
+/* Sanity checks on a DRED decoder state. They are only compiled in when DRED
+ is enabled together with hardening or assertions; otherwise
+ VALIDATE_DRED_DECODER() expands to nothing. */
+#if defined(ENABLE_DRED) && (defined(ENABLE_HARDENING) || defined(ENABLE_ASSERTIONS))
+static void validate_dred_decoder(OpusDREDDecoder *st)
+{
+ /* The magic value is written by opus_dred_decoder_init(). */
+ celt_assert(st->magic == 0xD8EDDEC0);
+#ifdef OPUS_ARCHMASK
+ celt_assert(st->arch >= 0);
+ celt_assert(st->arch <= OPUS_ARCHMASK);
+#endif
+}
+#define VALIDATE_DRED_DECODER(st) validate_dred_decoder(st)
+#else
+#define VALIDATE_DRED_DECODER(st)
+#endif
+
+
+/* Returns the number of bytes occupied by an OpusDREDDecoder state. */
+int opus_dred_decoder_get_size(void)
+{
+   const int state_bytes = (int)sizeof(OpusDREDDecoder);
+   return state_bytes;
+}
+
+#ifdef ENABLE_DRED
+/* Initializes the decoder's RDOVAE model from a weights blob.
+ *
+ * The blob is handed to parse_weights(), the resulting array list is passed
+ * to init_rdovaedec() and then released with opus_free(). On success the
+ * decoder is marked as loaded.
+ *
+ * Returns OPUS_OK when init_rdovaedec() reports 0, OPUS_BAD_ARG otherwise.
+ */
+int dred_decoder_load_model(OpusDREDDecoder *dec, const unsigned char *data, int len)
+{
+   WeightArray *arrays;
+   int status;
+   /* NOTE(review): the result of parse_weights() is not checked here;
+      presumably init_rdovaedec() rejects whatever list a failed parse
+      leaves behind -- confirm. */
+   parse_weights(&arrays, data, len);
+   status = init_rdovaedec(&dec->model, arrays);
+   opus_free(arrays);
+   if (status != 0)
+      return OPUS_BAD_ARG;
+   dec->loaded = 1;
+   return OPUS_OK;
+}
+#endif
+
+/* Initializes a caller-allocated DRED decoder state.
+ *
+ * When DRED is enabled and the weights are compiled in (USE_WEIGHTS_FILE not
+ * defined), the model is initialized from rdovaedec_arrays and the decoder is
+ * marked as loaded on success. The arch and magic fields are always set.
+ *
+ * Returns OPUS_OK, or OPUS_UNIMPLEMENTED if the built-in model failed to
+ * initialize.
+ */
+int opus_dred_decoder_init(OpusDREDDecoder *dec)
+{
+   int status = 0;
+   dec->loaded = 0;
+#if defined(ENABLE_DRED) && !defined(USE_WEIGHTS_FILE)
+   status = init_rdovaedec(&dec->model, rdovaedec_arrays);
+   if (status == 0)
+   {
+      dec->loaded = 1;
+   }
+#endif
+   dec->arch = opus_select_arch();
+   /* To make sure nobody forgets to init, use a magic number. */
+   dec->magic = 0xD8EDDEC0;
+   if (status != 0)
+      return OPUS_UNIMPLEMENTED;
+   return OPUS_OK;
+}
+
+/* Allocates and initializes a DRED decoder.
+ *
+ * If error is non-NULL it receives OPUS_ALLOC_FAIL when the allocation fails,
+ * otherwise the status returned by opus_dred_decoder_init().
+ *
+ * Returns the new decoder, or NULL if allocation or initialization failed.
+ */
+OpusDREDDecoder *opus_dred_decoder_create(int *error)
+{
+   int status;
+   OpusDREDDecoder *state;
+   state = (OpusDREDDecoder *)opus_alloc(opus_dred_decoder_get_size());
+   if (state == NULL)
+   {
+      if (error != NULL)
+         *error = OPUS_ALLOC_FAIL;
+      return NULL;
+   }
+   status = opus_dred_decoder_init(state);
+   if (error != NULL)
+      *error = status;
+   if (status == OPUS_OK)
+      return state;
+   /* Initialization failed: release the state and report no decoder. */
+   opus_free(state);
+   return NULL;
+}
+
+/* Releases a DRED decoder. The magic field is overwritten first so that the
+   value written by opus_dred_decoder_init() no longer appears in the state.
+   A NULL pointer is passed straight to opus_free(). */
+void opus_dred_decoder_destroy(OpusDREDDecoder *dec)
+{
+   if (dec != NULL)
+   {
+      dec->magic = 0xDE57801D;
+   }
+   opus_free(dec);
+}
+
+/* Performs a CTL request on a DRED decoder.
+ *
+ * The only request handled is OPUS_SET_DNN_BLOB_REQUEST (when built with
+ * USE_WEIGHTS_FILE), which takes a `const unsigned char *` blob and an
+ * opus_int32 length and loads the model through dred_decoder_load_model().
+ *
+ * Returns the status of the request, OPUS_BAD_ARG for a NULL blob or negative
+ * length, and OPUS_UNIMPLEMENTED for unknown requests or when DRED is not
+ * compiled in.
+ */
+int opus_dred_decoder_ctl(OpusDREDDecoder *dred_dec, int request, ...)
+{
+#ifdef ENABLE_DRED
+   int ret = OPUS_OK;
+   va_list ap;
+
+   va_start(ap, request);
+   (void)dred_dec;
+   switch (request)
+   {
+# ifdef USE_WEIGHTS_FILE
+      case OPUS_SET_DNN_BLOB_REQUEST:
+      {
+         const unsigned char *data = va_arg(ap, const unsigned char *);
+         opus_int32 len = va_arg(ap, opus_int32);
+         if(len<0 || data == NULL)
+         {
+            goto bad_arg;
+         }
+         /* Store the result and leave through the common exit so that
+            va_end() is always paired with va_start(). */
+         ret = dred_decoder_load_model(dred_dec, data, len);
+      }
+      break;
+# endif
+      default:
+         /*fprintf(stderr, "unknown opus_decoder_ctl() request: %d", request);*/
+         ret = OPUS_UNIMPLEMENTED;
+         break;
+   }
+   va_end(ap);
+   return ret;
+# ifdef USE_WEIGHTS_FILE
+bad_arg:
+   va_end(ap);
+   return OPUS_BAD_ARG;
+# endif
+#else
+   (void)dred_dec;
+   (void)request;
+   return OPUS_UNIMPLEMENTED;
+#endif
+}
+
+#ifdef ENABLE_DRED
+/* Locates the first usable DRED extension in the padding of an Opus packet.
+ *
+ * The packet is parsed to obtain its padding area, which is then walked one
+ * extension at a time with skip_extension(). Extensions with id 1 advance the
+ * frame counter (by one when the L bit is 0, by the following byte otherwise);
+ * the first DRED_EXTENSION_ID extension with an acceptable payload ends the
+ * scan.
+ *
+ * On return *payload points at the DRED payload, or is NULL if none was found.
+ * *dred_frame_offset is written whenever a DRED extension is encountered and
+ * holds its position in the packet in units of 2.5 ms.
+ *
+ * Returns the payload length in bytes, 0 if no DRED payload was found (also
+ * when skip_extension() reports an error), or the negative error returned by
+ * opus_packet_parse_impl().
+ */
+static int dred_find_payload(const unsigned char *data, opus_int32 len, const unsigned char **payload, int *dred_frame_offset)
+{
+   const unsigned char *data0;
+   /* Must be opus_int32: its address is passed as the padding_len output of
+      opus_packet_parse_impl(), which is declared as opus_int32 *. */
+   opus_int32 len0;
+   int frame = 0;
+   int ret;
+   const unsigned char *frames[48];
+   opus_int16 size[48];
+   int frame_size;
+
+   *payload = NULL;
+   /* Get the padding section of the packet. */
+   ret = opus_packet_parse_impl(data, len, 0, NULL, frames, size, NULL, NULL, &data0, &len0);
+   if (ret < 0)
+      return ret;
+   frame_size = opus_packet_get_samples_per_frame(data, 48000);
+   data = data0;
+   len = len0;
+   /* Scan extensions in order until we find the earliest frame with DRED data. */
+   while (len > 0)
+   {
+      opus_int32 header_size;
+      int id, L;
+      /* Remember where this extension starts before skipping over it. */
+      len0 = len;
+      data0 = data;
+      id = *data0 >> 1;
+      L = *data0 & 0x1;
+      len = skip_extension(&data, len, &header_size);
+      if (len < 0)
+         break;
+      if (id == 1)
+      {
+         if (L==0)
+         {
+            frame++;
+         } else {
+            frame += data0[1];
+         }
+      } else if (id == DRED_EXTENSION_ID)
+      {
+         const unsigned char *curr_payload;
+         opus_int32 curr_payload_len;
+         curr_payload = data0+header_size;
+         curr_payload_len = (data-data0)-header_size;
+         /* DRED position in the packet, in units of 2.5 ms like for the signaled DRED offset. */
+         *dred_frame_offset = frame*frame_size/120;
+#ifdef DRED_EXPERIMENTAL_VERSION
+         /* Check that temporary extension type and version match.
+            This check will be removed once extension is finalized. */
+         if (curr_payload_len > DRED_EXPERIMENTAL_BYTES && curr_payload[0] == 'D' && curr_payload[1] == DRED_EXPERIMENTAL_VERSION) {
+            *payload = curr_payload+2;
+            return curr_payload_len-2;
+         }
+#else
+         if (curr_payload_len > 0) {
+            *payload = curr_payload;
+            return curr_payload_len;
+         }
+#endif
+      }
+   }
+   return 0;
+}
+#endif
+
+/* Returns the size in bytes of an OpusDRED state, or 0 when DRED support is
+   not compiled in. */
+int opus_dred_get_size(void)
+{
+   int state_bytes = 0;
+#ifdef ENABLE_DRED
+   state_bytes = (int)sizeof(OpusDRED);
+#endif
+   return state_bytes;
+}
+
+/* Allocates an OpusDRED state.
+ *
+ * If error is non-NULL it always receives a status: OPUS_OK on success,
+ * OPUS_ALLOC_FAIL if the allocation failed, or OPUS_UNIMPLEMENTED when DRED
+ * support is not compiled in.
+ *
+ * Returns the new state (to be released with opus_dred_free()), or NULL on
+ * failure.
+ */
+OpusDRED *opus_dred_alloc(int *error)
+{
+#ifdef ENABLE_DRED
+   OpusDRED *dec;
+   dec = (OpusDRED *)opus_alloc(opus_dred_get_size());
+   if (dec == NULL)
+   {
+      if (error)
+         *error = OPUS_ALLOC_FAIL;
+      return NULL;
+   }
+   /* Report success explicitly so the caller never reads an unset status. */
+   if (error)
+      *error = OPUS_OK;
+   return dec;
+#else
+   if (error)
+      *error = OPUS_UNIMPLEMENTED;
+   return NULL;
+#endif
+}
+
+/* Releases an OpusDRED state through opus_free(). Does nothing when DRED
+   support is not compiled in. */
+void opus_dred_free(OpusDRED *dec)
+{
+#ifndef ENABLE_DRED
+   (void)dec;
+#else
+   opus_free(dec);
+#endif
+}
+
+/* Extracts and entropy-decodes the DRED data carried by an Opus packet.
+ *
+ * The payload is located with dred_find_payload() and decoded into `dred` by
+ * dred_ec_decode(). Unless defer_processing is non-zero, opus_dred_process()
+ * is then run in place on `dred`.
+ *
+ * If dred_end is non-NULL it receives max(0, -dred_offset*sampling_rate/400)
+ * when a payload was found, and 0 otherwise.
+ *
+ * Returns max(0, nb_latents*sampling_rate/25 - dred_offset*sampling_rate/400)
+ * when a payload was found, 0 when the packet has none, the negative value
+ * from dred_find_payload() on a parse failure, and OPUS_UNIMPLEMENTED when no
+ * model is loaded or DRED is not compiled in.
+ *
+ * NOTE(review): dred_dec and dred are dereferenced without a NULL check and
+ * sampling_rate is used as a divisor without a zero check -- confirm callers
+ * guarantee valid values.
+ */
+int opus_dred_parse(OpusDREDDecoder *dred_dec, OpusDRED *dred, const unsigned char *data, opus_int32 len, opus_int32 max_dred_samples, opus_int32 sampling_rate, int *dred_end, int defer_processing)
+{
+#ifdef ENABLE_DRED
+ const unsigned char *payload;
+ opus_int32 payload_len;
+ int dred_frame_offset=0;
+ VALIDATE_DRED_DECODER(dred_dec);
+ if (!dred_dec->loaded) return OPUS_UNIMPLEMENTED;
+ /* Mark the state as holding nothing usable until decoding succeeds. */
+ dred->process_stage = -1;
+ payload_len = dred_find_payload(data, len, &payload, &dred_frame_offset);
+ if (payload_len < 0)
+ return payload_len;
+ if (payload != NULL)
+ {
+ int offset;
+ int min_feature_frames;
+ /* max_dred_samples expressed in units of sampling_rate/100 samples (10 ms). */
+ offset = 100*max_dred_samples/sampling_rate;
+ min_feature_frames = IMIN(2 + offset, 2*DRED_NUM_REDUNDANCY_FRAMES);
+ dred_ec_decode(dred, payload, payload_len, min_feature_frames, dred_frame_offset);
+ /* NOTE(review): the status returned by opus_dred_process() is discarded. */
+ if (!defer_processing)
+ opus_dred_process(dred_dec, dred, dred);
+ if (dred_end) *dred_end = IMAX(0, -dred->dred_offset*sampling_rate/400);
+ return IMAX(0, dred->nb_latents*sampling_rate/25 - dred->dred_offset* sampling_rate/400);
+ }
+ if (dred_end) *dred_end = 0;
+ return 0;
+#else
+ (void)dred_dec;
+ (void)dred;
+ (void)data;
+ (void)len;
+ (void)max_dred_samples;
+ (void)sampling_rate;
+ (void)defer_processing;
+ (void)dred_end;
+ return OPUS_UNIMPLEMENTED;
+#endif
+}
+
+/* Runs the RDOVAE decoder on previously parsed DRED data.
+ *
+ * src must be at process stage 1 or 2. When src and dst differ, src is copied
+ * into dst first. A state already at stage 2 is left as is; otherwise
+ * DRED_rdovae_decode_all() is run on dst and dst is moved to stage 2.
+ *
+ * Returns OPUS_OK on success, OPUS_BAD_ARG for NULL arguments or a source in
+ * any other stage, and OPUS_UNIMPLEMENTED when no model is loaded or DRED is
+ * not compiled in.
+ */
+int opus_dred_process(OpusDREDDecoder *dred_dec, const OpusDRED *src, OpusDRED *dst)
+{
+#ifndef ENABLE_DRED
+   (void)dred_dec;
+   (void)src;
+   (void)dst;
+   return OPUS_UNIMPLEMENTED;
+#else
+   if (dred_dec == NULL || src == NULL || dst == NULL)
+      return OPUS_BAD_ARG;
+   if (src->process_stage != 1 && src->process_stage != 2)
+      return OPUS_BAD_ARG;
+   VALIDATE_DRED_DECODER(dred_dec);
+   if (!dred_dec->loaded)
+      return OPUS_UNIMPLEMENTED;
+   if (src != dst)
+   {
+      OPUS_COPY(dst, src, 1);
+   }
+   /* Only decode when the features have not been produced yet. */
+   if (dst->process_stage != 2)
+   {
+      DRED_rdovae_decode_all(&dred_dec->model, dst->fec_features, dst->state, dst->latents, dst->nb_latents, dred_dec->arch);
+      dst->process_stage = 2;
+   }
+   return OPUS_OK;
+#endif
+}
+
+/* Decodes audio from DRED data into 16-bit PCM.
+ *
+ * The audio is produced by opus_decode_native() (called with no packet data
+ * and with soft clipping enabled) into a temporary float buffer, which is
+ * then converted to opus_int16 with FLOAT2INT16().
+ *
+ * Returns the value returned by opus_decode_native(), OPUS_BAD_ARG if
+ * frame_size is not positive, or OPUS_UNIMPLEMENTED when DRED is not
+ * compiled in.
+ */
+int opus_decoder_dred_decode(OpusDecoder *st, const OpusDRED *dred, opus_int32 dred_offset, opus_int16 *pcm, opus_int32 frame_size)
+{
+#ifdef ENABLE_DRED
+ VARDECL(float, out);
+ int ret, i;
+ ALLOC_STACK;
+
+ if(frame_size<=0)
+ {
+ RESTORE_STACK;
+ return OPUS_BAD_ARG;
+ }
+
+ celt_assert(st->channels == 1 || st->channels == 2);
+ ALLOC(out, frame_size*st->channels, float);
+
+ /* NOTE(review): `out` is a float buffer passed where opus_decode_native()
+ takes opus_val16 *; this assumes opus_val16 is float in builds with
+ ENABLE_DRED -- TODO confirm. */
+ ret = opus_decode_native(st, NULL, 0, out, frame_size, 0, 0, NULL, 1, dred, dred_offset);
+ if (ret > 0)
+ {
+ /* ret samples per channel were produced; convert all of them. */
+ for (i=0;i<ret*st->channels;i++)
+ pcm[i] = FLOAT2INT16(out[i]);
+ }
+ RESTORE_STACK;
+ return ret;
+#else
+ (void)st;
+ (void)dred;
+ (void)dred_offset;
+ (void)pcm;
+ (void)frame_size;
+ return OPUS_UNIMPLEMENTED;
+#endif
+}
+
+/* Decodes audio from DRED data directly into the caller's float buffer.
+ *
+ * Returns the value returned by opus_decode_native() (called with no packet
+ * data and without soft clipping), OPUS_BAD_ARG if frame_size is not
+ * positive, or OPUS_UNIMPLEMENTED when DRED is not compiled in.
+ */
+int opus_decoder_dred_decode_float(OpusDecoder *st, const OpusDRED *dred, opus_int32 dred_offset, float *pcm, opus_int32 frame_size)
+{
+#ifndef ENABLE_DRED
+   (void)st;
+   (void)dred;
+   (void)dred_offset;
+   (void)pcm;
+   (void)frame_size;
+   return OPUS_UNIMPLEMENTED;
+#else
+   if (frame_size > 0)
+   {
+      return opus_decode_native(st, NULL, 0, pcm, frame_size, 0, 0, NULL, 0, dred, dred_offset);
+   }
+   return OPUS_BAD_ARG;
+#endif
+}
diff --git a/media/libopus/src/opus_encoder.c b/media/libopus/src/opus_encoder.c
index 8c8db5a546..d18d582f03 100644
--- a/media/libopus/src/opus_encoder.c
+++ b/media/libopus/src/opus_encoder.c
@@ -45,11 +45,19 @@
#include "analysis.h"
#include "mathops.h"
#include "tuning_parameters.h"
+
+#ifdef ENABLE_DRED
+#include "dred_coding.h"
+#endif
+
#ifdef FIXED_POINT
#include "fixed/structs_FIX.h"
#else
#include "float/structs_FLP.h"
#endif
+#ifdef ENABLE_OSCE_TRAINING_DATA
+#include <stdio.h>
+#endif
#define MAX_ENCODER_BUFFER 480
@@ -67,6 +75,9 @@ struct OpusEncoder {
int celt_enc_offset;
int silk_enc_offset;
silk_EncControlStruct silk_mode;
+#ifdef ENABLE_DRED
+ DREDEnc dred_encoder;
+#endif
int application;
int channels;
int delay_compensation;
@@ -116,6 +127,14 @@ struct OpusEncoder {
int nb_no_activity_ms_Q1;
opus_val32 peak_signal_energy;
#endif
+#ifdef ENABLE_DRED
+ int dred_duration;
+ int dred_q0;
+ int dred_dQ;
+ int dred_qmax;
+ int dred_target_chunks;
+ unsigned char activity_mem[DRED_MAX_FRAMES*4]; /* 2.5ms resolution*/
+#endif
int nonfinal_frame; /* current frame is not the final in a packet */
opus_uint32 rangeFinal;
};
@@ -224,6 +243,7 @@ int opus_encoder_init(OpusEncoder* st, opus_int32 Fs, int channels, int applicat
st->silk_mode.packetLossPercentage = 0;
st->silk_mode.complexity = 9;
st->silk_mode.useInBandFEC = 0;
+ st->silk_mode.useDRED = 0;
st->silk_mode.useDTX = 0;
st->silk_mode.useCBR = 0;
st->silk_mode.reducedDependency = 0;
@@ -236,6 +256,11 @@ int opus_encoder_init(OpusEncoder* st, opus_int32 Fs, int channels, int applicat
celt_encoder_ctl(celt_enc, CELT_SET_SIGNALLING(0));
celt_encoder_ctl(celt_enc, OPUS_SET_COMPLEXITY(st->silk_mode.complexity));
+#ifdef ENABLE_DRED
+ /* Initialize DRED Encoder */
+ dred_encoder_init( &st->dred_encoder, Fs, channels );
+#endif
+
st->use_vbr = 1;
/* Makes constrained VBR the default (safer for real-time use) */
st->vbr_constraint = 1;
@@ -544,6 +569,73 @@ OpusEncoder *opus_encoder_create(opus_int32 Fs, int channels, int application, i
return st;
}
+#ifdef ENABLE_DRED
+
+/* Bit cost added per quantizer index (0..15) by estimate_dred_bitrate(). */
+static const float dred_bits_table[16] = {73.2f, 68.1f, 62.5f, 57.0f, 51.5f, 45.7f, 39.9f, 32.4f, 26.4f, 20.4f, 16.3f, 13.f, 9.3f, 8.2f, 7.2f, 6.4f};
+/* Estimates the number of bits needed to code the DRED data.
+ *
+ * The estimate is the signaling overhead, an approximation of the initial
+ * state (IS) cost, and one table entry per chunk, where the quantizer of each
+ * chunk comes from compute_quantizer(q0, dQ, qmax, chunk). The chunk count is
+ * derived from `duration` and capped at DRED_NUM_REDUNDANCY_FRAMES/2.
+ *
+ * If target_chunks is non-NULL it receives 1 + the index of the last chunk
+ * after which the running total is still below target_bits (0 if none).
+ *
+ * Returns the total estimate rounded to the nearest integer.
+ */
+static int estimate_dred_bitrate(int q0, int dQ, int qmax, int duration, opus_int32 target_bits, int *target_chunks) {
+   int max_chunks;
+   int chunk;
+   int affordable_chunks = 0;
+   float total_bits;
+   /* Signaling DRED costs 3 bytes. */
+   total_bits = 8*(3+DRED_EXPERIMENTAL_BYTES);
+   /* Approximation for the size of the IS. */
+   total_bits += 50.f+dred_bits_table[q0];
+   max_chunks = IMIN((duration+5)/4, DRED_NUM_REDUNDANCY_FRAMES/2);
+   for (chunk=0;chunk<max_chunks;chunk++) {
+      int chunk_q = compute_quantizer(q0, dQ, qmax, chunk);
+      total_bits += dred_bits_table[chunk_q];
+      if (total_bits < target_bits)
+         affordable_chunks = chunk+1;
+   }
+   if (target_chunks != NULL)
+      *target_chunks = affordable_chunks;
+   return (int)floor(.5f+total_bits);
+}
+
+/* Decides how much of the bitrate to spend on DRED for the current frame.
+ *
+ * A target fraction of the bitrate is derived from the configured packet loss
+ * percentage (with different rules depending on whether in-band FEC is in
+ * use), the quantizer parameters are chosen from the bitrate, and the result
+ * is capped by the estimate from estimate_dred_bitrate(). The chosen q0, dQ,
+ * qmax and target chunk count are stored in the encoder state.
+ *
+ * Returns the DRED bitrate in bits per second; 0 when fewer than two chunks
+ * fit in the target.
+ */
+static opus_int32 compute_dred_bitrate(OpusEncoder *st, opus_int32 bitrate_bps, int frame_size)
+{
+ float dred_frac;
+ int bitrate_offset;
+ opus_int32 dred_bitrate;
+ opus_int32 target_dred_bitrate;
+ int target_chunks;
+ opus_int32 max_dred_bits;
+ int q0, dQ, qmax;
+ if (st->silk_mode.useInBandFEC) {
+ dred_frac = MIN16(.7f, 3.f*st->silk_mode.packetLossPercentage/100.f);
+ bitrate_offset = 20000;
+ } else {
+ if (st->silk_mode.packetLossPercentage > 5) {
+ dred_frac = MIN16(.8f, .55f + st->silk_mode.packetLossPercentage/100.f);
+ } else {
+ dred_frac = 12*st->silk_mode.packetLossPercentage/100.f;
+ }
+ bitrate_offset = 12000;
+ }
+ /* Account for the fact that longer packets require less redundancy. */
+ dred_frac = dred_frac/(dred_frac + (1-dred_frac)*(frame_size*50.f)/st->Fs);
+ /* Approximate fit based on a few experiments. Could probably be improved. */
+ q0 = IMIN(15, IMAX(4, 51 - 3*EC_ILOG(IMAX(1, bitrate_bps-bitrate_offset))));
+ dQ = bitrate_bps-bitrate_offset > 36000 ? 3 : 5;
+ qmax = 15;
+ target_dred_bitrate = IMAX(0, (int)(dred_frac*(bitrate_bps-bitrate_offset)));
+ if (st->dred_duration > 0) {
+ /* Bits available for DRED within this one frame. */
+ /* NOTE(review): the product is formed in 32-bit integer arithmetic before
+ the division -- confirm it cannot overflow at the largest bitrate and
+ frame size. */
+ opus_int32 target_bits = target_dred_bitrate*frame_size/st->Fs;
+ max_dred_bits = estimate_dred_bitrate(q0, dQ, qmax, st->dred_duration, target_bits, &target_chunks);
+ } else {
+ max_dred_bits = 0;
+ target_chunks=0;
+ }
+ /* Convert the per-frame bit estimate back to bits per second for the cap. */
+ dred_bitrate = IMIN(target_dred_bitrate, max_dred_bits*st->Fs/frame_size);
+ /* If we can't afford enough bits, don't bother with DRED at all. */
+ if (target_chunks < 2)
+ dred_bitrate = 0;
+ st->dred_q0 = q0;
+ st->dred_dQ = dQ;
+ st->dred_qmax = qmax;
+ st->dred_target_chunks = target_chunks;
+ return dred_bitrate;
+}
+#endif
+
static opus_int32 user_bitrate_to_bitrate(OpusEncoder *st, int frame_size, int max_data_bytes)
{
if(!frame_size)frame_size=st->Fs/400;
@@ -872,7 +964,7 @@ static opus_val32 compute_frame_energy(const opus_val16 *pcm, int frame_size, in
/* Compute the right shift required in the MAC to avoid an overflow */
max_shift = celt_ilog2(len);
- shift = IMAX(0, (celt_ilog2(sample_max) << 1) + max_shift - 28);
+ shift = IMAX(0, (celt_ilog2(1+sample_max) << 1) + max_shift - 28);
/* Compute the energy */
for (i=0; i<len; i++)
@@ -922,105 +1014,6 @@ static int decide_dtx_mode(opus_int activity, /* indicates if this fr
#endif
-static opus_int32 encode_multiframe_packet(OpusEncoder *st,
- const opus_val16 *pcm,
- int nb_frames,
- int frame_size,
- unsigned char *data,
- opus_int32 out_data_bytes,
- int to_celt,
- int lsb_depth,
- int float_api)
-{
- int i;
- int ret = 0;
- VARDECL(unsigned char, tmp_data);
- int bak_mode, bak_bandwidth, bak_channels, bak_to_mono;
- VARDECL(OpusRepacketizer, rp);
- int max_header_bytes;
- opus_int32 bytes_per_frame;
- opus_int32 cbr_bytes;
- opus_int32 repacketize_len;
- int tmp_len;
- ALLOC_STACK;
-
- /* Worst cases:
- * 2 frames: Code 2 with different compressed sizes
- * >2 frames: Code 3 VBR */
- max_header_bytes = nb_frames == 2 ? 3 : (2+(nb_frames-1)*2);
-
- if (st->use_vbr || st->user_bitrate_bps==OPUS_BITRATE_MAX)
- repacketize_len = out_data_bytes;
- else {
- cbr_bytes = 3*st->bitrate_bps/(3*8*st->Fs/(frame_size*nb_frames));
- repacketize_len = IMIN(cbr_bytes, out_data_bytes);
- }
- bytes_per_frame = IMIN(1276, 1+(repacketize_len-max_header_bytes)/nb_frames);
-
- ALLOC(tmp_data, nb_frames*bytes_per_frame, unsigned char);
- ALLOC(rp, 1, OpusRepacketizer);
- opus_repacketizer_init(rp);
-
- bak_mode = st->user_forced_mode;
- bak_bandwidth = st->user_bandwidth;
- bak_channels = st->force_channels;
-
- st->user_forced_mode = st->mode;
- st->user_bandwidth = st->bandwidth;
- st->force_channels = st->stream_channels;
-
- bak_to_mono = st->silk_mode.toMono;
- if (bak_to_mono)
- st->force_channels = 1;
- else
- st->prev_channels = st->stream_channels;
-
- for (i=0;i<nb_frames;i++)
- {
- st->silk_mode.toMono = 0;
- st->nonfinal_frame = i<(nb_frames-1);
-
- /* When switching from SILK/Hybrid to CELT, only ask for a switch at the last frame */
- if (to_celt && i==nb_frames-1)
- st->user_forced_mode = MODE_CELT_ONLY;
-
- tmp_len = opus_encode_native(st, pcm+i*(st->channels*frame_size), frame_size,
- tmp_data+i*bytes_per_frame, bytes_per_frame, lsb_depth, NULL, 0, 0, 0, 0,
- NULL, float_api);
-
- if (tmp_len<0)
- {
- RESTORE_STACK;
- return OPUS_INTERNAL_ERROR;
- }
-
- ret = opus_repacketizer_cat(rp, tmp_data+i*bytes_per_frame, tmp_len);
-
- if (ret<0)
- {
- RESTORE_STACK;
- return OPUS_INTERNAL_ERROR;
- }
- }
-
- ret = opus_repacketizer_out_range_impl(rp, 0, nb_frames, data, repacketize_len, 0, !st->use_vbr);
-
- if (ret<0)
- {
- RESTORE_STACK;
- return OPUS_INTERNAL_ERROR;
- }
-
- /* Discard configs that were forced locally for the purpose of repacketization */
- st->user_forced_mode = bak_mode;
- st->user_bandwidth = bak_bandwidth;
- st->force_channels = bak_channels;
- st->silk_mode.toMono = bak_to_mono;
-
- RESTORE_STACK;
- return ret;
-}
-
static int compute_redundancy_bytes(opus_int32 max_data_bytes, opus_int32 bitrate_bps, int frame_rate, int channels)
{
int redundancy_bytes_cap;
@@ -1049,6 +1042,18 @@ static int compute_redundancy_bytes(opus_int32 max_data_bytes, opus_int32 bitrat
return redundancy_bytes;
}
+/* Forward declaration of opus_encode_frame_native(); its definition is not
+ part of this section. The parameter list varies with the build: the DRED
+ bitrate is only present with ENABLE_DRED, and the analysis info and silence
+ flag are only present when the float API is enabled. */
+static opus_int32 opus_encode_frame_native(OpusEncoder *st, const opus_val16 *pcm, int frame_size,
+ unsigned char *data, opus_int32 max_data_bytes,
+ int float_api, int first_frame,
+#ifdef ENABLE_DRED
+ opus_int32 dred_bitrate_bps,
+#endif
+#ifndef DISABLE_FLOAT_API
+ AnalysisInfo *analysis_info, int is_silence,
+#endif
+ int redundancy, int celt_to_silk, int prefill,
+ opus_int32 equiv_rate, int to_celt);
+
opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_size,
unsigned char *data, opus_int32 out_data_bytes, int lsb_depth,
const void *analysis_pcm, opus_int32 analysis_size, int c1, int c2,
@@ -1058,28 +1063,17 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
CELTEncoder *celt_enc;
int i;
int ret=0;
- opus_int32 nBytes;
- ec_enc enc;
- int bytes_target;
int prefill=0;
- int start_band = 0;
int redundancy = 0;
- int redundancy_bytes = 0; /* Number of bytes to use for redundancy frame */
int celt_to_silk = 0;
- VARDECL(opus_val16, pcm_buf);
- int nb_compr_bytes;
int to_celt = 0;
- opus_uint32 redundant_rng = 0;
- int cutoff_Hz, hp_freq_smth1;
int voice_est; /* Probability of voice in Q7 */
opus_int32 equiv_rate;
- int delay_compensation;
int frame_rate;
opus_int32 max_rate; /* Max bitrate we're allowed to use */
int curr_bandwidth;
- opus_val16 HB_gain;
opus_int32 max_data_bytes; /* Max number of bytes we're allowed to use */
- int total_buffer;
+ opus_int32 cbr_bytes=-1;
opus_val16 stereo_width;
const CELTMode *celt_mode;
#ifndef DISABLE_FLOAT_API
@@ -1088,10 +1082,9 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
int analysis_read_subframe_bak=-1;
int is_silence = 0;
#endif
- opus_int activity = VAD_NO_DECISION;
-
- VARDECL(opus_val16, tmp_prefill);
-
+#ifdef ENABLE_DRED
+ opus_int32 dred_bitrate_bps;
+#endif
ALLOC_STACK;
max_data_bytes = IMIN(1276, out_data_bytes);
@@ -1112,10 +1105,6 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
silk_enc = (char*)st+st->silk_enc_offset;
celt_enc = (CELTEncoder*)((char*)st+st->celt_enc_offset);
- if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY)
- delay_compensation = 0;
- else
- delay_compensation = st->delay_compensation;
lsb_depth = IMIN(lsb_depth, st->lsb_depth);
@@ -1157,20 +1146,6 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
if (!is_silence)
st->voice_ratio = -1;
- if (is_silence)
- {
- activity = !is_silence;
- } else if (analysis_info.valid)
- {
- activity = analysis_info.activity_probability >= DTX_ACTIVITY_THRESHOLD;
- if (!activity)
- {
- /* Mark as active if this noise frame is sufficiently loud */
- opus_val32 noise_energy = compute_frame_energy(pcm, frame_size, st->channels, st->arch);
- activity = st->peak_signal_energy < (PSEUDO_SNR_THRESHOLD * noise_energy);
- }
- }
-
st->detected_bandwidth = 0;
if (analysis_info.valid)
{
@@ -1207,21 +1182,24 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
stereo_width = compute_stereo_width(pcm, frame_size, st->Fs, &st->width_mem);
else
stereo_width = 0;
- total_buffer = delay_compensation;
st->bitrate_bps = user_bitrate_to_bitrate(st, frame_size, max_data_bytes);
frame_rate = st->Fs/frame_size;
if (!st->use_vbr)
{
- int cbrBytes;
/* Multiply by 12 to make sure the division is exact. */
int frame_rate12 = 12*st->Fs/frame_size;
/* We need to make sure that "int" values always fit in 16 bits. */
- cbrBytes = IMIN( (12*st->bitrate_bps/8 + frame_rate12/2)/frame_rate12, max_data_bytes);
- st->bitrate_bps = cbrBytes*(opus_int32)frame_rate12*8/12;
+ cbr_bytes = IMIN( (12*st->bitrate_bps/8 + frame_rate12/2)/frame_rate12, max_data_bytes);
+ st->bitrate_bps = cbr_bytes*(opus_int32)frame_rate12*8/12;
/* Make sure we provide at least one byte to avoid failing. */
- max_data_bytes = IMAX(1, cbrBytes);
+ max_data_bytes = IMAX(1, cbr_bytes);
}
+#ifdef ENABLE_DRED
+ /* Allocate some of the bits to DRED if needed. */
+ dred_bitrate_bps = compute_dred_bitrate(st, st->bitrate_bps, frame_size);
+ st->bitrate_bps -= dred_bitrate_bps;
+#endif
if (max_data_bytes<3 || st->bitrate_bps < 3*frame_rate*8
|| (frame_rate<50 && (max_data_bytes*frame_rate<300 || st->bitrate_bps < 2400)))
{
@@ -1575,6 +1553,15 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
{
int enc_frame_size;
int nb_frames;
+ VARDECL(unsigned char, tmp_data);
+ VARDECL(OpusRepacketizer, rp);
+ int max_header_bytes;
+ opus_int32 repacketize_len;
+ opus_int32 max_len_sum;
+ opus_int32 tot_size=0;
+ unsigned char *curr_data;
+ int tmp_len;
+ int dtx_count = 0;
if (st->mode == MODE_SILK_ONLY)
{
@@ -1593,17 +1580,186 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
#ifndef DISABLE_FLOAT_API
if (analysis_read_pos_bak!= -1)
{
+ /* Reset analysis position to the beginning of the first frame so we
+ can use it one frame at a time. */
st->analysis.read_pos = analysis_read_pos_bak;
st->analysis.read_subframe = analysis_read_subframe_bak;
}
#endif
- ret = encode_multiframe_packet(st, pcm, nb_frames, enc_frame_size, data,
- out_data_bytes, to_celt, lsb_depth, float_api);
+ /* Worst cases:
+ * 2 frames: Code 2 with different compressed sizes
+ * >2 frames: Code 3 VBR */
+ max_header_bytes = nb_frames == 2 ? 3 : (2+(nb_frames-1)*2);
+
+ if (st->use_vbr || st->user_bitrate_bps==OPUS_BITRATE_MAX)
+ repacketize_len = out_data_bytes;
+ else {
+ celt_assert(cbr_bytes>=0);
+ repacketize_len = IMIN(cbr_bytes, out_data_bytes);
+ }
+ max_len_sum = nb_frames + repacketize_len - max_header_bytes;
+
+ ALLOC(tmp_data, max_len_sum, unsigned char);
+ curr_data = tmp_data;
+ ALLOC(rp, 1, OpusRepacketizer);
+ opus_repacketizer_init(rp);
+
+ int bak_to_mono = st->silk_mode.toMono;
+ if (bak_to_mono)
+ st->force_channels = 1;
+ else
+ st->prev_channels = st->stream_channels;
+
+ for (i=0;i<nb_frames;i++)
+ {
+ int first_frame;
+ int frame_to_celt;
+ int frame_redundancy;
+ opus_int32 curr_max;
+ /* Attempt DRED encoding until we have a non-DTX frame. In case of DTX refresh,
+ that allows for DRED not to be in the first frame. */
+ first_frame = (i == 0) || (i == dtx_count);
+ st->silk_mode.toMono = 0;
+ st->nonfinal_frame = i<(nb_frames-1);
+
+ /* When switching from SILK/Hybrid to CELT, only ask for a switch at the last frame */
+ frame_to_celt = to_celt && i==nb_frames-1;
+ frame_redundancy = redundancy && (frame_to_celt || (!to_celt && i==0));
+
+ curr_max = IMIN(3*st->bitrate_bps/(3*8*st->Fs/enc_frame_size), max_len_sum/nb_frames);
+#ifdef ENABLE_DRED
+ curr_max = IMIN(curr_max, (max_len_sum-3*dred_bitrate_bps/(3*8*st->Fs/frame_size))/nb_frames);
+ if (first_frame) curr_max += 3*dred_bitrate_bps/(3*8*st->Fs/frame_size);
+#endif
+ curr_max = IMIN(max_len_sum-tot_size, curr_max);
+#ifndef DISABLE_FLOAT_API
+ if (analysis_read_pos_bak != -1) {
+ is_silence = is_digital_silence(pcm, frame_size, st->channels, lsb_depth);
+ /* Get analysis for current frame. */
+ tonality_get_info(&st->analysis, &analysis_info, enc_frame_size);
+ }
+#endif
+
+ tmp_len = opus_encode_frame_native(st, pcm+i*(st->channels*enc_frame_size), enc_frame_size, curr_data, curr_max, float_api, first_frame,
+#ifdef ENABLE_DRED
+ dred_bitrate_bps,
+#endif
+#ifndef DISABLE_FLOAT_API
+ &analysis_info,
+ is_silence,
+#endif
+ frame_redundancy, celt_to_silk, prefill,
+ equiv_rate, frame_to_celt
+ );
+ if (tmp_len<0)
+ {
+ RESTORE_STACK;
+ return OPUS_INTERNAL_ERROR;
+ } else if (tmp_len==1) {
+ dtx_count++;
+ }
+ ret = opus_repacketizer_cat(rp, curr_data, tmp_len);
+
+ if (ret<0)
+ {
+ RESTORE_STACK;
+ return OPUS_INTERNAL_ERROR;
+ }
+ tot_size += tmp_len;
+ curr_data += tmp_len;
+ }
+ ret = opus_repacketizer_out_range_impl(rp, 0, nb_frames, data, repacketize_len, 0, !st->use_vbr && (dtx_count != nb_frames), NULL, 0);
+ if (ret<0)
+ {
+ ret = OPUS_INTERNAL_ERROR;
+ }
+ st->silk_mode.toMono = bak_to_mono;
RESTORE_STACK;
return ret;
+ } else {
+ ret = opus_encode_frame_native(st, pcm, frame_size, data, max_data_bytes, float_api, 1,
+#ifdef ENABLE_DRED
+ dred_bitrate_bps,
+#endif
+#ifndef DISABLE_FLOAT_API
+ &analysis_info,
+ is_silence,
+#endif
+ redundancy, celt_to_silk, prefill,
+ equiv_rate, to_celt
+ );
+ RESTORE_STACK;
+ return ret;
}
+}
+
+static opus_int32 opus_encode_frame_native(OpusEncoder *st, const opus_val16 *pcm, int frame_size,
+ unsigned char *data, opus_int32 max_data_bytes,
+ int float_api, int first_frame,
+#ifdef ENABLE_DRED
+ opus_int32 dred_bitrate_bps,
+#endif
+#ifndef DISABLE_FLOAT_API
+ AnalysisInfo *analysis_info, int is_silence,
+#endif
+ int redundancy, int celt_to_silk, int prefill,
+ opus_int32 equiv_rate, int to_celt)
+{
+ void *silk_enc;
+ CELTEncoder *celt_enc;
+ const CELTMode *celt_mode;
+ int i;
+ int ret=0;
+ opus_int32 nBytes;
+ ec_enc enc;
+ int bytes_target;
+ int start_band = 0;
+ int redundancy_bytes = 0; /* Number of bytes to use for redundancy frame */
+ int nb_compr_bytes;
+ opus_uint32 redundant_rng = 0;
+ int cutoff_Hz;
+ int hp_freq_smth1;
+ opus_val16 HB_gain;
+ int apply_padding;
+ int frame_rate;
+ int curr_bandwidth;
+ int delay_compensation;
+ int total_buffer;
+ opus_int activity = VAD_NO_DECISION;
+ VARDECL(opus_val16, pcm_buf);
+ VARDECL(opus_val16, tmp_prefill);
+ SAVE_STACK;
+
+ st->rangeFinal = 0;
+ silk_enc = (char*)st+st->silk_enc_offset;
+ celt_enc = (CELTEncoder*)((char*)st+st->celt_enc_offset);
+ celt_encoder_ctl(celt_enc, CELT_GET_MODE(&celt_mode));
+ curr_bandwidth = st->bandwidth;
+ if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY)
+ delay_compensation = 0;
+ else
+ delay_compensation = st->delay_compensation;
+ total_buffer = delay_compensation;
+
+ frame_rate = st->Fs/frame_size;
+
+#ifndef DISABLE_FLOAT_API
+ if (is_silence)
+ {
+ activity = !is_silence;
+ } else if (analysis_info->valid)
+ {
+ activity = analysis_info->activity_probability >= DTX_ACTIVITY_THRESHOLD;
+ if (!activity)
+ {
+ /* Mark as active if this noise frame is sufficiently loud */
+ opus_val32 noise_energy = compute_frame_energy(pcm, frame_size, st->channels, st->arch);
+ activity = st->peak_signal_energy < (PSEUDO_SNR_THRESHOLD * noise_energy);
+ }
+ }
+#endif
/* For the first frame at a new SILK bandwidth */
if (st->silk_bw_switch)
@@ -1611,7 +1767,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
redundancy = 1;
celt_to_silk = 1;
st->silk_bw_switch = 0;
- /* Do a prefill without reseting the sampling rate control. */
+ /* Do a prefill without resetting the sampling rate control. */
prefill=2;
}
@@ -1651,6 +1807,25 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
if (st->application == OPUS_APPLICATION_VOIP)
{
hp_cutoff(pcm, cutoff_Hz, &pcm_buf[total_buffer*st->channels], st->hp_mem, frame_size, st->channels, st->Fs, st->arch);
+
+#ifdef ENABLE_OSCE_TRAINING_DATA
+ /* Write out the high-pass filtered clean signal. */
+ static FILE *fout =NULL;
+ if (fout == NULL)
+ {
+ fout = fopen("clean_hp.s16", "wb");
+ }
+
+ {
+ int idx;
+ opus_int16 tmp;
+ for (idx = 0; idx < frame_size; idx++)
+ {
+ tmp = (opus_int16) (32768 * pcm_buf[total_buffer + idx] + 0.5f);
+ fwrite(&tmp, sizeof(tmp), 1, fout);
+ }
+ }
+#endif
} else {
dc_reject(pcm, 3, &pcm_buf[total_buffer*st->channels], st->hp_mem, frame_size, st->channels, st->Fs);
}
@@ -1667,8 +1842,24 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
st->hp_mem[0] = st->hp_mem[1] = st->hp_mem[2] = st->hp_mem[3] = 0;
}
}
+#else
+ (void)float_api;
#endif
+#ifdef ENABLE_DRED
+ if ( st->dred_duration > 0 && st->dred_encoder.loaded ) {
+ int frame_size_400Hz;
+ /* DRED Encoder */
+ dred_compute_latents( &st->dred_encoder, &pcm_buf[total_buffer*st->channels], frame_size, total_buffer, st->arch );
+ frame_size_400Hz = frame_size*400/st->Fs;
+ OPUS_MOVE(&st->activity_mem[frame_size_400Hz], st->activity_mem, 4*DRED_MAX_FRAMES-frame_size_400Hz);
+ for (i=0;i<frame_size_400Hz;i++)
+ st->activity_mem[i] = activity;
+ } else {
+ st->dred_encoder.latents_buffer_fill = 0;
+ OPUS_CLEAR(st->activity_mem, DRED_MAX_FRAMES);
+ }
+#endif
/* SILK processing */
HB_gain = Q15ONE;
@@ -1763,7 +1954,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
st->silk_mode.maxInternalSampleRate = 16000;
if (st->mode == MODE_SILK_ONLY)
{
- opus_int32 effective_max_rate = max_rate;
+ opus_int32 effective_max_rate = frame_rate*max_data_bytes*8;
if (frame_rate > 50)
effective_max_rate = effective_max_rate*2/3;
if (effective_max_rate < 8000)
@@ -1793,9 +1984,19 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
}
if (st->silk_mode.useCBR)
{
+ /* When we're in CBR mode, but we have non-SILK data to encode, switch SILK to VBR with cap to
+ save on complexity. Any variations will be absorbed by CELT and/or DRED and we can still
+ produce a constant bitrate without wasting bits. */
+#ifdef ENABLE_DRED
+ if (st->mode == MODE_HYBRID || dred_bitrate_bps > 0)
+#else
if (st->mode == MODE_HYBRID)
+#endif
{
- st->silk_mode.maxBits = IMIN(st->silk_mode.maxBits, st->silk_mode.bitRate * frame_size / st->Fs);
+ /* Allow SILK to steal up to 25% of the remaining bits */
+ opus_int16 other_bits = IMAX(0, st->silk_mode.maxBits - st->silk_mode.bitRate * frame_size / st->Fs);
+ st->silk_mode.maxBits = IMAX(0, st->silk_mode.maxBits - other_bits*3/4);
+ st->silk_mode.useCBR = 0;
}
} else {
/* Constrained VBR. */
@@ -1908,26 +2109,10 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
if (st->mode != MODE_SILK_ONLY)
{
opus_val32 celt_pred=2;
- celt_encoder_ctl(celt_enc, OPUS_SET_VBR(0));
/* We may still decide to disable prediction later */
if (st->silk_mode.reducedDependency)
celt_pred = 0;
celt_encoder_ctl(celt_enc, CELT_SET_PREDICTION(celt_pred));
-
- if (st->mode == MODE_HYBRID)
- {
- if( st->use_vbr ) {
- celt_encoder_ctl(celt_enc, OPUS_SET_BITRATE(st->bitrate_bps-st->silk_mode.bitRate));
- celt_encoder_ctl(celt_enc, OPUS_SET_VBR_CONSTRAINT(0));
- }
- } else {
- if (st->use_vbr)
- {
- celt_encoder_ctl(celt_enc, OPUS_SET_VBR(1));
- celt_encoder_ctl(celt_enc, OPUS_SET_VBR_CONSTRAINT(st->vbr_constraint));
- celt_encoder_ctl(celt_enc, OPUS_SET_BITRATE(st->bitrate_bps));
- }
- }
}
ALLOC(tmp_prefill, st->channels*st->Fs/400, opus_val16);
@@ -2021,13 +2206,27 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
ec_enc_done(&enc);
nb_compr_bytes = ret;
} else {
- nb_compr_bytes = (max_data_bytes-1)-redundancy_bytes;
- ec_enc_shrink(&enc, nb_compr_bytes);
+ nb_compr_bytes = (max_data_bytes-1)-redundancy_bytes;
+#ifdef ENABLE_DRED
+ if (st->dred_duration > 0)
+ {
+ int max_celt_bytes;
+ opus_int32 dred_bytes = dred_bitrate_bps/(frame_rate*8);
+ /* Allow CELT to steal up to 25% of the remaining bits. */
+ max_celt_bytes = nb_compr_bytes - dred_bytes*3/4;
+ /* But try to give CELT at least 5 bytes to prevent a mismatch with
+ the redundancy signaling. */
+ max_celt_bytes = IMAX((ec_tell(&enc)+7)/8 + 5, max_celt_bytes);
+ /* Subject to the original max. */
+ nb_compr_bytes = IMIN(nb_compr_bytes, max_celt_bytes);
+ }
+#endif
+ ec_enc_shrink(&enc, nb_compr_bytes);
}
#ifndef DISABLE_FLOAT_API
if (redundancy || st->mode != MODE_SILK_ONLY)
- celt_encoder_ctl(celt_enc, CELT_SET_ANALYSIS(&analysis_info));
+ celt_encoder_ctl(celt_enc, CELT_SET_ANALYSIS(analysis_info));
#endif
if (st->mode == MODE_HYBRID) {
SILKInfo info;
@@ -2057,6 +2256,34 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
if (st->mode != MODE_SILK_ONLY)
{
+ celt_encoder_ctl(celt_enc, OPUS_SET_VBR(st->use_vbr));
+ if (st->mode == MODE_HYBRID)
+ {
+ if( st->use_vbr ) {
+ celt_encoder_ctl(celt_enc, OPUS_SET_BITRATE(st->bitrate_bps-st->silk_mode.bitRate));
+ celt_encoder_ctl(celt_enc, OPUS_SET_VBR_CONSTRAINT(0));
+ }
+ } else {
+ if (st->use_vbr)
+ {
+ celt_encoder_ctl(celt_enc, OPUS_SET_VBR(1));
+ celt_encoder_ctl(celt_enc, OPUS_SET_VBR_CONSTRAINT(st->vbr_constraint));
+ celt_encoder_ctl(celt_enc, OPUS_SET_BITRATE(st->bitrate_bps));
+ }
+ }
+#ifdef ENABLE_DRED
+ /* When using DRED in CBR mode, we can actually make the CELT part VBR and have DRED pick up the slack. */
+ if (!st->use_vbr && st->dred_duration > 0)
+ {
+ opus_int32 celt_bitrate = st->bitrate_bps;
+ celt_encoder_ctl(celt_enc, OPUS_SET_VBR(1));
+ celt_encoder_ctl(celt_enc, OPUS_SET_VBR_CONSTRAINT(0));
+ if (st->mode == MODE_HYBRID) {
+ celt_bitrate -= st->silk_mode.bitRate;
+ }
+ celt_encoder_ctl(celt_enc, OPUS_SET_BITRATE(celt_bitrate));
+ }
+#endif
if (st->mode != st->prev_mode && st->prev_mode > 0)
{
unsigned char dummy[2];
@@ -2069,10 +2296,6 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
/* If false, we already busted the budget and we'll end up with a "PLC frame" */
if (ec_tell(&enc) <= 8*nb_compr_bytes)
{
- /* Set the bitrate again if it was overridden in the redundancy code above*/
- if (redundancy && celt_to_silk && st->mode==MODE_HYBRID && st->use_vbr)
- celt_encoder_ctl(celt_enc, OPUS_SET_BITRATE(st->bitrate_bps-st->silk_mode.bitRate));
- celt_encoder_ctl(celt_enc, OPUS_SET_VBR(st->use_vbr));
ret = celt_encode_with_ec(celt_enc, pcm_buf, frame_size, NULL, nb_compr_bytes, &enc);
if (ret < 0)
{
@@ -2080,10 +2303,10 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
return OPUS_INTERNAL_ERROR;
}
/* Put CELT->SILK redundancy data in the right place. */
- if (redundancy && celt_to_silk && st->mode==MODE_HYBRID && st->use_vbr)
+ if (redundancy && celt_to_silk && st->mode==MODE_HYBRID && nb_compr_bytes != ret)
{
OPUS_MOVE(data+ret, data+nb_compr_bytes, redundancy_bytes);
- nb_compr_bytes = nb_compr_bytes+redundancy_bytes;
+ nb_compr_bytes = ret+redundancy_bytes;
}
}
}
@@ -2140,7 +2363,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
/* DTX decision */
#ifndef DISABLE_FLOAT_API
- if (st->use_dtx && (analysis_info.valid || is_silence))
+ if (st->use_dtx && (analysis_info->valid || is_silence))
{
if (decide_dtx_mode(activity, &st->nb_no_activity_ms_Q1, 2*1000*frame_size/st->Fs))
{
@@ -2178,7 +2401,51 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
}
/* Count ToC and redundancy */
ret += 1+redundancy_bytes;
- if (!st->use_vbr)
+ apply_padding = !st->use_vbr;
+#ifdef ENABLE_DRED
+ if (st->dred_duration > 0 && st->dred_encoder.loaded && first_frame) {
+ opus_extension_data extension;
+ unsigned char buf[DRED_MAX_DATA_SIZE];
+ int dred_chunks;
+ int dred_bytes_left;
+ dred_chunks = IMIN((st->dred_duration+5)/4, DRED_NUM_REDUNDANCY_FRAMES/2);
+ if (st->use_vbr) dred_chunks = IMIN(dred_chunks, st->dred_target_chunks);
+ /* Remaining space for DRED, accounting for the cost of the 3 extra bytes for code 3, padding length, and extension number. */
+ dred_bytes_left = IMIN(DRED_MAX_DATA_SIZE, max_data_bytes-ret-3);
+ /* Account for the extra bytes required to signal large padding length. */
+ dred_bytes_left -= (dred_bytes_left+1+DRED_EXPERIMENTAL_BYTES)/255;
+ /* Check whether we actually have something to encode. */
+ if (dred_chunks >= 1 && dred_bytes_left >= DRED_MIN_BYTES+DRED_EXPERIMENTAL_BYTES) {
+ int dred_bytes;
+#ifdef DRED_EXPERIMENTAL_VERSION
+ /* Add temporary extension type and version.
+ These bytes will be removed once extension is finalized. */
+ buf[0] = 'D';
+ buf[1] = DRED_EXPERIMENTAL_VERSION;
+#endif
+ dred_bytes = dred_encode_silk_frame(&st->dred_encoder, buf+DRED_EXPERIMENTAL_BYTES, dred_chunks, dred_bytes_left-DRED_EXPERIMENTAL_BYTES,
+ st->dred_q0, st->dred_dQ, st->dred_qmax, st->activity_mem, st->arch);
+ if (dred_bytes > 0) {
+ dred_bytes += DRED_EXPERIMENTAL_BYTES;
+ celt_assert(dred_bytes <= dred_bytes_left);
+ extension.id = DRED_EXTENSION_ID;
+ extension.frame = 0;
+ extension.data = buf;
+ extension.len = dred_bytes;
+ ret = opus_packet_pad_impl(data, ret, max_data_bytes, !st->use_vbr, &extension, 1);
+ if (ret < 0)
+ {
+ RESTORE_STACK;
+ return OPUS_INTERNAL_ERROR;
+ }
+ apply_padding = 0;
+ }
+ }
+ }
+#else
+ (void)first_frame; /* Avoids a warning about first_frame being unused. */
+#endif
+ if (apply_padding)
{
if (opus_packet_pad(data, ret, max_data_bytes) != OPUS_OK)
{
@@ -2677,6 +2944,29 @@ int opus_encoder_ctl(OpusEncoder *st, int request, ...)
celt_encoder_ctl(celt_enc, OPUS_GET_PHASE_INVERSION_DISABLED(value));
}
break;
+#ifdef ENABLE_DRED
+ case OPUS_SET_DRED_DURATION_REQUEST:
+ {
+ opus_int32 value = va_arg(ap, opus_int32);
+ if(value<0 || value>DRED_MAX_FRAMES)
+ {
+ goto bad_arg;
+ }
+ st->dred_duration = value;
+ st->silk_mode.useDRED = !!value;
+ }
+ break;
+ case OPUS_GET_DRED_DURATION_REQUEST:
+ {
+ opus_int32 *value = va_arg(ap, opus_int32*);
+ if (!value)
+ {
+ goto bad_arg;
+ }
+ *value = st->dred_duration;
+ }
+ break;
+#endif
case OPUS_RESET_STATE:
{
void *silk_enc;
@@ -2692,6 +2982,10 @@ int opus_encoder_ctl(OpusEncoder *st, int request, ...)
celt_encoder_ctl(celt_enc, OPUS_RESET_STATE);
silk_InitEncoder( silk_enc, st->arch, &dummy );
+#ifdef ENABLE_DRED
+ /* Initialize DRED Encoder */
+ dred_encoder_reset( &st->dred_encoder );
+#endif
st->stream_channels = st->channels;
st->hybrid_stereo_width_Q14 = 1 << 14;
st->prev_HB_gain = Q15ONE;
@@ -2752,6 +3046,21 @@ int opus_encoder_ctl(OpusEncoder *st, int request, ...)
}
}
break;
+#ifdef USE_WEIGHTS_FILE
+ case OPUS_SET_DNN_BLOB_REQUEST:
+ {
+ const unsigned char *data = va_arg(ap, const unsigned char *);
+ opus_int32 len = va_arg(ap, opus_int32);
+ if(len<0 || data == NULL)
+ {
+ goto bad_arg;
+ }
+#ifdef ENABLE_DRED
+ ret = dred_encoder_load_model(&st->dred_encoder, data, len);
+#endif
+ }
+ break;
+#endif
case CELT_GET_MODE_REQUEST:
{
const CELTMode ** value = va_arg(ap, const CELTMode**);
diff --git a/media/libopus/src/opus_multistream_decoder.c b/media/libopus/src/opus_multistream_decoder.c
index a2837c3549..4ae877a759 100644
--- a/media/libopus/src/opus_multistream_decoder.c
+++ b/media/libopus/src/opus_multistream_decoder.c
@@ -162,7 +162,7 @@ static int opus_multistream_packet_validate(const unsigned char *data,
if (len<=0)
return OPUS_INVALID_PACKET;
count = opus_packet_parse_impl(data, len, s!=nb_streams-1, &toc, NULL,
- size, NULL, &packet_offset);
+ size, NULL, &packet_offset, NULL, NULL);
if (count<0)
return count;
tmp_samples = opus_packet_get_nb_samples(data, packet_offset, Fs);
@@ -250,7 +250,7 @@ int opus_multistream_decode_native(
return OPUS_INTERNAL_ERROR;
}
packet_offset = 0;
- ret = opus_decode_native(dec, data, len, buf, frame_size, decode_fec, s!=st->layout.nb_streams-1, &packet_offset, soft_clip);
+ ret = opus_decode_native(dec, data, len, buf, frame_size, decode_fec, s!=st->layout.nb_streams-1, &packet_offset, soft_clip, NULL, 0);
if (!do_plc)
{
data += packet_offset;
diff --git a/media/libopus/src/opus_multistream_encoder.c b/media/libopus/src/opus_multistream_encoder.c
index 213e3eb2c2..1725ade75a 100644
--- a/media/libopus/src/opus_multistream_encoder.c
+++ b/media/libopus/src/opus_multistream_encoder.c
@@ -1003,7 +1003,7 @@ int opus_multistream_encode_native
return OPUS_INTERNAL_ERROR;
}
len = opus_repacketizer_out_range_impl(&rp, 0, opus_repacketizer_get_nb_frames(&rp),
- data, max_data_bytes-tot_size, s != st->layout.nb_streams-1, !vbr && s == st->layout.nb_streams-1);
+ data, max_data_bytes-tot_size, s != st->layout.nb_streams-1, !vbr && s == st->layout.nb_streams-1, NULL, 0);
data += len;
tot_size += len;
}
diff --git a/media/libopus/src/opus_private.h b/media/libopus/src/opus_private.h
index 5e2463f546..364c21cebc 100644
--- a/media/libopus/src/opus_private.h
+++ b/media/libopus/src/opus_private.h
@@ -42,8 +42,17 @@ struct OpusRepacketizer {
const unsigned char *frames[48];
opus_int16 len[48];
int framesize;
+ const unsigned char *paddings[48];
+ opus_int32 padding_len[48];
};
+typedef struct {
+ int id;
+ int frame;
+ const unsigned char *data;
+ opus_int32 len;
+} opus_extension_data;
+
typedef struct ChannelLayout {
int nb_channels;
int nb_streams;
@@ -148,7 +157,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
int opus_decode_native(OpusDecoder *st, const unsigned char *data, opus_int32 len,
opus_val16 *pcm, int frame_size, int decode_fec, int self_delimited,
- opus_int32 *packet_offset, int soft_clip);
+ opus_int32 *packet_offset, int soft_clip, const OpusDRED *dred, opus_int32 dred_offset);
/* Make sure everything is properly aligned. */
static OPUS_INLINE int align(int i)
@@ -162,13 +171,18 @@ static OPUS_INLINE int align(int i)
return ((i + alignment - 1) / alignment) * alignment;
}
+/* More than that is ridiculous for now (3 * max frames per packet)*/
+opus_int32 skip_extension(const unsigned char **data, opus_int32 len, opus_int32 *header_size);
+
int opus_packet_parse_impl(const unsigned char *data, opus_int32 len,
int self_delimited, unsigned char *out_toc,
const unsigned char *frames[48], opus_int16 size[48],
- int *payload_offset, opus_int32 *packet_offset);
+ int *payload_offset, opus_int32 *packet_offset,
+ const unsigned char **padding, opus_int32 *padding_len);
opus_int32 opus_repacketizer_out_range_impl(OpusRepacketizer *rp, int begin, int end,
- unsigned char *data, opus_int32 maxlen, int self_delimited, int pad);
+ unsigned char *data, opus_int32 maxlen, int self_delimited, int pad,
+ const opus_extension_data *extensions, int nb_extensions);
int pad_frame(unsigned char *data, opus_int32 len, opus_int32 new_len);
@@ -198,4 +212,12 @@ int opus_multistream_decode_native(
void *user_data
);
+opus_int32 opus_packet_extensions_parse(const unsigned char *data, opus_int32 len, opus_extension_data *extensions, opus_int32 *nb_extensions);
+
+opus_int32 opus_packet_extensions_generate(unsigned char *data, opus_int32 len, const opus_extension_data *extensions, int nb_extensions, int pad);
+
+opus_int32 opus_packet_extensions_count(const unsigned char *data, opus_int32 len);
+
+opus_int32 opus_packet_pad_impl(unsigned char *data, opus_int32 len, opus_int32 new_len, int pad, const opus_extension_data *extensions, int nb_extensions);
+
#endif /* OPUS_PRIVATE_H */
diff --git a/media/libopus/src/opus_projection_encoder.c b/media/libopus/src/opus_projection_encoder.c
index 06fb2d2526..92813ad01f 100644
--- a/media/libopus/src/opus_projection_encoder.c
+++ b/media/libopus/src/opus_projection_encoder.c
@@ -177,6 +177,20 @@ opus_int32 opus_projection_ambisonics_encoder_get_size(int channels,
demixing_matrix_rows = mapping_matrix_toa_demixing.rows;
demixing_matrix_cols = mapping_matrix_toa_demixing.cols;
}
+ else if (order_plus_one == 5)
+ {
+ mixing_matrix_rows = mapping_matrix_fourthoa_mixing.rows;
+ mixing_matrix_cols = mapping_matrix_fourthoa_mixing.cols;
+ demixing_matrix_rows = mapping_matrix_fourthoa_demixing.rows;
+ demixing_matrix_cols = mapping_matrix_fourthoa_demixing.cols;
+ }
+ else if (order_plus_one == 6)
+ {
+ mixing_matrix_rows = mapping_matrix_fifthoa_mixing.rows;
+ mixing_matrix_cols = mapping_matrix_fifthoa_mixing.cols;
+ demixing_matrix_rows = mapping_matrix_fifthoa_demixing.rows;
+ demixing_matrix_cols = mapping_matrix_fifthoa_demixing.cols;
+ }
else
return 0;
@@ -245,6 +259,20 @@ int opus_projection_ambisonics_encoder_init(OpusProjectionEncoder *st, opus_int3
mapping_matrix_toa_mixing_data,
sizeof(mapping_matrix_toa_mixing_data));
}
+ else if (order_plus_one == 5)
+ {
+ mapping_matrix_init(mixing_matrix, mapping_matrix_fourthoa_mixing.rows,
+ mapping_matrix_fourthoa_mixing.cols, mapping_matrix_fourthoa_mixing.gain,
+ mapping_matrix_fourthoa_mixing_data,
+ sizeof(mapping_matrix_fourthoa_mixing_data));
+ }
+ else if (order_plus_one == 6)
+ {
+ mapping_matrix_init(mixing_matrix, mapping_matrix_fifthoa_mixing.rows,
+ mapping_matrix_fifthoa_mixing.cols, mapping_matrix_fifthoa_mixing.gain,
+ mapping_matrix_fifthoa_mixing_data,
+ sizeof(mapping_matrix_fifthoa_mixing_data));
+ }
else
return OPUS_BAD_ARG;
@@ -276,6 +304,20 @@ int opus_projection_ambisonics_encoder_init(OpusProjectionEncoder *st, opus_int3
mapping_matrix_toa_demixing_data,
sizeof(mapping_matrix_toa_demixing_data));
}
+ else if (order_plus_one == 5)
+ {
+ mapping_matrix_init(demixing_matrix, mapping_matrix_fourthoa_demixing.rows,
+ mapping_matrix_fourthoa_demixing.cols, mapping_matrix_fourthoa_demixing.gain,
+ mapping_matrix_fourthoa_demixing_data,
+ sizeof(mapping_matrix_fourthoa_demixing_data));
+ }
+ else if (order_plus_one == 6)
+ {
+ mapping_matrix_init(demixing_matrix, mapping_matrix_fifthoa_demixing.rows,
+ mapping_matrix_fifthoa_demixing.cols, mapping_matrix_fifthoa_demixing.gain,
+ mapping_matrix_fifthoa_demixing_data,
+ sizeof(mapping_matrix_fifthoa_demixing_data));
+ }
else
return OPUS_BAD_ARG;
diff --git a/media/libopus/src/repacketizer.c b/media/libopus/src/repacketizer.c
index bda44a148a..6a7a8b3d8e 100644
--- a/media/libopus/src/repacketizer.c
+++ b/media/libopus/src/repacketizer.c
@@ -32,6 +32,7 @@
#include "opus.h"
#include "opus_private.h"
#include "os_support.h"
+#include "stack_alloc.h"
int opus_repacketizer_get_size(void)
@@ -82,10 +83,19 @@ static int opus_repacketizer_cat_impl(OpusRepacketizer *rp, const unsigned char
return OPUS_INVALID_PACKET;
}
- ret=opus_packet_parse_impl(data, len, self_delimited, &tmp_toc, &rp->frames[rp->nb_frames], &rp->len[rp->nb_frames], NULL, NULL);
+ ret=opus_packet_parse_impl(data, len, self_delimited, &tmp_toc, &rp->frames[rp->nb_frames], &rp->len[rp->nb_frames],
+ NULL, NULL, &rp->paddings[rp->nb_frames], &rp->padding_len[rp->nb_frames]);
if(ret<1)return ret;
- rp->nb_frames += curr_nb_frames;
+ /* set padding length to zero for all but the first frame */
+ while (curr_nb_frames > 1)
+ {
+ rp->nb_frames++;
+ rp->padding_len[rp->nb_frames] = 0;
+ rp->paddings[rp->nb_frames] = NULL;
+ curr_nb_frames--;
+ }
+ rp->nb_frames++;
return OPUS_OK;
}
@@ -100,17 +110,23 @@ int opus_repacketizer_get_nb_frames(OpusRepacketizer *rp)
}
opus_int32 opus_repacketizer_out_range_impl(OpusRepacketizer *rp, int begin, int end,
- unsigned char *data, opus_int32 maxlen, int self_delimited, int pad)
+ unsigned char *data, opus_int32 maxlen, int self_delimited, int pad, const opus_extension_data *extensions, int nb_extensions)
{
int i, count;
opus_int32 tot_size;
opus_int16 *len;
const unsigned char **frames;
unsigned char * ptr;
+ int ones_begin=0, ones_end=0;
+ int ext_begin=0, ext_len=0;
+ int ext_count, total_ext_count;
+ VARDECL(opus_extension_data, all_extensions);
+ SAVE_STACK;
if (begin<0 || begin>=end || end>rp->nb_frames)
{
/*fprintf(stderr, "%d %d %d\n", begin, end, rp->nb_frames);*/
+ RESTORE_STACK;
return OPUS_BAD_ARG;
}
count = end-begin;
@@ -122,13 +138,50 @@ opus_int32 opus_repacketizer_out_range_impl(OpusRepacketizer *rp, int begin, int
else
tot_size = 0;
+ /* figure out total number of extensions */
+ total_ext_count = nb_extensions;
+ for (i=begin;i<end;i++)
+ {
+ int n = opus_packet_extensions_count(rp->paddings[i], rp->padding_len[i]);
+ if (n > 0) total_ext_count += n;
+ }
+ ALLOC(all_extensions, total_ext_count ? total_ext_count : ALLOC_NONE, opus_extension_data);
+ /* copy over any extensions that were passed in */
+ for (ext_count=0;ext_count<nb_extensions;ext_count++)
+ {
+ all_extensions[ext_count] = extensions[ext_count];
+ }
+
+ /* incorporate any extensions from the repacketizer padding */
+ for (i=begin;i<end;i++)
+ {
+ int frame_ext_count, j;
+ frame_ext_count = total_ext_count - ext_count;
+ int ret = opus_packet_extensions_parse(rp->paddings[i], rp->padding_len[i],
+ &all_extensions[ext_count], &frame_ext_count);
+ if (ret<0)
+ {
+ RESTORE_STACK;
+ return OPUS_INTERNAL_ERROR;
+ }
+ /* renumber the extension frame numbers */
+ for (j=0;j<frame_ext_count;j++)
+ {
+ all_extensions[ext_count+j].frame += i-begin;
+ }
+ ext_count += frame_ext_count;
+ }
+
ptr = data;
if (count==1)
{
/* Code 0 */
tot_size += len[0]+1;
if (tot_size > maxlen)
+ {
+ RESTORE_STACK;
return OPUS_BUFFER_TOO_SMALL;
+ }
*ptr++ = rp->toc&0xFC;
} else if (count==2)
{
@@ -137,18 +190,24 @@ opus_int32 opus_repacketizer_out_range_impl(OpusRepacketizer *rp, int begin, int
/* Code 1 */
tot_size += 2*len[0]+1;
if (tot_size > maxlen)
+ {
+ RESTORE_STACK;
return OPUS_BUFFER_TOO_SMALL;
+ }
*ptr++ = (rp->toc&0xFC) | 0x1;
} else {
/* Code 2 */
tot_size += len[0]+len[1]+2+(len[0]>=252);
if (tot_size > maxlen)
+ {
+ RESTORE_STACK;
return OPUS_BUFFER_TOO_SMALL;
+ }
*ptr++ = (rp->toc&0xFC) | 0x2;
ptr += encode_size(len[0], ptr);
}
}
- if (count > 2 || (pad && tot_size < maxlen))
+ if (count > 2 || (pad && tot_size < maxlen) || ext_count > 0)
{
/* Code 3 */
int vbr;
@@ -177,22 +236,45 @@ opus_int32 opus_repacketizer_out_range_impl(OpusRepacketizer *rp, int begin, int
tot_size += len[count-1];
if (tot_size > maxlen)
+ {
+ RESTORE_STACK;
return OPUS_BUFFER_TOO_SMALL;
+ }
*ptr++ = (rp->toc&0xFC) | 0x3;
*ptr++ = count | 0x80;
} else {
tot_size += count*len[0]+2;
if (tot_size > maxlen)
+ {
+ RESTORE_STACK;
return OPUS_BUFFER_TOO_SMALL;
+ }
*ptr++ = (rp->toc&0xFC) | 0x3;
*ptr++ = count;
}
pad_amount = pad ? (maxlen-tot_size) : 0;
+ if (ext_count>0)
+ {
+ /* figure out how much space we need for the extensions */
+ ext_len = opus_packet_extensions_generate(NULL, maxlen-tot_size, all_extensions, ext_count, 0);
+ if (ext_len < 0) return ext_len;
+ if (!pad)
+ pad_amount = ext_len + ext_len/254 + 1;
+ }
if (pad_amount != 0)
{
int nb_255s;
data[1] |= 0x40;
nb_255s = (pad_amount-1)/255;
+ if (tot_size + ext_len + nb_255s + 1 > maxlen)
+ {
+ RESTORE_STACK;
+ return OPUS_BUFFER_TOO_SMALL;
+ }
+ ext_begin = tot_size+pad_amount-ext_len;
+ /* Prepend 0x01 padding */
+ ones_begin = tot_size+nb_255s+1;
+ ones_end = tot_size+pad_amount-ext_len;
for (i=0;i<nb_255s;i++)
*ptr++ = 255;
*ptr++ = pad_amount-255*nb_255s-1;
@@ -218,42 +300,62 @@ opus_int32 opus_repacketizer_out_range_impl(OpusRepacketizer *rp, int begin, int
OPUS_MOVE(ptr, frames[i], len[i]);
ptr += len[i];
}
- if (pad)
+ if (ext_len > 0) {
+ int ret = opus_packet_extensions_generate(&data[ext_begin], ext_len, all_extensions, ext_count, 0);
+ celt_assert(ret == ext_len);
+ }
+ for (i=ones_begin;i<ones_end;i++)
+ data[i] = 0x01;
+ if (pad && ext_count==0)
{
/* Fill padding with zeros. */
while (ptr<data+maxlen)
*ptr++=0;
}
+ RESTORE_STACK;
return tot_size;
}
opus_int32 opus_repacketizer_out_range(OpusRepacketizer *rp, int begin, int end, unsigned char *data, opus_int32 maxlen)
{
- return opus_repacketizer_out_range_impl(rp, begin, end, data, maxlen, 0, 0);
+ return opus_repacketizer_out_range_impl(rp, begin, end, data, maxlen, 0, 0, NULL, 0);
}
opus_int32 opus_repacketizer_out(OpusRepacketizer *rp, unsigned char *data, opus_int32 maxlen)
{
- return opus_repacketizer_out_range_impl(rp, 0, rp->nb_frames, data, maxlen, 0, 0);
+ return opus_repacketizer_out_range_impl(rp, 0, rp->nb_frames, data, maxlen, 0, 0, NULL, 0);
}
-int opus_packet_pad(unsigned char *data, opus_int32 len, opus_int32 new_len)
+opus_int32 opus_packet_pad_impl(unsigned char *data, opus_int32 len, opus_int32 new_len, int pad, const opus_extension_data *extensions, int nb_extensions)
{
OpusRepacketizer rp;
opus_int32 ret;
+ VARDECL(unsigned char, copy);
+ SAVE_STACK;
if (len < 1)
return OPUS_BAD_ARG;
if (len==new_len)
return OPUS_OK;
else if (len > new_len)
return OPUS_BAD_ARG;
+ ALLOC(copy, len, unsigned char);
opus_repacketizer_init(&rp);
/* Moving payload to the end of the packet so we can do in-place padding */
- OPUS_MOVE(data+new_len-len, data, len);
- ret = opus_repacketizer_cat(&rp, data+new_len-len, len);
+ OPUS_COPY(copy, data, len);
+ ret = opus_repacketizer_cat(&rp, copy, len);
if (ret != OPUS_OK)
return ret;
- ret = opus_repacketizer_out_range_impl(&rp, 0, rp.nb_frames, data, new_len, 0, 1);
+ ret = opus_repacketizer_out_range_impl(&rp, 0, rp.nb_frames, data, new_len, 0, pad, extensions, nb_extensions);
+ RESTORE_STACK;
+ return ret;
+}
+
+int opus_packet_pad(unsigned char *data, opus_int32 len, opus_int32 new_len)
+{
+ opus_int32 ret;
+ ALLOC_STACK;
+ ret = opus_packet_pad_impl(data, len, new_len, 1, NULL, 0);
+ RESTORE_STACK;
if (ret > 0)
return OPUS_OK;
else
@@ -264,13 +366,19 @@ opus_int32 opus_packet_unpad(unsigned char *data, opus_int32 len)
{
OpusRepacketizer rp;
opus_int32 ret;
+ int i;
if (len < 1)
return OPUS_BAD_ARG;
opus_repacketizer_init(&rp);
ret = opus_repacketizer_cat(&rp, data, len);
if (ret < 0)
return ret;
- ret = opus_repacketizer_out_range_impl(&rp, 0, rp.nb_frames, data, len, 0, 0);
+ /* Discard all padding and extensions. */
+ for (i=0;i<rp.nb_frames;i++) {
+ rp.padding_len[i] = 0;
+ rp.paddings[i] = NULL;
+ }
+ ret = opus_repacketizer_out_range_impl(&rp, 0, rp.nb_frames, data, len, 0, 0, NULL, 0);
celt_assert(ret > 0 && ret <= len);
return ret;
}
@@ -297,7 +405,7 @@ int opus_multistream_packet_pad(unsigned char *data, opus_int32 len, opus_int32
if (len<=0)
return OPUS_INVALID_PACKET;
count = opus_packet_parse_impl(data, len, 1, &toc, NULL,
- size, NULL, &packet_offset);
+ size, NULL, &packet_offset, NULL, NULL);
if (count<0)
return count;
data += packet_offset;
@@ -324,18 +432,24 @@ opus_int32 opus_multistream_packet_unpad(unsigned char *data, opus_int32 len, in
for (s=0;s<nb_streams;s++)
{
opus_int32 ret;
+ int i;
int self_delimited = s!=nb_streams-1;
if (len<=0)
return OPUS_INVALID_PACKET;
opus_repacketizer_init(&rp);
ret = opus_packet_parse_impl(data, len, self_delimited, &toc, NULL,
- size, NULL, &packet_offset);
+ size, NULL, &packet_offset, NULL, NULL);
if (ret<0)
return ret;
ret = opus_repacketizer_cat_impl(&rp, data, packet_offset, self_delimited);
if (ret < 0)
return ret;
- ret = opus_repacketizer_out_range_impl(&rp, 0, rp.nb_frames, dst, len, self_delimited, 0);
+ /* Discard all padding and extensions. */
+ for (i=0;i<rp.nb_frames;i++) {
+ rp.padding_len[i] = 0;
+ rp.paddings[i] = NULL;
+ }
+ ret = opus_repacketizer_out_range_impl(&rp, 0, rp.nb_frames, dst, len, self_delimited, 0, NULL, 0);
if (ret < 0)
return ret;
else
diff --git a/media/libopus/src/tansig_table.h b/media/libopus/src/tansig_table.h
deleted file mode 100644
index c76f844a72..0000000000
--- a/media/libopus/src/tansig_table.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/* This file is auto-generated by gen_tables */
-
-static const float tansig_table[201] = {
-0.000000f, 0.039979f, 0.079830f, 0.119427f, 0.158649f,
-0.197375f, 0.235496f, 0.272905f, 0.309507f, 0.345214f,
-0.379949f, 0.413644f, 0.446244f, 0.477700f, 0.507977f,
-0.537050f, 0.564900f, 0.591519f, 0.616909f, 0.641077f,
-0.664037f, 0.685809f, 0.706419f, 0.725897f, 0.744277f,
-0.761594f, 0.777888f, 0.793199f, 0.807569f, 0.821040f,
-0.833655f, 0.845456f, 0.856485f, 0.866784f, 0.876393f,
-0.885352f, 0.893698f, 0.901468f, 0.908698f, 0.915420f,
-0.921669f, 0.927473f, 0.932862f, 0.937863f, 0.942503f,
-0.946806f, 0.950795f, 0.954492f, 0.957917f, 0.961090f,
-0.964028f, 0.966747f, 0.969265f, 0.971594f, 0.973749f,
-0.975743f, 0.977587f, 0.979293f, 0.980869f, 0.982327f,
-0.983675f, 0.984921f, 0.986072f, 0.987136f, 0.988119f,
-0.989027f, 0.989867f, 0.990642f, 0.991359f, 0.992020f,
-0.992631f, 0.993196f, 0.993718f, 0.994199f, 0.994644f,
-0.995055f, 0.995434f, 0.995784f, 0.996108f, 0.996407f,
-0.996682f, 0.996937f, 0.997172f, 0.997389f, 0.997590f,
-0.997775f, 0.997946f, 0.998104f, 0.998249f, 0.998384f,
-0.998508f, 0.998623f, 0.998728f, 0.998826f, 0.998916f,
-0.999000f, 0.999076f, 0.999147f, 0.999213f, 0.999273f,
-0.999329f, 0.999381f, 0.999428f, 0.999472f, 0.999513f,
-0.999550f, 0.999585f, 0.999617f, 0.999646f, 0.999673f,
-0.999699f, 0.999722f, 0.999743f, 0.999763f, 0.999781f,
-0.999798f, 0.999813f, 0.999828f, 0.999841f, 0.999853f,
-0.999865f, 0.999875f, 0.999885f, 0.999893f, 0.999902f,
-0.999909f, 0.999916f, 0.999923f, 0.999929f, 0.999934f,
-0.999939f, 0.999944f, 0.999948f, 0.999952f, 0.999956f,
-0.999959f, 0.999962f, 0.999965f, 0.999968f, 0.999970f,
-0.999973f, 0.999975f, 0.999977f, 0.999978f, 0.999980f,
-0.999982f, 0.999983f, 0.999984f, 0.999986f, 0.999987f,
-0.999988f, 0.999989f, 0.999990f, 0.999990f, 0.999991f,
-0.999992f, 0.999992f, 0.999993f, 0.999994f, 0.999994f,
-0.999994f, 0.999995f, 0.999995f, 0.999996f, 0.999996f,
-0.999996f, 0.999997f, 0.999997f, 0.999997f, 0.999997f,
-0.999997f, 0.999998f, 0.999998f, 0.999998f, 0.999998f,
-0.999998f, 0.999998f, 0.999999f, 0.999999f, 0.999999f,
-0.999999f, 0.999999f, 0.999999f, 0.999999f, 0.999999f,
-0.999999f, 0.999999f, 0.999999f, 0.999999f, 0.999999f,
-1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f,
-1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f,
-1.000000f,
-};