summaryrefslogtreecommitdiffstats
path: root/media/libopus/src/opus_encoder.c
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 01:13:27 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 01:13:27 +0000
commit40a355a42d4a9444dc753c04c6608dade2f06a23 (patch)
tree871fc667d2de662f171103ce5ec067014ef85e61 /media/libopus/src/opus_encoder.c
parentAdding upstream version 124.0.1. (diff)
downloadfirefox-adbda400be353e676059e335c3c0aaf99e719475.tar.xz
firefox-adbda400be353e676059e335c3c0aaf99e719475.zip
Adding upstream version 125.0.1.upstream/125.0.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'media/libopus/src/opus_encoder.c')
-rw-r--r--media/libopus/src/opus_encoder.c651
1 files changed, 480 insertions, 171 deletions
diff --git a/media/libopus/src/opus_encoder.c b/media/libopus/src/opus_encoder.c
index 8c8db5a546..d18d582f03 100644
--- a/media/libopus/src/opus_encoder.c
+++ b/media/libopus/src/opus_encoder.c
@@ -45,11 +45,19 @@
#include "analysis.h"
#include "mathops.h"
#include "tuning_parameters.h"
+
+#ifdef ENABLE_DRED
+#include "dred_coding.h"
+#endif
+
#ifdef FIXED_POINT
#include "fixed/structs_FIX.h"
#else
#include "float/structs_FLP.h"
#endif
+#ifdef ENABLE_OSCE_TRAINING_DATA
+#include <stdio.h>
+#endif
#define MAX_ENCODER_BUFFER 480
@@ -67,6 +75,9 @@ struct OpusEncoder {
int celt_enc_offset;
int silk_enc_offset;
silk_EncControlStruct silk_mode;
+#ifdef ENABLE_DRED
+ DREDEnc dred_encoder;
+#endif
int application;
int channels;
int delay_compensation;
@@ -116,6 +127,14 @@ struct OpusEncoder {
int nb_no_activity_ms_Q1;
opus_val32 peak_signal_energy;
#endif
+#ifdef ENABLE_DRED
+ int dred_duration;
+ int dred_q0;
+ int dred_dQ;
+ int dred_qmax;
+ int dred_target_chunks;
+ unsigned char activity_mem[DRED_MAX_FRAMES*4]; /* 2.5ms resolution*/
+#endif
int nonfinal_frame; /* current frame is not the final in a packet */
opus_uint32 rangeFinal;
};
@@ -224,6 +243,7 @@ int opus_encoder_init(OpusEncoder* st, opus_int32 Fs, int channels, int applicat
st->silk_mode.packetLossPercentage = 0;
st->silk_mode.complexity = 9;
st->silk_mode.useInBandFEC = 0;
+ st->silk_mode.useDRED = 0;
st->silk_mode.useDTX = 0;
st->silk_mode.useCBR = 0;
st->silk_mode.reducedDependency = 0;
@@ -236,6 +256,11 @@ int opus_encoder_init(OpusEncoder* st, opus_int32 Fs, int channels, int applicat
celt_encoder_ctl(celt_enc, CELT_SET_SIGNALLING(0));
celt_encoder_ctl(celt_enc, OPUS_SET_COMPLEXITY(st->silk_mode.complexity));
+#ifdef ENABLE_DRED
+ /* Initialize DRED Encoder */
+ dred_encoder_init( &st->dred_encoder, Fs, channels );
+#endif
+
st->use_vbr = 1;
/* Makes constrained VBR the default (safer for real-time use) */
st->vbr_constraint = 1;
@@ -544,6 +569,73 @@ OpusEncoder *opus_encoder_create(opus_int32 Fs, int channels, int application, i
return st;
}
+#ifdef ENABLE_DRED
+
+static const float dred_bits_table[16] = {73.2f, 68.1f, 62.5f, 57.0f, 51.5f, 45.7f, 39.9f, 32.4f, 26.4f, 20.4f, 16.3f, 13.f, 9.3f, 8.2f, 7.2f, 6.4f};
+static int estimate_dred_bitrate(int q0, int dQ, int qmax, int duration, opus_int32 target_bits, int *target_chunks) {
+ int dred_chunks;
+ int i;
+ float bits;
+ /* Signaling DRED costs 3 bytes. */
+ bits = 8*(3+DRED_EXPERIMENTAL_BYTES);
+ /* Approximation for the size of the IS. */
+ bits += 50.f+dred_bits_table[q0];
+ dred_chunks = IMIN((duration+5)/4, DRED_NUM_REDUNDANCY_FRAMES/2);
+ if (target_chunks != NULL) *target_chunks = 0;
+ for (i=0;i<dred_chunks;i++) {
+ int q = compute_quantizer(q0, dQ, qmax, i);
+ bits += dred_bits_table[q];
+ if (target_chunks != NULL && bits < target_bits) *target_chunks = i+1;
+ }
+ return (int)floor(.5f+bits);
+}
+
+static opus_int32 compute_dred_bitrate(OpusEncoder *st, opus_int32 bitrate_bps, int frame_size)
+{
+ float dred_frac;
+ int bitrate_offset;
+ opus_int32 dred_bitrate;
+ opus_int32 target_dred_bitrate;
+ int target_chunks;
+ opus_int32 max_dred_bits;
+ int q0, dQ, qmax;
+ if (st->silk_mode.useInBandFEC) {
+ dred_frac = MIN16(.7f, 3.f*st->silk_mode.packetLossPercentage/100.f);
+ bitrate_offset = 20000;
+ } else {
+ if (st->silk_mode.packetLossPercentage > 5) {
+ dred_frac = MIN16(.8f, .55f + st->silk_mode.packetLossPercentage/100.f);
+ } else {
+ dred_frac = 12*st->silk_mode.packetLossPercentage/100.f;
+ }
+ bitrate_offset = 12000;
+ }
+ /* Account for the fact that longer packets require less redundancy. */
+ dred_frac = dred_frac/(dred_frac + (1-dred_frac)*(frame_size*50.f)/st->Fs);
+ /* Approximate fit based on a few experiments. Could probably be improved. */
+ q0 = IMIN(15, IMAX(4, 51 - 3*EC_ILOG(IMAX(1, bitrate_bps-bitrate_offset))));
+ dQ = bitrate_bps-bitrate_offset > 36000 ? 3 : 5;
+ qmax = 15;
+ target_dred_bitrate = IMAX(0, (int)(dred_frac*(bitrate_bps-bitrate_offset)));
+ if (st->dred_duration > 0) {
+ opus_int32 target_bits = target_dred_bitrate*frame_size/st->Fs;
+ max_dred_bits = estimate_dred_bitrate(q0, dQ, qmax, st->dred_duration, target_bits, &target_chunks);
+ } else {
+ max_dred_bits = 0;
+ target_chunks=0;
+ }
+ dred_bitrate = IMIN(target_dred_bitrate, max_dred_bits*st->Fs/frame_size);
+ /* If we can't afford enough bits, don't bother with DRED at all. */
+ if (target_chunks < 2)
+ dred_bitrate = 0;
+ st->dred_q0 = q0;
+ st->dred_dQ = dQ;
+ st->dred_qmax = qmax;
+ st->dred_target_chunks = target_chunks;
+ return dred_bitrate;
+}
+#endif
+
static opus_int32 user_bitrate_to_bitrate(OpusEncoder *st, int frame_size, int max_data_bytes)
{
if(!frame_size)frame_size=st->Fs/400;
@@ -872,7 +964,7 @@ static opus_val32 compute_frame_energy(const opus_val16 *pcm, int frame_size, in
/* Compute the right shift required in the MAC to avoid an overflow */
max_shift = celt_ilog2(len);
- shift = IMAX(0, (celt_ilog2(sample_max) << 1) + max_shift - 28);
+ shift = IMAX(0, (celt_ilog2(1+sample_max) << 1) + max_shift - 28);
/* Compute the energy */
for (i=0; i<len; i++)
@@ -922,105 +1014,6 @@ static int decide_dtx_mode(opus_int activity, /* indicates if this fr
#endif
-static opus_int32 encode_multiframe_packet(OpusEncoder *st,
- const opus_val16 *pcm,
- int nb_frames,
- int frame_size,
- unsigned char *data,
- opus_int32 out_data_bytes,
- int to_celt,
- int lsb_depth,
- int float_api)
-{
- int i;
- int ret = 0;
- VARDECL(unsigned char, tmp_data);
- int bak_mode, bak_bandwidth, bak_channels, bak_to_mono;
- VARDECL(OpusRepacketizer, rp);
- int max_header_bytes;
- opus_int32 bytes_per_frame;
- opus_int32 cbr_bytes;
- opus_int32 repacketize_len;
- int tmp_len;
- ALLOC_STACK;
-
- /* Worst cases:
- * 2 frames: Code 2 with different compressed sizes
- * >2 frames: Code 3 VBR */
- max_header_bytes = nb_frames == 2 ? 3 : (2+(nb_frames-1)*2);
-
- if (st->use_vbr || st->user_bitrate_bps==OPUS_BITRATE_MAX)
- repacketize_len = out_data_bytes;
- else {
- cbr_bytes = 3*st->bitrate_bps/(3*8*st->Fs/(frame_size*nb_frames));
- repacketize_len = IMIN(cbr_bytes, out_data_bytes);
- }
- bytes_per_frame = IMIN(1276, 1+(repacketize_len-max_header_bytes)/nb_frames);
-
- ALLOC(tmp_data, nb_frames*bytes_per_frame, unsigned char);
- ALLOC(rp, 1, OpusRepacketizer);
- opus_repacketizer_init(rp);
-
- bak_mode = st->user_forced_mode;
- bak_bandwidth = st->user_bandwidth;
- bak_channels = st->force_channels;
-
- st->user_forced_mode = st->mode;
- st->user_bandwidth = st->bandwidth;
- st->force_channels = st->stream_channels;
-
- bak_to_mono = st->silk_mode.toMono;
- if (bak_to_mono)
- st->force_channels = 1;
- else
- st->prev_channels = st->stream_channels;
-
- for (i=0;i<nb_frames;i++)
- {
- st->silk_mode.toMono = 0;
- st->nonfinal_frame = i<(nb_frames-1);
-
- /* When switching from SILK/Hybrid to CELT, only ask for a switch at the last frame */
- if (to_celt && i==nb_frames-1)
- st->user_forced_mode = MODE_CELT_ONLY;
-
- tmp_len = opus_encode_native(st, pcm+i*(st->channels*frame_size), frame_size,
- tmp_data+i*bytes_per_frame, bytes_per_frame, lsb_depth, NULL, 0, 0, 0, 0,
- NULL, float_api);
-
- if (tmp_len<0)
- {
- RESTORE_STACK;
- return OPUS_INTERNAL_ERROR;
- }
-
- ret = opus_repacketizer_cat(rp, tmp_data+i*bytes_per_frame, tmp_len);
-
- if (ret<0)
- {
- RESTORE_STACK;
- return OPUS_INTERNAL_ERROR;
- }
- }
-
- ret = opus_repacketizer_out_range_impl(rp, 0, nb_frames, data, repacketize_len, 0, !st->use_vbr);
-
- if (ret<0)
- {
- RESTORE_STACK;
- return OPUS_INTERNAL_ERROR;
- }
-
- /* Discard configs that were forced locally for the purpose of repacketization */
- st->user_forced_mode = bak_mode;
- st->user_bandwidth = bak_bandwidth;
- st->force_channels = bak_channels;
- st->silk_mode.toMono = bak_to_mono;
-
- RESTORE_STACK;
- return ret;
-}
-
static int compute_redundancy_bytes(opus_int32 max_data_bytes, opus_int32 bitrate_bps, int frame_rate, int channels)
{
int redundancy_bytes_cap;
@@ -1049,6 +1042,18 @@ static int compute_redundancy_bytes(opus_int32 max_data_bytes, opus_int32 bitrat
return redundancy_bytes;
}
+static opus_int32 opus_encode_frame_native(OpusEncoder *st, const opus_val16 *pcm, int frame_size,
+ unsigned char *data, opus_int32 max_data_bytes,
+ int float_api, int first_frame,
+#ifdef ENABLE_DRED
+ opus_int32 dred_bitrate_bps,
+#endif
+#ifndef DISABLE_FLOAT_API
+ AnalysisInfo *analysis_info, int is_silence,
+#endif
+ int redundancy, int celt_to_silk, int prefill,
+ opus_int32 equiv_rate, int to_celt);
+
opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_size,
unsigned char *data, opus_int32 out_data_bytes, int lsb_depth,
const void *analysis_pcm, opus_int32 analysis_size, int c1, int c2,
@@ -1058,28 +1063,17 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
CELTEncoder *celt_enc;
int i;
int ret=0;
- opus_int32 nBytes;
- ec_enc enc;
- int bytes_target;
int prefill=0;
- int start_band = 0;
int redundancy = 0;
- int redundancy_bytes = 0; /* Number of bytes to use for redundancy frame */
int celt_to_silk = 0;
- VARDECL(opus_val16, pcm_buf);
- int nb_compr_bytes;
int to_celt = 0;
- opus_uint32 redundant_rng = 0;
- int cutoff_Hz, hp_freq_smth1;
int voice_est; /* Probability of voice in Q7 */
opus_int32 equiv_rate;
- int delay_compensation;
int frame_rate;
opus_int32 max_rate; /* Max bitrate we're allowed to use */
int curr_bandwidth;
- opus_val16 HB_gain;
opus_int32 max_data_bytes; /* Max number of bytes we're allowed to use */
- int total_buffer;
+ opus_int32 cbr_bytes=-1;
opus_val16 stereo_width;
const CELTMode *celt_mode;
#ifndef DISABLE_FLOAT_API
@@ -1088,10 +1082,9 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
int analysis_read_subframe_bak=-1;
int is_silence = 0;
#endif
- opus_int activity = VAD_NO_DECISION;
-
- VARDECL(opus_val16, tmp_prefill);
-
+#ifdef ENABLE_DRED
+ opus_int32 dred_bitrate_bps;
+#endif
ALLOC_STACK;
max_data_bytes = IMIN(1276, out_data_bytes);
@@ -1112,10 +1105,6 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
silk_enc = (char*)st+st->silk_enc_offset;
celt_enc = (CELTEncoder*)((char*)st+st->celt_enc_offset);
- if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY)
- delay_compensation = 0;
- else
- delay_compensation = st->delay_compensation;
lsb_depth = IMIN(lsb_depth, st->lsb_depth);
@@ -1157,20 +1146,6 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
if (!is_silence)
st->voice_ratio = -1;
- if (is_silence)
- {
- activity = !is_silence;
- } else if (analysis_info.valid)
- {
- activity = analysis_info.activity_probability >= DTX_ACTIVITY_THRESHOLD;
- if (!activity)
- {
- /* Mark as active if this noise frame is sufficiently loud */
- opus_val32 noise_energy = compute_frame_energy(pcm, frame_size, st->channels, st->arch);
- activity = st->peak_signal_energy < (PSEUDO_SNR_THRESHOLD * noise_energy);
- }
- }
-
st->detected_bandwidth = 0;
if (analysis_info.valid)
{
@@ -1207,21 +1182,24 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
stereo_width = compute_stereo_width(pcm, frame_size, st->Fs, &st->width_mem);
else
stereo_width = 0;
- total_buffer = delay_compensation;
st->bitrate_bps = user_bitrate_to_bitrate(st, frame_size, max_data_bytes);
frame_rate = st->Fs/frame_size;
if (!st->use_vbr)
{
- int cbrBytes;
/* Multiply by 12 to make sure the division is exact. */
int frame_rate12 = 12*st->Fs/frame_size;
/* We need to make sure that "int" values always fit in 16 bits. */
- cbrBytes = IMIN( (12*st->bitrate_bps/8 + frame_rate12/2)/frame_rate12, max_data_bytes);
- st->bitrate_bps = cbrBytes*(opus_int32)frame_rate12*8/12;
+ cbr_bytes = IMIN( (12*st->bitrate_bps/8 + frame_rate12/2)/frame_rate12, max_data_bytes);
+ st->bitrate_bps = cbr_bytes*(opus_int32)frame_rate12*8/12;
/* Make sure we provide at least one byte to avoid failing. */
- max_data_bytes = IMAX(1, cbrBytes);
+ max_data_bytes = IMAX(1, cbr_bytes);
}
+#ifdef ENABLE_DRED
+ /* Allocate some of the bits to DRED if needed. */
+ dred_bitrate_bps = compute_dred_bitrate(st, st->bitrate_bps, frame_size);
+ st->bitrate_bps -= dred_bitrate_bps;
+#endif
if (max_data_bytes<3 || st->bitrate_bps < 3*frame_rate*8
|| (frame_rate<50 && (max_data_bytes*frame_rate<300 || st->bitrate_bps < 2400)))
{
@@ -1575,6 +1553,15 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
{
int enc_frame_size;
int nb_frames;
+ VARDECL(unsigned char, tmp_data);
+ VARDECL(OpusRepacketizer, rp);
+ int max_header_bytes;
+ opus_int32 repacketize_len;
+ opus_int32 max_len_sum;
+ opus_int32 tot_size=0;
+ unsigned char *curr_data;
+ int tmp_len;
+ int dtx_count = 0;
if (st->mode == MODE_SILK_ONLY)
{
@@ -1593,17 +1580,186 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
#ifndef DISABLE_FLOAT_API
if (analysis_read_pos_bak!= -1)
{
+ /* Reset analysis position to the beginning of the first frame so we
+ can use it one frame at a time. */
st->analysis.read_pos = analysis_read_pos_bak;
st->analysis.read_subframe = analysis_read_subframe_bak;
}
#endif
- ret = encode_multiframe_packet(st, pcm, nb_frames, enc_frame_size, data,
- out_data_bytes, to_celt, lsb_depth, float_api);
+ /* Worst cases:
+ * 2 frames: Code 2 with different compressed sizes
+ * >2 frames: Code 3 VBR */
+ max_header_bytes = nb_frames == 2 ? 3 : (2+(nb_frames-1)*2);
+
+ if (st->use_vbr || st->user_bitrate_bps==OPUS_BITRATE_MAX)
+ repacketize_len = out_data_bytes;
+ else {
+ celt_assert(cbr_bytes>=0);
+ repacketize_len = IMIN(cbr_bytes, out_data_bytes);
+ }
+ max_len_sum = nb_frames + repacketize_len - max_header_bytes;
+
+ ALLOC(tmp_data, max_len_sum, unsigned char);
+ curr_data = tmp_data;
+ ALLOC(rp, 1, OpusRepacketizer);
+ opus_repacketizer_init(rp);
+
+ int bak_to_mono = st->silk_mode.toMono;
+ if (bak_to_mono)
+ st->force_channels = 1;
+ else
+ st->prev_channels = st->stream_channels;
+
+ for (i=0;i<nb_frames;i++)
+ {
+ int first_frame;
+ int frame_to_celt;
+ int frame_redundancy;
+ opus_int32 curr_max;
+ /* Attempt DRED encoding until we have a non-DTX frame. In case of DTX refresh,
+ that allows for DRED not to be in the first frame. */
+ first_frame = (i == 0) || (i == dtx_count);
+ st->silk_mode.toMono = 0;
+ st->nonfinal_frame = i<(nb_frames-1);
+
+ /* When switching from SILK/Hybrid to CELT, only ask for a switch at the last frame */
+ frame_to_celt = to_celt && i==nb_frames-1;
+ frame_redundancy = redundancy && (frame_to_celt || (!to_celt && i==0));
+
+ curr_max = IMIN(3*st->bitrate_bps/(3*8*st->Fs/enc_frame_size), max_len_sum/nb_frames);
+#ifdef ENABLE_DRED
+ curr_max = IMIN(curr_max, (max_len_sum-3*dred_bitrate_bps/(3*8*st->Fs/frame_size))/nb_frames);
+ if (first_frame) curr_max += 3*dred_bitrate_bps/(3*8*st->Fs/frame_size);
+#endif
+ curr_max = IMIN(max_len_sum-tot_size, curr_max);
+#ifndef DISABLE_FLOAT_API
+ if (analysis_read_pos_bak != -1) {
+ is_silence = is_digital_silence(pcm, frame_size, st->channels, lsb_depth);
+ /* Get analysis for current frame. */
+ tonality_get_info(&st->analysis, &analysis_info, enc_frame_size);
+ }
+#endif
+
+ tmp_len = opus_encode_frame_native(st, pcm+i*(st->channels*enc_frame_size), enc_frame_size, curr_data, curr_max, float_api, first_frame,
+#ifdef ENABLE_DRED
+ dred_bitrate_bps,
+#endif
+#ifndef DISABLE_FLOAT_API
+ &analysis_info,
+ is_silence,
+#endif
+ frame_redundancy, celt_to_silk, prefill,
+ equiv_rate, frame_to_celt
+ );
+ if (tmp_len<0)
+ {
+ RESTORE_STACK;
+ return OPUS_INTERNAL_ERROR;
+ } else if (tmp_len==1) {
+ dtx_count++;
+ }
+ ret = opus_repacketizer_cat(rp, curr_data, tmp_len);
+
+ if (ret<0)
+ {
+ RESTORE_STACK;
+ return OPUS_INTERNAL_ERROR;
+ }
+ tot_size += tmp_len;
+ curr_data += tmp_len;
+ }
+ ret = opus_repacketizer_out_range_impl(rp, 0, nb_frames, data, repacketize_len, 0, !st->use_vbr && (dtx_count != nb_frames), NULL, 0);
+ if (ret<0)
+ {
+ ret = OPUS_INTERNAL_ERROR;
+ }
+ st->silk_mode.toMono = bak_to_mono;
RESTORE_STACK;
return ret;
+ } else {
+ ret = opus_encode_frame_native(st, pcm, frame_size, data, max_data_bytes, float_api, 1,
+#ifdef ENABLE_DRED
+ dred_bitrate_bps,
+#endif
+#ifndef DISABLE_FLOAT_API
+ &analysis_info,
+ is_silence,
+#endif
+ redundancy, celt_to_silk, prefill,
+ equiv_rate, to_celt
+ );
+ RESTORE_STACK;
+ return ret;
}
+}
+
+static opus_int32 opus_encode_frame_native(OpusEncoder *st, const opus_val16 *pcm, int frame_size,
+ unsigned char *data, opus_int32 max_data_bytes,
+ int float_api, int first_frame,
+#ifdef ENABLE_DRED
+ opus_int32 dred_bitrate_bps,
+#endif
+#ifndef DISABLE_FLOAT_API
+ AnalysisInfo *analysis_info, int is_silence,
+#endif
+ int redundancy, int celt_to_silk, int prefill,
+ opus_int32 equiv_rate, int to_celt)
+{
+ void *silk_enc;
+ CELTEncoder *celt_enc;
+ const CELTMode *celt_mode;
+ int i;
+ int ret=0;
+ opus_int32 nBytes;
+ ec_enc enc;
+ int bytes_target;
+ int start_band = 0;
+ int redundancy_bytes = 0; /* Number of bytes to use for redundancy frame */
+ int nb_compr_bytes;
+ opus_uint32 redundant_rng = 0;
+ int cutoff_Hz;
+ int hp_freq_smth1;
+ opus_val16 HB_gain;
+ int apply_padding;
+ int frame_rate;
+ int curr_bandwidth;
+ int delay_compensation;
+ int total_buffer;
+ opus_int activity = VAD_NO_DECISION;
+ VARDECL(opus_val16, pcm_buf);
+ VARDECL(opus_val16, tmp_prefill);
+ SAVE_STACK;
+
+ st->rangeFinal = 0;
+ silk_enc = (char*)st+st->silk_enc_offset;
+ celt_enc = (CELTEncoder*)((char*)st+st->celt_enc_offset);
+ celt_encoder_ctl(celt_enc, CELT_GET_MODE(&celt_mode));
+ curr_bandwidth = st->bandwidth;
+ if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY)
+ delay_compensation = 0;
+ else
+ delay_compensation = st->delay_compensation;
+ total_buffer = delay_compensation;
+
+ frame_rate = st->Fs/frame_size;
+
+#ifndef DISABLE_FLOAT_API
+ if (is_silence)
+ {
+ activity = !is_silence;
+ } else if (analysis_info->valid)
+ {
+ activity = analysis_info->activity_probability >= DTX_ACTIVITY_THRESHOLD;
+ if (!activity)
+ {
+ /* Mark as active if this noise frame is sufficiently loud */
+ opus_val32 noise_energy = compute_frame_energy(pcm, frame_size, st->channels, st->arch);
+ activity = st->peak_signal_energy < (PSEUDO_SNR_THRESHOLD * noise_energy);
+ }
+ }
+#endif
/* For the first frame at a new SILK bandwidth */
if (st->silk_bw_switch)
@@ -1611,7 +1767,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
redundancy = 1;
celt_to_silk = 1;
st->silk_bw_switch = 0;
- /* Do a prefill without reseting the sampling rate control. */
+ /* Do a prefill without resetting the sampling rate control. */
prefill=2;
}
@@ -1651,6 +1807,25 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
if (st->application == OPUS_APPLICATION_VOIP)
{
hp_cutoff(pcm, cutoff_Hz, &pcm_buf[total_buffer*st->channels], st->hp_mem, frame_size, st->channels, st->Fs, st->arch);
+
+#ifdef ENABLE_OSCE_TRAINING_DATA
+ /* write out high pass filtered clean signal*/
+ static FILE *fout =NULL;
+ if (fout == NULL)
+ {
+ fout = fopen("clean_hp.s16", "wb");
+ }
+
+ {
+ int idx;
+ opus_int16 tmp;
+ for (idx = 0; idx < frame_size; idx++)
+ {
+ tmp = (opus_int16) (32768 * pcm_buf[total_buffer + idx] + 0.5f);
+ fwrite(&tmp, sizeof(tmp), 1, fout);
+ }
+ }
+#endif
} else {
dc_reject(pcm, 3, &pcm_buf[total_buffer*st->channels], st->hp_mem, frame_size, st->channels, st->Fs);
}
@@ -1667,8 +1842,24 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
st->hp_mem[0] = st->hp_mem[1] = st->hp_mem[2] = st->hp_mem[3] = 0;
}
}
+#else
+ (void)float_api;
#endif
+#ifdef ENABLE_DRED
+ if ( st->dred_duration > 0 && st->dred_encoder.loaded ) {
+ int frame_size_400Hz;
+ /* DRED Encoder */
+ dred_compute_latents( &st->dred_encoder, &pcm_buf[total_buffer*st->channels], frame_size, total_buffer, st->arch );
+ frame_size_400Hz = frame_size*400/st->Fs;
+ OPUS_MOVE(&st->activity_mem[frame_size_400Hz], st->activity_mem, 4*DRED_MAX_FRAMES-frame_size_400Hz);
+ for (i=0;i<frame_size_400Hz;i++)
+ st->activity_mem[i] = activity;
+ } else {
+ st->dred_encoder.latents_buffer_fill = 0;
+ OPUS_CLEAR(st->activity_mem, DRED_MAX_FRAMES);
+ }
+#endif
/* SILK processing */
HB_gain = Q15ONE;
@@ -1763,7 +1954,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
st->silk_mode.maxInternalSampleRate = 16000;
if (st->mode == MODE_SILK_ONLY)
{
- opus_int32 effective_max_rate = max_rate;
+ opus_int32 effective_max_rate = frame_rate*max_data_bytes*8;
if (frame_rate > 50)
effective_max_rate = effective_max_rate*2/3;
if (effective_max_rate < 8000)
@@ -1793,9 +1984,19 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
}
if (st->silk_mode.useCBR)
{
+ /* When we're in CBR mode, but we have non-SILK data to encode, switch SILK to VBR with cap to
+ save on complexity. Any variations will be absorbed by CELT and/or DRED and we can still
+ produce a constant bitrate without wasting bits. */
+#ifdef ENABLE_DRED
+ if (st->mode == MODE_HYBRID || dred_bitrate_bps > 0)
+#else
if (st->mode == MODE_HYBRID)
+#endif
{
- st->silk_mode.maxBits = IMIN(st->silk_mode.maxBits, st->silk_mode.bitRate * frame_size / st->Fs);
+ /* Allow SILK to steal up to 25% of the remaining bits */
+ opus_int16 other_bits = IMAX(0, st->silk_mode.maxBits - st->silk_mode.bitRate * frame_size / st->Fs);
+ st->silk_mode.maxBits = IMAX(0, st->silk_mode.maxBits - other_bits*3/4);
+ st->silk_mode.useCBR = 0;
}
} else {
/* Constrained VBR. */
@@ -1908,26 +2109,10 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
if (st->mode != MODE_SILK_ONLY)
{
opus_val32 celt_pred=2;
- celt_encoder_ctl(celt_enc, OPUS_SET_VBR(0));
/* We may still decide to disable prediction later */
if (st->silk_mode.reducedDependency)
celt_pred = 0;
celt_encoder_ctl(celt_enc, CELT_SET_PREDICTION(celt_pred));
-
- if (st->mode == MODE_HYBRID)
- {
- if( st->use_vbr ) {
- celt_encoder_ctl(celt_enc, OPUS_SET_BITRATE(st->bitrate_bps-st->silk_mode.bitRate));
- celt_encoder_ctl(celt_enc, OPUS_SET_VBR_CONSTRAINT(0));
- }
- } else {
- if (st->use_vbr)
- {
- celt_encoder_ctl(celt_enc, OPUS_SET_VBR(1));
- celt_encoder_ctl(celt_enc, OPUS_SET_VBR_CONSTRAINT(st->vbr_constraint));
- celt_encoder_ctl(celt_enc, OPUS_SET_BITRATE(st->bitrate_bps));
- }
- }
}
ALLOC(tmp_prefill, st->channels*st->Fs/400, opus_val16);
@@ -2021,13 +2206,27 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
ec_enc_done(&enc);
nb_compr_bytes = ret;
} else {
- nb_compr_bytes = (max_data_bytes-1)-redundancy_bytes;
- ec_enc_shrink(&enc, nb_compr_bytes);
+ nb_compr_bytes = (max_data_bytes-1)-redundancy_bytes;
+#ifdef ENABLE_DRED
+ if (st->dred_duration > 0)
+ {
+ int max_celt_bytes;
+ opus_int32 dred_bytes = dred_bitrate_bps/(frame_rate*8);
+ /* Allow CELT to steal up to 25% of the remaining bits. */
+ max_celt_bytes = nb_compr_bytes - dred_bytes*3/4;
+ /* But try to give CELT at least 5 bytes to prevent a mismatch with
+ the redundancy signaling. */
+ max_celt_bytes = IMAX((ec_tell(&enc)+7)/8 + 5, max_celt_bytes);
+ /* Subject to the original max. */
+ nb_compr_bytes = IMIN(nb_compr_bytes, max_celt_bytes);
+ }
+#endif
+ ec_enc_shrink(&enc, nb_compr_bytes);
}
#ifndef DISABLE_FLOAT_API
if (redundancy || st->mode != MODE_SILK_ONLY)
- celt_encoder_ctl(celt_enc, CELT_SET_ANALYSIS(&analysis_info));
+ celt_encoder_ctl(celt_enc, CELT_SET_ANALYSIS(analysis_info));
#endif
if (st->mode == MODE_HYBRID) {
SILKInfo info;
@@ -2057,6 +2256,34 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
if (st->mode != MODE_SILK_ONLY)
{
+ celt_encoder_ctl(celt_enc, OPUS_SET_VBR(st->use_vbr));
+ if (st->mode == MODE_HYBRID)
+ {
+ if( st->use_vbr ) {
+ celt_encoder_ctl(celt_enc, OPUS_SET_BITRATE(st->bitrate_bps-st->silk_mode.bitRate));
+ celt_encoder_ctl(celt_enc, OPUS_SET_VBR_CONSTRAINT(0));
+ }
+ } else {
+ if (st->use_vbr)
+ {
+ celt_encoder_ctl(celt_enc, OPUS_SET_VBR(1));
+ celt_encoder_ctl(celt_enc, OPUS_SET_VBR_CONSTRAINT(st->vbr_constraint));
+ celt_encoder_ctl(celt_enc, OPUS_SET_BITRATE(st->bitrate_bps));
+ }
+ }
+#ifdef ENABLE_DRED
+ /* When Using DRED CBR, we can actually make the CELT part VBR and have DRED pick up the slack. */
+ if (!st->use_vbr && st->dred_duration > 0)
+ {
+ opus_int32 celt_bitrate = st->bitrate_bps;
+ celt_encoder_ctl(celt_enc, OPUS_SET_VBR(1));
+ celt_encoder_ctl(celt_enc, OPUS_SET_VBR_CONSTRAINT(0));
+ if (st->mode == MODE_HYBRID) {
+ celt_bitrate -= st->silk_mode.bitRate;
+ }
+ celt_encoder_ctl(celt_enc, OPUS_SET_BITRATE(celt_bitrate));
+ }
+#endif
if (st->mode != st->prev_mode && st->prev_mode > 0)
{
unsigned char dummy[2];
@@ -2069,10 +2296,6 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
/* If false, we already busted the budget and we'll end up with a "PLC frame" */
if (ec_tell(&enc) <= 8*nb_compr_bytes)
{
- /* Set the bitrate again if it was overridden in the redundancy code above*/
- if (redundancy && celt_to_silk && st->mode==MODE_HYBRID && st->use_vbr)
- celt_encoder_ctl(celt_enc, OPUS_SET_BITRATE(st->bitrate_bps-st->silk_mode.bitRate));
- celt_encoder_ctl(celt_enc, OPUS_SET_VBR(st->use_vbr));
ret = celt_encode_with_ec(celt_enc, pcm_buf, frame_size, NULL, nb_compr_bytes, &enc);
if (ret < 0)
{
@@ -2080,10 +2303,10 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
return OPUS_INTERNAL_ERROR;
}
/* Put CELT->SILK redundancy data in the right place. */
- if (redundancy && celt_to_silk && st->mode==MODE_HYBRID && st->use_vbr)
+ if (redundancy && celt_to_silk && st->mode==MODE_HYBRID && nb_compr_bytes != ret)
{
OPUS_MOVE(data+ret, data+nb_compr_bytes, redundancy_bytes);
- nb_compr_bytes = nb_compr_bytes+redundancy_bytes;
+ nb_compr_bytes = ret+redundancy_bytes;
}
}
}
@@ -2140,7 +2363,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
/* DTX decision */
#ifndef DISABLE_FLOAT_API
- if (st->use_dtx && (analysis_info.valid || is_silence))
+ if (st->use_dtx && (analysis_info->valid || is_silence))
{
if (decide_dtx_mode(activity, &st->nb_no_activity_ms_Q1, 2*1000*frame_size/st->Fs))
{
@@ -2178,7 +2401,51 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
}
/* Count ToC and redundancy */
ret += 1+redundancy_bytes;
- if (!st->use_vbr)
+ apply_padding = !st->use_vbr;
+#ifdef ENABLE_DRED
+ if (st->dred_duration > 0 && st->dred_encoder.loaded && first_frame) {
+ opus_extension_data extension;
+ unsigned char buf[DRED_MAX_DATA_SIZE];
+ int dred_chunks;
+ int dred_bytes_left;
+ dred_chunks = IMIN((st->dred_duration+5)/4, DRED_NUM_REDUNDANCY_FRAMES/2);
+ if (st->use_vbr) dred_chunks = IMIN(dred_chunks, st->dred_target_chunks);
+ /* Remaining space for DRED, accounting for cost the 3 extra bytes for code 3, padding length, and extension number. */
+ dred_bytes_left = IMIN(DRED_MAX_DATA_SIZE, max_data_bytes-ret-3);
+ /* Account for the extra bytes required to signal large padding length. */
+ dred_bytes_left -= (dred_bytes_left+1+DRED_EXPERIMENTAL_BYTES)/255;
+ /* Check whether we actually have something to encode. */
+ if (dred_chunks >= 1 && dred_bytes_left >= DRED_MIN_BYTES+DRED_EXPERIMENTAL_BYTES) {
+ int dred_bytes;
+#ifdef DRED_EXPERIMENTAL_VERSION
+ /* Add temporary extension type and version.
+ These bytes will be removed once extension is finalized. */
+ buf[0] = 'D';
+ buf[1] = DRED_EXPERIMENTAL_VERSION;
+#endif
+ dred_bytes = dred_encode_silk_frame(&st->dred_encoder, buf+DRED_EXPERIMENTAL_BYTES, dred_chunks, dred_bytes_left-DRED_EXPERIMENTAL_BYTES,
+ st->dred_q0, st->dred_dQ, st->dred_qmax, st->activity_mem, st->arch);
+ if (dred_bytes > 0) {
+ dred_bytes += DRED_EXPERIMENTAL_BYTES;
+ celt_assert(dred_bytes <= dred_bytes_left);
+ extension.id = DRED_EXTENSION_ID;
+ extension.frame = 0;
+ extension.data = buf;
+ extension.len = dred_bytes;
+ ret = opus_packet_pad_impl(data, ret, max_data_bytes, !st->use_vbr, &extension, 1);
+ if (ret < 0)
+ {
+ RESTORE_STACK;
+ return OPUS_INTERNAL_ERROR;
+ }
+ apply_padding = 0;
+ }
+ }
+ }
+#else
+ (void)first_frame; /* Avoids a warning about first_frame being unused. */
+#endif
+ if (apply_padding)
{
if (opus_packet_pad(data, ret, max_data_bytes) != OPUS_OK)
{
@@ -2677,6 +2944,29 @@ int opus_encoder_ctl(OpusEncoder *st, int request, ...)
celt_encoder_ctl(celt_enc, OPUS_GET_PHASE_INVERSION_DISABLED(value));
}
break;
+#ifdef ENABLE_DRED
+ case OPUS_SET_DRED_DURATION_REQUEST:
+ {
+ opus_int32 value = va_arg(ap, opus_int32);
+ if(value<0 || value>DRED_MAX_FRAMES)
+ {
+ goto bad_arg;
+ }
+ st->dred_duration = value;
+ st->silk_mode.useDRED = !!value;
+ }
+ break;
+ case OPUS_GET_DRED_DURATION_REQUEST:
+ {
+ opus_int32 *value = va_arg(ap, opus_int32*);
+ if (!value)
+ {
+ goto bad_arg;
+ }
+ *value = st->dred_duration;
+ }
+ break;
+#endif
case OPUS_RESET_STATE:
{
void *silk_enc;
@@ -2692,6 +2982,10 @@ int opus_encoder_ctl(OpusEncoder *st, int request, ...)
celt_encoder_ctl(celt_enc, OPUS_RESET_STATE);
silk_InitEncoder( silk_enc, st->arch, &dummy );
+#ifdef ENABLE_DRED
+ /* Initialize DRED Encoder */
+ dred_encoder_reset( &st->dred_encoder );
+#endif
st->stream_channels = st->channels;
st->hybrid_stereo_width_Q14 = 1 << 14;
st->prev_HB_gain = Q15ONE;
@@ -2752,6 +3046,21 @@ int opus_encoder_ctl(OpusEncoder *st, int request, ...)
}
}
break;
+#ifdef USE_WEIGHTS_FILE
+ case OPUS_SET_DNN_BLOB_REQUEST:
+ {
+ const unsigned char *data = va_arg(ap, const unsigned char *);
+ opus_int32 len = va_arg(ap, opus_int32);
+ if(len<0 || data == NULL)
+ {
+ goto bad_arg;
+ }
+#ifdef ENABLE_DRED
+ ret = dred_encoder_load_model(&st->dred_encoder, data, len);
+#endif
+ }
+ break;
+#endif
case CELT_GET_MODE_REQUEST:
{
const CELTMode ** value = va_arg(ap, const CELTMode**);