From 26a029d407be480d791972afb5975cf62c9360a6 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Fri, 19 Apr 2024 02:47:55 +0200 Subject: Adding upstream version 124.0.1. Signed-off-by: Daniel Baumann --- .../audio_processing/aec3/echo_canceller3.cc | 992 +++++++++++++++++++++ 1 file changed, 992 insertions(+) create mode 100644 third_party/libwebrtc/modules/audio_processing/aec3/echo_canceller3.cc (limited to 'third_party/libwebrtc/modules/audio_processing/aec3/echo_canceller3.cc') diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/echo_canceller3.cc b/third_party/libwebrtc/modules/audio_processing/aec3/echo_canceller3.cc new file mode 100644 index 0000000000..e8e2175994 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/aec3/echo_canceller3.cc @@ -0,0 +1,992 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include "modules/audio_processing/aec3/echo_canceller3.h" + +#include +#include + +#include "absl/strings/string_view.h" +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/high_pass_filter.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/experiments/field_trial_parser.h" +#include "rtc_base/logging.h" +#include "system_wrappers/include/field_trial.h" + +namespace webrtc { + +namespace { + +enum class EchoCanceller3ApiCall { kCapture, kRender }; + +bool DetectSaturation(rtc::ArrayView y) { + for (size_t k = 0; k < y.size(); ++k) { + if (y[k] >= 32700.0f || y[k] <= -32700.0f) { + return true; + } + } + return false; +} + +// Retrieves a value from a field trial if it is available. If no value is +// present, the default value is returned. If the retrieved value is beyond the +// specified limits, the default value is returned instead. +void RetrieveFieldTrialValue(absl::string_view trial_name, + float min, + float max, + float* value_to_update) { + const std::string field_trial_str = field_trial::FindFullName(trial_name); + + FieldTrialParameter field_trial_param(/*key=*/"", *value_to_update); + + ParseFieldTrial({&field_trial_param}, field_trial_str); + float field_trial_value = static_cast(field_trial_param.Get()); + + if (field_trial_value >= min && field_trial_value <= max && + field_trial_value != *value_to_update) { + RTC_LOG(LS_INFO) << "Key " << trial_name + << " changing AEC3 parameter value from " + << *value_to_update << " to " << field_trial_value; + *value_to_update = field_trial_value; + } +} + +void RetrieveFieldTrialValue(absl::string_view trial_name, + int min, + int max, + int* value_to_update) { + const std::string field_trial_str = field_trial::FindFullName(trial_name); + + FieldTrialParameter field_trial_param(/*key=*/"", *value_to_update); + + ParseFieldTrial({&field_trial_param}, field_trial_str); + float field_trial_value = field_trial_param.Get(); + + if (field_trial_value >= min && field_trial_value <= max && + field_trial_value != *value_to_update) { + RTC_LOG(LS_INFO) << "Key " << trial_name + << " changing AEC3 parameter value from " + << *value_to_update << " to " << field_trial_value; + *value_to_update = field_trial_value; + } +} + +void FillSubFrameView( + AudioBuffer* frame, + size_t sub_frame_index, + std::vector>>* sub_frame_view) { + RTC_DCHECK_GE(1, sub_frame_index); + RTC_DCHECK_LE(0, sub_frame_index); + RTC_DCHECK_EQ(frame->num_bands(), sub_frame_view->size()); + RTC_DCHECK_EQ(frame->num_channels(), (*sub_frame_view)[0].size()); + for (size_t band = 0; band < sub_frame_view->size(); ++band) { + for (size_t channel = 0; channel < (*sub_frame_view)[0].size(); ++channel) { + (*sub_frame_view)[band][channel] = rtc::ArrayView( + &frame->split_bands(channel)[band][sub_frame_index * kSubFrameLength], + kSubFrameLength); + } + } +} + +void FillSubFrameView( + bool proper_downmix_needed, + std::vector>>* frame, + size_t sub_frame_index, + std::vector>>* sub_frame_view) { + RTC_DCHECK_GE(1, sub_frame_index); + RTC_DCHECK_EQ(frame->size(), sub_frame_view->size()); + const size_t frame_num_channels = (*frame)[0].size(); + const size_t sub_frame_num_channels = (*sub_frame_view)[0].size(); + if (frame_num_channels > sub_frame_num_channels) { + RTC_DCHECK_EQ(sub_frame_num_channels, 1u); + if (proper_downmix_needed) { + // When a proper downmix is needed (which is the case when proper stereo + // is present in the echo reference signal but the echo canceller does the + // processing in mono) downmix the echo reference by averaging the channel + // content (otherwise downmixing is done by selecting channel 0). + for (size_t band = 0; band < frame->size(); ++band) { + for (size_t ch = 1; ch < frame_num_channels; ++ch) { + for (size_t k = 0; k < kSubFrameLength; ++k) { + (*frame)[band][/*channel=*/0] + [sub_frame_index * kSubFrameLength + k] += + (*frame)[band][ch][sub_frame_index * kSubFrameLength + k]; + } + } + const float one_by_num_channels = 1.0f / frame_num_channels; + for (size_t k = 0; k < kSubFrameLength; ++k) { + (*frame)[band][/*channel=*/0][sub_frame_index * kSubFrameLength + + k] *= one_by_num_channels; + } + } + } + for (size_t band = 0; band < frame->size(); ++band) { + (*sub_frame_view)[band][/*channel=*/0] = rtc::ArrayView( + &(*frame)[band][/*channel=*/0][sub_frame_index * kSubFrameLength], + kSubFrameLength); + } + } else { + RTC_DCHECK_EQ(frame_num_channels, sub_frame_num_channels); + for (size_t band = 0; band < frame->size(); ++band) { + for (size_t channel = 0; channel < (*frame)[band].size(); ++channel) { + (*sub_frame_view)[band][channel] = rtc::ArrayView( + &(*frame)[band][channel][sub_frame_index * kSubFrameLength], + kSubFrameLength); + } + } + } +} + +void ProcessCaptureFrameContent( + AudioBuffer* linear_output, + AudioBuffer* capture, + bool level_change, + bool aec_reference_is_downmixed_stereo, + bool saturated_microphone_signal, + size_t sub_frame_index, + FrameBlocker* capture_blocker, + BlockFramer* linear_output_framer, + BlockFramer* output_framer, + BlockProcessor* block_processor, + Block* linear_output_block, + std::vector>>* + linear_output_sub_frame_view, + Block* capture_block, + std::vector>>* capture_sub_frame_view) { + FillSubFrameView(capture, sub_frame_index, capture_sub_frame_view); + + if (linear_output) { + RTC_DCHECK(linear_output_framer); + RTC_DCHECK(linear_output_block); + RTC_DCHECK(linear_output_sub_frame_view); + FillSubFrameView(linear_output, sub_frame_index, + linear_output_sub_frame_view); + } + + capture_blocker->InsertSubFrameAndExtractBlock(*capture_sub_frame_view, + capture_block); + block_processor->ProcessCapture( + /*echo_path_gain_change=*/level_change || + aec_reference_is_downmixed_stereo, + saturated_microphone_signal, linear_output_block, capture_block); + output_framer->InsertBlockAndExtractSubFrame(*capture_block, + capture_sub_frame_view); + + if (linear_output) { + RTC_DCHECK(linear_output_framer); + linear_output_framer->InsertBlockAndExtractSubFrame( + *linear_output_block, linear_output_sub_frame_view); + } +} + +void ProcessRemainingCaptureFrameContent(bool level_change, + bool aec_reference_is_downmixed_stereo, + bool saturated_microphone_signal, + FrameBlocker* capture_blocker, + BlockFramer* linear_output_framer, + BlockFramer* output_framer, + BlockProcessor* block_processor, + Block* linear_output_block, + Block* block) { + if (!capture_blocker->IsBlockAvailable()) { + return; + } + + capture_blocker->ExtractBlock(block); + block_processor->ProcessCapture( + /*echo_path_gain_change=*/level_change || + aec_reference_is_downmixed_stereo, + saturated_microphone_signal, linear_output_block, block); + output_framer->InsertBlock(*block); + + if (linear_output_framer) { + RTC_DCHECK(linear_output_block); + linear_output_framer->InsertBlock(*linear_output_block); + } +} + +void BufferRenderFrameContent( + bool proper_downmix_needed, + std::vector>>* render_frame, + size_t sub_frame_index, + FrameBlocker* render_blocker, + BlockProcessor* block_processor, + Block* block, + std::vector>>* sub_frame_view) { + FillSubFrameView(proper_downmix_needed, render_frame, sub_frame_index, + sub_frame_view); + render_blocker->InsertSubFrameAndExtractBlock(*sub_frame_view, block); + block_processor->BufferRender(*block); +} + +void BufferRemainingRenderFrameContent(FrameBlocker* render_blocker, + BlockProcessor* block_processor, + Block* block) { + if (!render_blocker->IsBlockAvailable()) { + return; + } + render_blocker->ExtractBlock(block); + block_processor->BufferRender(*block); +} + +void CopyBufferIntoFrame(const AudioBuffer& buffer, + size_t num_bands, + size_t num_channels, + std::vector>>* frame) { + RTC_DCHECK_EQ(num_bands, frame->size()); + RTC_DCHECK_EQ(num_channels, (*frame)[0].size()); + RTC_DCHECK_EQ(AudioBuffer::kSplitBandSize, (*frame)[0][0].size()); + for (size_t band = 0; band < num_bands; ++band) { + for (size_t channel = 0; channel < num_channels; ++channel) { + rtc::ArrayView buffer_view( + &buffer.split_bands_const(channel)[band][0], + AudioBuffer::kSplitBandSize); + std::copy(buffer_view.begin(), buffer_view.end(), + (*frame)[band][channel].begin()); + } + } +} + +} // namespace + +// TODO(webrtc:5298): Move this to a separate file. +EchoCanceller3Config AdjustConfig(const EchoCanceller3Config& config) { + EchoCanceller3Config adjusted_cfg = config; + + if (field_trial::IsEnabled("WebRTC-Aec3StereoContentDetectionKillSwitch")) { + adjusted_cfg.multi_channel.detect_stereo_content = false; + } + + if (field_trial::IsEnabled("WebRTC-Aec3AntiHowlingMinimizationKillSwitch")) { + adjusted_cfg.suppressor.high_bands_suppression + .anti_howling_activation_threshold = 25.f; + adjusted_cfg.suppressor.high_bands_suppression.anti_howling_gain = 0.01f; + } + + if (field_trial::IsEnabled("WebRTC-Aec3UseShortConfigChangeDuration")) { + adjusted_cfg.filter.config_change_duration_blocks = 10; + } + + if (field_trial::IsEnabled("WebRTC-Aec3UseZeroInitialStateDuration")) { + adjusted_cfg.filter.initial_state_seconds = 0.f; + } else if (field_trial::IsEnabled( + "WebRTC-Aec3UseDot1SecondsInitialStateDuration")) { + adjusted_cfg.filter.initial_state_seconds = .1f; + } else if (field_trial::IsEnabled( + "WebRTC-Aec3UseDot2SecondsInitialStateDuration")) { + adjusted_cfg.filter.initial_state_seconds = .2f; + } else if (field_trial::IsEnabled( + "WebRTC-Aec3UseDot3SecondsInitialStateDuration")) { + adjusted_cfg.filter.initial_state_seconds = .3f; + } else if (field_trial::IsEnabled( + "WebRTC-Aec3UseDot6SecondsInitialStateDuration")) { + adjusted_cfg.filter.initial_state_seconds = .6f; + } else if (field_trial::IsEnabled( + "WebRTC-Aec3UseDot9SecondsInitialStateDuration")) { + adjusted_cfg.filter.initial_state_seconds = .9f; + } else if (field_trial::IsEnabled( + "WebRTC-Aec3Use1Dot2SecondsInitialStateDuration")) { + adjusted_cfg.filter.initial_state_seconds = 1.2f; + } else if (field_trial::IsEnabled( + "WebRTC-Aec3Use1Dot6SecondsInitialStateDuration")) { + adjusted_cfg.filter.initial_state_seconds = 1.6f; + } else if (field_trial::IsEnabled( + "WebRTC-Aec3Use2Dot0SecondsInitialStateDuration")) { + adjusted_cfg.filter.initial_state_seconds = 2.0f; + } + + if (field_trial::IsEnabled("WebRTC-Aec3HighPassFilterEchoReference")) { + adjusted_cfg.filter.high_pass_filter_echo_reference = true; + } + + if (field_trial::IsEnabled("WebRTC-Aec3EchoSaturationDetectionKillSwitch")) { + adjusted_cfg.ep_strength.echo_can_saturate = false; + } + + const std::string use_nearend_reverb_len_tunings = + field_trial::FindFullName("WebRTC-Aec3UseNearendReverbLen"); + FieldTrialParameter nearend_reverb_default_len( + "default_len", adjusted_cfg.ep_strength.default_len); + FieldTrialParameter nearend_reverb_nearend_len( + "nearend_len", adjusted_cfg.ep_strength.nearend_len); + + ParseFieldTrial({&nearend_reverb_default_len, &nearend_reverb_nearend_len}, + use_nearend_reverb_len_tunings); + float default_len = static_cast(nearend_reverb_default_len.Get()); + float nearend_len = static_cast(nearend_reverb_nearend_len.Get()); + if (default_len > -1 && default_len < 1 && nearend_len > -1 && + nearend_len < 1) { + adjusted_cfg.ep_strength.default_len = + static_cast(nearend_reverb_default_len.Get()); + adjusted_cfg.ep_strength.nearend_len = + static_cast(nearend_reverb_nearend_len.Get()); + } + + if (field_trial::IsEnabled("WebRTC-Aec3ConservativeTailFreqResponse")) { + adjusted_cfg.ep_strength.use_conservative_tail_frequency_response = true; + } + + if (field_trial::IsDisabled("WebRTC-Aec3ConservativeTailFreqResponse")) { + adjusted_cfg.ep_strength.use_conservative_tail_frequency_response = false; + } + + if (field_trial::IsEnabled("WebRTC-Aec3ShortHeadroomKillSwitch")) { + // Two blocks headroom. + adjusted_cfg.delay.delay_headroom_samples = kBlockSize * 2; + } + + if (field_trial::IsEnabled("WebRTC-Aec3ClampInstQualityToZeroKillSwitch")) { + adjusted_cfg.erle.clamp_quality_estimate_to_zero = false; + } + + if (field_trial::IsEnabled("WebRTC-Aec3ClampInstQualityToOneKillSwitch")) { + adjusted_cfg.erle.clamp_quality_estimate_to_one = false; + } + + if (field_trial::IsEnabled("WebRTC-Aec3OnsetDetectionKillSwitch")) { + adjusted_cfg.erle.onset_detection = false; + } + + if (field_trial::IsEnabled( + "WebRTC-Aec3EnforceRenderDelayEstimationDownmixing")) { + adjusted_cfg.delay.render_alignment_mixing.downmix = true; + adjusted_cfg.delay.render_alignment_mixing.adaptive_selection = false; + } + + if (field_trial::IsEnabled( + "WebRTC-Aec3EnforceCaptureDelayEstimationDownmixing")) { + adjusted_cfg.delay.capture_alignment_mixing.downmix = true; + adjusted_cfg.delay.capture_alignment_mixing.adaptive_selection = false; + } + + if (field_trial::IsEnabled( + "WebRTC-Aec3EnforceCaptureDelayEstimationLeftRightPrioritization")) { + adjusted_cfg.delay.capture_alignment_mixing.prefer_first_two_channels = + true; + } + + if (field_trial::IsEnabled( + "WebRTC-" + "Aec3RenderDelayEstimationLeftRightPrioritizationKillSwitch")) { + adjusted_cfg.delay.capture_alignment_mixing.prefer_first_two_channels = + false; + } + + if (field_trial::IsEnabled("WebRTC-Aec3DelayEstimatorDetectPreEcho")) { + adjusted_cfg.delay.detect_pre_echo = true; + } + + if (field_trial::IsDisabled("WebRTC-Aec3DelayEstimatorDetectPreEcho")) { + adjusted_cfg.delay.detect_pre_echo = false; + } + + if (field_trial::IsEnabled("WebRTC-Aec3SensitiveDominantNearendActivation")) { + adjusted_cfg.suppressor.dominant_nearend_detection.enr_threshold = 0.5f; + } else if (field_trial::IsEnabled( + "WebRTC-Aec3VerySensitiveDominantNearendActivation")) { + adjusted_cfg.suppressor.dominant_nearend_detection.enr_threshold = 0.75f; + } + + if (field_trial::IsEnabled("WebRTC-Aec3TransparentAntiHowlingGain")) { + adjusted_cfg.suppressor.high_bands_suppression.anti_howling_gain = 1.f; + } + + if (field_trial::IsEnabled( + "WebRTC-Aec3EnforceMoreTransparentNormalSuppressorTuning")) { + adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_transparent = 0.4f; + adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_suppress = 0.5f; + } + + if (field_trial::IsEnabled( + "WebRTC-Aec3EnforceMoreTransparentNearendSuppressorTuning")) { + adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_transparent = 1.29f; + adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_suppress = 1.3f; + } + + if (field_trial::IsEnabled( + "WebRTC-Aec3EnforceMoreTransparentNormalSuppressorHfTuning")) { + adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_transparent = 0.3f; + adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_suppress = 0.4f; + } + + if (field_trial::IsEnabled( + "WebRTC-Aec3EnforceMoreTransparentNearendSuppressorHfTuning")) { + adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_transparent = 1.09f; + adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_suppress = 1.1f; + } + + if (field_trial::IsEnabled( + "WebRTC-Aec3EnforceRapidlyAdjustingNormalSuppressorTunings")) { + adjusted_cfg.suppressor.normal_tuning.max_inc_factor = 2.5f; + } + + if (field_trial::IsEnabled( + "WebRTC-Aec3EnforceRapidlyAdjustingNearendSuppressorTunings")) { + adjusted_cfg.suppressor.nearend_tuning.max_inc_factor = 2.5f; + } + + if (field_trial::IsEnabled( + "WebRTC-Aec3EnforceSlowlyAdjustingNormalSuppressorTunings")) { + adjusted_cfg.suppressor.normal_tuning.max_dec_factor_lf = .2f; + } + + if (field_trial::IsEnabled( + "WebRTC-Aec3EnforceSlowlyAdjustingNearendSuppressorTunings")) { + adjusted_cfg.suppressor.nearend_tuning.max_dec_factor_lf = .2f; + } + + if (field_trial::IsEnabled("WebRTC-Aec3EnforceConservativeHfSuppression")) { + adjusted_cfg.suppressor.conservative_hf_suppression = true; + } + + if (field_trial::IsEnabled("WebRTC-Aec3EnforceStationarityProperties")) { + adjusted_cfg.echo_audibility.use_stationarity_properties = true; + } + + if (field_trial::IsEnabled( + "WebRTC-Aec3EnforceStationarityPropertiesAtInit")) { + adjusted_cfg.echo_audibility.use_stationarity_properties_at_init = true; + } + + if (field_trial::IsEnabled("WebRTC-Aec3EnforceLowActiveRenderLimit")) { + adjusted_cfg.render_levels.active_render_limit = 50.f; + } else if (field_trial::IsEnabled( + "WebRTC-Aec3EnforceVeryLowActiveRenderLimit")) { + adjusted_cfg.render_levels.active_render_limit = 30.f; + } + + if (field_trial::IsEnabled("WebRTC-Aec3NonlinearModeReverbKillSwitch")) { + adjusted_cfg.echo_model.model_reverb_in_nonlinear_mode = false; + } + + // Field-trial based override for the whole suppressor tuning. + const std::string suppressor_tuning_override_trial_name = + field_trial::FindFullName("WebRTC-Aec3SuppressorTuningOverride"); + + FieldTrialParameter nearend_tuning_mask_lf_enr_transparent( + "nearend_tuning_mask_lf_enr_transparent", + adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_transparent); + FieldTrialParameter nearend_tuning_mask_lf_enr_suppress( + "nearend_tuning_mask_lf_enr_suppress", + adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_suppress); + FieldTrialParameter nearend_tuning_mask_hf_enr_transparent( + "nearend_tuning_mask_hf_enr_transparent", + adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_transparent); + FieldTrialParameter nearend_tuning_mask_hf_enr_suppress( + "nearend_tuning_mask_hf_enr_suppress", + adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_suppress); + FieldTrialParameter nearend_tuning_max_inc_factor( + "nearend_tuning_max_inc_factor", + adjusted_cfg.suppressor.nearend_tuning.max_inc_factor); + FieldTrialParameter nearend_tuning_max_dec_factor_lf( + "nearend_tuning_max_dec_factor_lf", + adjusted_cfg.suppressor.nearend_tuning.max_dec_factor_lf); + FieldTrialParameter normal_tuning_mask_lf_enr_transparent( + "normal_tuning_mask_lf_enr_transparent", + adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_transparent); + FieldTrialParameter normal_tuning_mask_lf_enr_suppress( + "normal_tuning_mask_lf_enr_suppress", + adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_suppress); + FieldTrialParameter normal_tuning_mask_hf_enr_transparent( + "normal_tuning_mask_hf_enr_transparent", + adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_transparent); + FieldTrialParameter normal_tuning_mask_hf_enr_suppress( + "normal_tuning_mask_hf_enr_suppress", + adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_suppress); + FieldTrialParameter normal_tuning_max_inc_factor( + "normal_tuning_max_inc_factor", + adjusted_cfg.suppressor.normal_tuning.max_inc_factor); + FieldTrialParameter normal_tuning_max_dec_factor_lf( + "normal_tuning_max_dec_factor_lf", + adjusted_cfg.suppressor.normal_tuning.max_dec_factor_lf); + FieldTrialParameter dominant_nearend_detection_enr_threshold( + "dominant_nearend_detection_enr_threshold", + adjusted_cfg.suppressor.dominant_nearend_detection.enr_threshold); + FieldTrialParameter dominant_nearend_detection_enr_exit_threshold( + "dominant_nearend_detection_enr_exit_threshold", + adjusted_cfg.suppressor.dominant_nearend_detection.enr_exit_threshold); + FieldTrialParameter dominant_nearend_detection_snr_threshold( + "dominant_nearend_detection_snr_threshold", + adjusted_cfg.suppressor.dominant_nearend_detection.snr_threshold); + FieldTrialParameter dominant_nearend_detection_hold_duration( + "dominant_nearend_detection_hold_duration", + adjusted_cfg.suppressor.dominant_nearend_detection.hold_duration); + FieldTrialParameter dominant_nearend_detection_trigger_threshold( + "dominant_nearend_detection_trigger_threshold", + adjusted_cfg.suppressor.dominant_nearend_detection.trigger_threshold); + + ParseFieldTrial( + {&nearend_tuning_mask_lf_enr_transparent, + &nearend_tuning_mask_lf_enr_suppress, + &nearend_tuning_mask_hf_enr_transparent, + &nearend_tuning_mask_hf_enr_suppress, &nearend_tuning_max_inc_factor, + &nearend_tuning_max_dec_factor_lf, + &normal_tuning_mask_lf_enr_transparent, + &normal_tuning_mask_lf_enr_suppress, + &normal_tuning_mask_hf_enr_transparent, + &normal_tuning_mask_hf_enr_suppress, &normal_tuning_max_inc_factor, + &normal_tuning_max_dec_factor_lf, + &dominant_nearend_detection_enr_threshold, + &dominant_nearend_detection_enr_exit_threshold, + &dominant_nearend_detection_snr_threshold, + &dominant_nearend_detection_hold_duration, + &dominant_nearend_detection_trigger_threshold}, + suppressor_tuning_override_trial_name); + + adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_transparent = + static_cast(nearend_tuning_mask_lf_enr_transparent.Get()); + adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_suppress = + static_cast(nearend_tuning_mask_lf_enr_suppress.Get()); + adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_transparent = + static_cast(nearend_tuning_mask_hf_enr_transparent.Get()); + adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_suppress = + static_cast(nearend_tuning_mask_hf_enr_suppress.Get()); + adjusted_cfg.suppressor.nearend_tuning.max_inc_factor = + static_cast(nearend_tuning_max_inc_factor.Get()); + adjusted_cfg.suppressor.nearend_tuning.max_dec_factor_lf = + static_cast(nearend_tuning_max_dec_factor_lf.Get()); + adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_transparent = + static_cast(normal_tuning_mask_lf_enr_transparent.Get()); + adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_suppress = + static_cast(normal_tuning_mask_lf_enr_suppress.Get()); + adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_transparent = + static_cast(normal_tuning_mask_hf_enr_transparent.Get()); + adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_suppress = + static_cast(normal_tuning_mask_hf_enr_suppress.Get()); + adjusted_cfg.suppressor.normal_tuning.max_inc_factor = + static_cast(normal_tuning_max_inc_factor.Get()); + adjusted_cfg.suppressor.normal_tuning.max_dec_factor_lf = + static_cast(normal_tuning_max_dec_factor_lf.Get()); + adjusted_cfg.suppressor.dominant_nearend_detection.enr_threshold = + static_cast(dominant_nearend_detection_enr_threshold.Get()); + adjusted_cfg.suppressor.dominant_nearend_detection.enr_exit_threshold = + static_cast(dominant_nearend_detection_enr_exit_threshold.Get()); + adjusted_cfg.suppressor.dominant_nearend_detection.snr_threshold = + static_cast(dominant_nearend_detection_snr_threshold.Get()); + adjusted_cfg.suppressor.dominant_nearend_detection.hold_duration = + dominant_nearend_detection_hold_duration.Get(); + adjusted_cfg.suppressor.dominant_nearend_detection.trigger_threshold = + dominant_nearend_detection_trigger_threshold.Get(); + + // Field trial-based overrides of individual suppressor parameters. + RetrieveFieldTrialValue( + "WebRTC-Aec3SuppressorNearendLfMaskTransparentOverride", 0.f, 10.f, + &adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_transparent); + RetrieveFieldTrialValue( + "WebRTC-Aec3SuppressorNearendLfMaskSuppressOverride", 0.f, 10.f, + &adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_suppress); + RetrieveFieldTrialValue( + "WebRTC-Aec3SuppressorNearendHfMaskTransparentOverride", 0.f, 10.f, + &adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_transparent); + RetrieveFieldTrialValue( + "WebRTC-Aec3SuppressorNearendHfMaskSuppressOverride", 0.f, 10.f, + &adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_suppress); + RetrieveFieldTrialValue( + "WebRTC-Aec3SuppressorNearendMaxIncFactorOverride", 0.f, 10.f, + &adjusted_cfg.suppressor.nearend_tuning.max_inc_factor); + RetrieveFieldTrialValue( + "WebRTC-Aec3SuppressorNearendMaxDecFactorLfOverride", 0.f, 10.f, + &adjusted_cfg.suppressor.nearend_tuning.max_dec_factor_lf); + + RetrieveFieldTrialValue( + "WebRTC-Aec3SuppressorNormalLfMaskTransparentOverride", 0.f, 10.f, + &adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_transparent); + RetrieveFieldTrialValue( + "WebRTC-Aec3SuppressorNormalLfMaskSuppressOverride", 0.f, 10.f, + &adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_suppress); + RetrieveFieldTrialValue( + "WebRTC-Aec3SuppressorNormalHfMaskTransparentOverride", 0.f, 10.f, + &adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_transparent); + RetrieveFieldTrialValue( + "WebRTC-Aec3SuppressorNormalHfMaskSuppressOverride", 0.f, 10.f, + &adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_suppress); + RetrieveFieldTrialValue( + "WebRTC-Aec3SuppressorNormalMaxIncFactorOverride", 0.f, 10.f, + &adjusted_cfg.suppressor.normal_tuning.max_inc_factor); + RetrieveFieldTrialValue( + "WebRTC-Aec3SuppressorNormalMaxDecFactorLfOverride", 0.f, 10.f, + &adjusted_cfg.suppressor.normal_tuning.max_dec_factor_lf); + + RetrieveFieldTrialValue( + "WebRTC-Aec3SuppressorDominantNearendEnrThresholdOverride", 0.f, 100.f, + &adjusted_cfg.suppressor.dominant_nearend_detection.enr_threshold); + RetrieveFieldTrialValue( + "WebRTC-Aec3SuppressorDominantNearendEnrExitThresholdOverride", 0.f, + 100.f, + &adjusted_cfg.suppressor.dominant_nearend_detection.enr_exit_threshold); + RetrieveFieldTrialValue( + "WebRTC-Aec3SuppressorDominantNearendSnrThresholdOverride", 0.f, 100.f, + &adjusted_cfg.suppressor.dominant_nearend_detection.snr_threshold); + RetrieveFieldTrialValue( + "WebRTC-Aec3SuppressorDominantNearendHoldDurationOverride", 0, 1000, + &adjusted_cfg.suppressor.dominant_nearend_detection.hold_duration); + RetrieveFieldTrialValue( + "WebRTC-Aec3SuppressorDominantNearendTriggerThresholdOverride", 0, 1000, + &adjusted_cfg.suppressor.dominant_nearend_detection.trigger_threshold); + + RetrieveFieldTrialValue( + "WebRTC-Aec3SuppressorAntiHowlingGainOverride", 0.f, 10.f, + &adjusted_cfg.suppressor.high_bands_suppression.anti_howling_gain); + + // Field trial-based overrides of individual delay estimator parameters. + RetrieveFieldTrialValue("WebRTC-Aec3DelayEstimateSmoothingOverride", 0.f, 1.f, + &adjusted_cfg.delay.delay_estimate_smoothing); + RetrieveFieldTrialValue( + "WebRTC-Aec3DelayEstimateSmoothingDelayFoundOverride", 0.f, 1.f, + &adjusted_cfg.delay.delay_estimate_smoothing_delay_found); + + return adjusted_cfg; +} + +class EchoCanceller3::RenderWriter { + public: + RenderWriter(ApmDataDumper* data_dumper, + const EchoCanceller3Config& config, + SwapQueue>>, + Aec3RenderQueueItemVerifier>* render_transfer_queue, + size_t num_bands, + size_t num_channels); + + RenderWriter() = delete; + RenderWriter(const RenderWriter&) = delete; + RenderWriter& operator=(const RenderWriter&) = delete; + + ~RenderWriter(); + void Insert(const AudioBuffer& input); + + private: + ApmDataDumper* data_dumper_; + const size_t num_bands_; + const size_t num_channels_; + std::unique_ptr high_pass_filter_; + std::vector>> render_queue_input_frame_; + SwapQueue>>, + Aec3RenderQueueItemVerifier>* render_transfer_queue_; +}; + +EchoCanceller3::RenderWriter::RenderWriter( + ApmDataDumper* data_dumper, + const EchoCanceller3Config& config, + SwapQueue>>, + Aec3RenderQueueItemVerifier>* render_transfer_queue, + size_t num_bands, + size_t num_channels) + : data_dumper_(data_dumper), + num_bands_(num_bands), + num_channels_(num_channels), + render_queue_input_frame_( + num_bands_, + std::vector>( + num_channels_, + std::vector(AudioBuffer::kSplitBandSize, 0.f))), + render_transfer_queue_(render_transfer_queue) { + RTC_DCHECK(data_dumper); + if (config.filter.high_pass_filter_echo_reference) { + high_pass_filter_ = std::make_unique(16000, num_channels); + } +} + +EchoCanceller3::RenderWriter::~RenderWriter() = default; + +void EchoCanceller3::RenderWriter::Insert(const AudioBuffer& input) { + RTC_DCHECK_EQ(AudioBuffer::kSplitBandSize, input.num_frames_per_band()); + RTC_DCHECK_EQ(num_bands_, input.num_bands()); + RTC_DCHECK_EQ(num_channels_, input.num_channels()); + + // TODO(bugs.webrtc.org/8759) Temporary work-around. + if (num_bands_ != input.num_bands()) + return; + + data_dumper_->DumpWav("aec3_render_input", AudioBuffer::kSplitBandSize, + &input.split_bands_const(0)[0][0], 16000, 1); + + CopyBufferIntoFrame(input, num_bands_, num_channels_, + &render_queue_input_frame_); + if (high_pass_filter_) { + high_pass_filter_->Process(&render_queue_input_frame_[0]); + } + + static_cast(render_transfer_queue_->Insert(&render_queue_input_frame_)); +} + +std::atomic EchoCanceller3::instance_count_(0); + +EchoCanceller3::EchoCanceller3( + const EchoCanceller3Config& config, + const absl::optional& multichannel_config, + int sample_rate_hz, + size_t num_render_channels, + size_t num_capture_channels) + : data_dumper_(new ApmDataDumper(instance_count_.fetch_add(1) + 1)), + config_(AdjustConfig(config)), + sample_rate_hz_(sample_rate_hz), + num_bands_(NumBandsForRate(sample_rate_hz_)), + num_render_input_channels_(num_render_channels), + num_capture_channels_(num_capture_channels), + config_selector_(AdjustConfig(config), + multichannel_config, + num_render_input_channels_), + multichannel_content_detector_( + config_selector_.active_config().multi_channel.detect_stereo_content, + num_render_input_channels_, + config_selector_.active_config() + .multi_channel.stereo_detection_threshold, + config_selector_.active_config() + .multi_channel.stereo_detection_timeout_threshold_seconds, + config_selector_.active_config() + .multi_channel.stereo_detection_hysteresis_seconds), + output_framer_(num_bands_, num_capture_channels_), + capture_blocker_(num_bands_, num_capture_channels_), + render_transfer_queue_( + kRenderTransferQueueSizeFrames, + std::vector>>( + num_bands_, + std::vector>( + num_render_input_channels_, + std::vector(AudioBuffer::kSplitBandSize, 0.f))), + Aec3RenderQueueItemVerifier(num_bands_, + num_render_input_channels_, + AudioBuffer::kSplitBandSize)), + render_queue_output_frame_( + num_bands_, + std::vector>( + num_render_input_channels_, + std::vector(AudioBuffer::kSplitBandSize, 0.f))), + render_block_(num_bands_, num_render_input_channels_), + capture_block_(num_bands_, num_capture_channels_), + capture_sub_frame_view_( + num_bands_, + std::vector>(num_capture_channels_)) { + RTC_DCHECK(ValidFullBandRate(sample_rate_hz_)); + + if (config_selector_.active_config().delay.fixed_capture_delay_samples > 0) { + block_delay_buffer_.reset(new BlockDelayBuffer( + num_capture_channels_, num_bands_, AudioBuffer::kSplitBandSize, + config_.delay.fixed_capture_delay_samples)); + } + + render_writer_.reset(new RenderWriter( + data_dumper_.get(), config_selector_.active_config(), + &render_transfer_queue_, num_bands_, num_render_input_channels_)); + + RTC_DCHECK_EQ(num_bands_, std::max(sample_rate_hz_, 16000) / 16000); + RTC_DCHECK_GE(kMaxNumBands, num_bands_); + + if (config_selector_.active_config().filter.export_linear_aec_output) { + linear_output_framer_.reset( + new BlockFramer(/*num_bands=*/1, num_capture_channels_)); + linear_output_block_ = + std::make_unique(/*num_bands=*/1, num_capture_channels_), + linear_output_sub_frame_view_ = + std::vector>>( + 1, std::vector>(num_capture_channels_)); + } + + Initialize(); + + RTC_LOG(LS_INFO) << "AEC3 created with sample rate: " << sample_rate_hz_ + << " Hz, num render channels: " << num_render_input_channels_ + << ", num capture channels: " << num_capture_channels_; +} + +EchoCanceller3::~EchoCanceller3() = default; + +void EchoCanceller3::Initialize() { + RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_); + + num_render_channels_to_aec_ = + multichannel_content_detector_.IsProperMultiChannelContentDetected() + ? num_render_input_channels_ + : 1; + + config_selector_.Update( + multichannel_content_detector_.IsProperMultiChannelContentDetected()); + + render_block_.SetNumChannels(num_render_channels_to_aec_); + + render_blocker_.reset( + new FrameBlocker(num_bands_, num_render_channels_to_aec_)); + + block_processor_.reset(BlockProcessor::Create( + config_selector_.active_config(), sample_rate_hz_, + num_render_channels_to_aec_, num_capture_channels_)); + + render_sub_frame_view_ = std::vector>>( + num_bands_, + std::vector>(num_render_channels_to_aec_)); +} + +void EchoCanceller3::AnalyzeRender(const AudioBuffer& render) { + RTC_DCHECK_RUNS_SERIALIZED(&render_race_checker_); + + RTC_DCHECK_EQ(render.num_channels(), num_render_input_channels_); + data_dumper_->DumpRaw("aec3_call_order", + static_cast(EchoCanceller3ApiCall::kRender)); + + return render_writer_->Insert(render); +} + +void EchoCanceller3::AnalyzeCapture(const AudioBuffer& capture) { + RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_); + data_dumper_->DumpWav("aec3_capture_analyze_input", capture.num_frames(), + capture.channels_const()[0], sample_rate_hz_, 1); + saturated_microphone_signal_ = false; + for (size_t channel = 0; channel < capture.num_channels(); ++channel) { + saturated_microphone_signal_ |= + DetectSaturation(rtc::ArrayView( + capture.channels_const()[channel], capture.num_frames())); + if (saturated_microphone_signal_) { + break; + } + } +} + +void EchoCanceller3::ProcessCapture(AudioBuffer* capture, bool level_change) { + ProcessCapture(capture, nullptr, level_change); +} + +void EchoCanceller3::ProcessCapture(AudioBuffer* capture, + AudioBuffer* linear_output, + bool level_change) { + RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_); + RTC_DCHECK(capture); + RTC_DCHECK_EQ(num_bands_, capture->num_bands()); + RTC_DCHECK_EQ(AudioBuffer::kSplitBandSize, capture->num_frames_per_band()); + RTC_DCHECK_EQ(capture->num_channels(), num_capture_channels_); + data_dumper_->DumpRaw("aec3_call_order", + static_cast(EchoCanceller3ApiCall::kCapture)); + + if (linear_output && !linear_output_framer_) { + RTC_LOG(LS_ERROR) << "Trying to retrieve the linear AEC output without " + "properly configuring AEC3."; + RTC_DCHECK_NOTREACHED(); + } + + // Report capture call in the metrics and periodically update API call + // metrics. + api_call_metrics_.ReportCaptureCall(); + + // Optionally delay the capture signal. + if (config_selector_.active_config().delay.fixed_capture_delay_samples > 0) { + RTC_DCHECK(block_delay_buffer_); + block_delay_buffer_->DelaySignal(capture); + } + + rtc::ArrayView capture_lower_band = rtc::ArrayView( + &capture->split_bands(0)[0][0], AudioBuffer::kSplitBandSize); + + data_dumper_->DumpWav("aec3_capture_input", capture_lower_band, 16000, 1); + + EmptyRenderQueue(); + + ProcessCaptureFrameContent( + linear_output, capture, level_change, + multichannel_content_detector_.IsTemporaryMultiChannelContentDetected(), + saturated_microphone_signal_, 0, &capture_blocker_, + linear_output_framer_.get(), &output_framer_, block_processor_.get(), + linear_output_block_.get(), &linear_output_sub_frame_view_, + &capture_block_, &capture_sub_frame_view_); + + ProcessCaptureFrameContent( + linear_output, capture, level_change, + multichannel_content_detector_.IsTemporaryMultiChannelContentDetected(), + saturated_microphone_signal_, 1, &capture_blocker_, + linear_output_framer_.get(), &output_framer_, block_processor_.get(), + linear_output_block_.get(), &linear_output_sub_frame_view_, + &capture_block_, &capture_sub_frame_view_); + + ProcessRemainingCaptureFrameContent( + level_change, + multichannel_content_detector_.IsTemporaryMultiChannelContentDetected(), + saturated_microphone_signal_, &capture_blocker_, + linear_output_framer_.get(), &output_framer_, block_processor_.get(), + linear_output_block_.get(), &capture_block_); + + data_dumper_->DumpWav("aec3_capture_output", AudioBuffer::kSplitBandSize, + &capture->split_bands(0)[0][0], 16000, 1); +} + +EchoControl::Metrics EchoCanceller3::GetMetrics() const { + RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_); + Metrics metrics; + block_processor_->GetMetrics(&metrics); + return metrics; +} + +void EchoCanceller3::SetAudioBufferDelay(int delay_ms) { + RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_); + block_processor_->SetAudioBufferDelay(delay_ms); +} + +void EchoCanceller3::SetCaptureOutputUsage(bool capture_output_used) { + RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_); + block_processor_->SetCaptureOutputUsage(capture_output_used); +} + +bool EchoCanceller3::ActiveProcessing() const { + return true; +} + +EchoCanceller3Config EchoCanceller3::CreateDefaultMultichannelConfig() { + EchoCanceller3Config cfg; + // Use shorter and more rapidly adapting coarse filter to compensate for + // thge increased number of total filter parameters to adapt. + cfg.filter.coarse.length_blocks = 11; + cfg.filter.coarse.rate = 0.95f; + cfg.filter.coarse_initial.length_blocks = 11; + cfg.filter.coarse_initial.rate = 0.95f; + + // Use more concervative suppressor behavior for non-nearend speech. + cfg.suppressor.normal_tuning.max_dec_factor_lf = 0.35f; + cfg.suppressor.normal_tuning.max_inc_factor = 1.5f; + return cfg; +} + +void EchoCanceller3::SetBlockProcessorForTesting( + std::unique_ptr block_processor) { + RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_); + RTC_DCHECK(block_processor); + block_processor_ = std::move(block_processor); +} + +void EchoCanceller3::EmptyRenderQueue() { + RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_); + bool frame_to_buffer = + render_transfer_queue_.Remove(&render_queue_output_frame_); + while (frame_to_buffer) { + // Report render call in the metrics. + api_call_metrics_.ReportRenderCall(); + + if (multichannel_content_detector_.UpdateDetection( + render_queue_output_frame_)) { + // Reinitialize the AEC when proper stereo is detected. + Initialize(); + } + + // Buffer frame content. + BufferRenderFrameContent( + /*proper_downmix_needed=*/multichannel_content_detector_ + .IsTemporaryMultiChannelContentDetected(), + &render_queue_output_frame_, 0, render_blocker_.get(), + block_processor_.get(), &render_block_, &render_sub_frame_view_); + + BufferRenderFrameContent( + /*proper_downmix_needed=*/multichannel_content_detector_ + .IsTemporaryMultiChannelContentDetected(), + &render_queue_output_frame_, 1, render_blocker_.get(), + block_processor_.get(), &render_block_, &render_sub_frame_view_); + + BufferRemainingRenderFrameContent(render_blocker_.get(), + block_processor_.get(), &render_block_); + + frame_to_buffer = + render_transfer_queue_.Remove(&render_queue_output_frame_); + } +} +} // namespace webrtc -- cgit v1.2.3