path: root/third_party/libwebrtc/modules/audio_processing/aec3/residual_echo_estimator.cc
Diffstat (limited to 'third_party/libwebrtc/modules/audio_processing/aec3/residual_echo_estimator.cc')
-rw-r--r--  third_party/libwebrtc/modules/audio_processing/aec3/residual_echo_estimator.cc  379
1 file changed, 379 insertions, 0 deletions
diff --git a/third_party/libwebrtc/modules/audio_processing/aec3/residual_echo_estimator.cc b/third_party/libwebrtc/modules/audio_processing/aec3/residual_echo_estimator.cc
new file mode 100644
index 0000000000..640a3e3cb9
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/aec3/residual_echo_estimator.cc
@@ -0,0 +1,379 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/residual_echo_estimator.h"
+
+#include <stddef.h>
+
+#include <algorithm>
+#include <vector>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/aec3/reverb_model.h"
+#include "rtc_base/checks.h"
+#include "system_wrappers/include/field_trial.h"
+
+namespace webrtc {
+namespace {
+
+constexpr float kDefaultTransparentModeGain = 0.01f;
+
+float GetTransparentModeGain() {
+ return kDefaultTransparentModeGain;
+}
+
+float GetEarlyReflectionsDefaultModeGain(
+ const EchoCanceller3Config::EpStrength& config) {
+ if (field_trial::IsEnabled("WebRTC-Aec3UseLowEarlyReflectionsDefaultGain")) {
+ return 0.1f;
+ }
+ return config.default_gain;
+}
+
+float GetLateReflectionsDefaultModeGain(
+ const EchoCanceller3Config::EpStrength& config) {
+ if (field_trial::IsEnabled("WebRTC-Aec3UseLowLateReflectionsDefaultGain")) {
+ return 0.1f;
+ }
+ return config.default_gain;
+}
+
+bool UseErleOnsetCompensationInDominantNearend(
+ const EchoCanceller3Config::EpStrength& config) {
+ return config.erle_onset_compensation_in_dominant_nearend ||
+ field_trial::IsEnabled(
+ "WebRTC-Aec3UseErleOnsetCompensationInDominantNearend");
+}
+
+// Computes the indexes that will be used for computing spectral power over
+// the blocks surrounding the delay.
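+// The indices refer to positions in the circular spectrum buffer, and
+// idx_stop is exclusive (one past the last block to analyze).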
+void GetRenderIndexesToAnalyze(
+ const SpectrumBuffer& spectrum_buffer,
+ const EchoCanceller3Config::EchoModel& echo_model,
+ int filter_delay_blocks,
+ int* idx_start,
+ int* idx_stop) {
+ RTC_DCHECK(idx_start);
+ RTC_DCHECK(idx_stop);
+ size_t window_start;
+ size_t window_end;
+ window_start =
+ std::max(0, filter_delay_blocks -
+ static_cast<int>(echo_model.render_pre_window_size));
+ window_end = filter_delay_blocks +
+ static_cast<int>(echo_model.render_post_window_size);
+ *idx_start = spectrum_buffer.OffsetIndex(spectrum_buffer.read, window_start);
+ *idx_stop = spectrum_buffer.OffsetIndex(spectrum_buffer.read, window_end + 1);
+}
+
+// Estimates the residual echo power based on the echo return loss enhancement
+// (ERLE) and the linear power estimate.
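+// For each capture channel and frequency bin this computes R2 = S2_linear /
+// ERLE, i.e. the linear echo estimate scaled down by the achieved enhancement.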
+void LinearEstimate(
+ rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> S2_linear,
+ rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> erle,
+ rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2) {
+ RTC_DCHECK_EQ(S2_linear.size(), erle.size());
+ RTC_DCHECK_EQ(S2_linear.size(), R2.size());
+
+ const size_t num_capture_channels = R2.size();
+ for (size_t ch = 0; ch < num_capture_channels; ++ch) {
+ for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
+ RTC_DCHECK_LT(0.f, erle[ch][k]);
+ R2[ch][k] = S2_linear[ch][k] / erle[ch][k];
+ }
+ }
+}
+
+// Estimates the residual echo power based on the estimate of the echo path
+// gain.
+void NonLinearEstimate(
+ float echo_path_gain,
+ const std::array<float, kFftLengthBy2Plus1>& X2,
+ rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2) {
+ const size_t num_capture_channels = R2.size();
+ for (size_t ch = 0; ch < num_capture_channels; ++ch) {
+ for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
+ R2[ch][k] = X2[k] * echo_path_gain;
+ }
+ }
+}
+
+// Applies a soft noise gate to the echo generating power.
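+// Power below the gate threshold is attenuated further the lower it is; e.g.
+// with noise_gate_power = 1000 and noise_gate_slope = 0.3 (example values), an
+// input of 800 becomes max(0, 800 - 0.3 * (1000 - 800)) = 740.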
+void ApplyNoiseGate(const EchoCanceller3Config::EchoModel& config,
+ rtc::ArrayView<float, kFftLengthBy2Plus1> X2) {
+ for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
+ if (config.noise_gate_power > X2[k]) {
+ X2[k] = std::max(0.f, X2[k] - config.noise_gate_slope *
+ (config.noise_gate_power - X2[k]));
+ }
+ }
+}
+
+// Estimates the echo generating signal power as gated maximal power over a
+// time window.
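+// With a single render channel the per-bin maximum over the window is taken
+// directly; with multiple channels the per-bin powers are first summed across
+// channels before the maximum over the window is taken.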
+void EchoGeneratingPower(size_t num_render_channels,
+ const SpectrumBuffer& spectrum_buffer,
+ const EchoCanceller3Config::EchoModel& echo_model,
+ int filter_delay_blocks,
+ rtc::ArrayView<float, kFftLengthBy2Plus1> X2) {
+ int idx_stop;
+ int idx_start;
+ GetRenderIndexesToAnalyze(spectrum_buffer, echo_model, filter_delay_blocks,
+ &idx_start, &idx_stop);
+
+ std::fill(X2.begin(), X2.end(), 0.f);
+ if (num_render_channels == 1) {
+ for (int k = idx_start; k != idx_stop; k = spectrum_buffer.IncIndex(k)) {
+ for (size_t j = 0; j < kFftLengthBy2Plus1; ++j) {
+ X2[j] = std::max(X2[j], spectrum_buffer.buffer[k][/*channel=*/0][j]);
+ }
+ }
+ } else {
+ for (int k = idx_start; k != idx_stop; k = spectrum_buffer.IncIndex(k)) {
+ std::array<float, kFftLengthBy2Plus1> render_power;
+ render_power.fill(0.f);
+ for (size_t ch = 0; ch < num_render_channels; ++ch) {
+ const auto& channel_power = spectrum_buffer.buffer[k][ch];
+ for (size_t j = 0; j < kFftLengthBy2Plus1; ++j) {
+ render_power[j] += channel_power[j];
+ }
+ }
+ for (size_t j = 0; j < kFftLengthBy2Plus1; ++j) {
+ X2[j] = std::max(X2[j], render_power[j]);
+ }
+ }
+ }
+}
+
+} // namespace
+
+ResidualEchoEstimator::ResidualEchoEstimator(const EchoCanceller3Config& config,
+ size_t num_render_channels)
+ : config_(config),
+ num_render_channels_(num_render_channels),
+ early_reflections_transparent_mode_gain_(GetTransparentModeGain()),
+ late_reflections_transparent_mode_gain_(GetTransparentModeGain()),
+ early_reflections_general_gain_(
+ GetEarlyReflectionsDefaultModeGain(config_.ep_strength)),
+ late_reflections_general_gain_(
+ GetLateReflectionsDefaultModeGain(config_.ep_strength)),
+ erle_onset_compensation_in_dominant_nearend_(
+ UseErleOnsetCompensationInDominantNearend(config_.ep_strength)) {
+ Reset();
+}
+
+ResidualEchoEstimator::~ResidualEchoEstimator() = default;
+
+void ResidualEchoEstimator::Estimate(
+ const AecState& aec_state,
+ const RenderBuffer& render_buffer,
+ rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> S2_linear,
+ rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Y2,
+ bool dominant_nearend,
+ rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2,
+ rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2_unbounded) {
+ RTC_DCHECK_EQ(R2.size(), Y2.size());
+ RTC_DCHECK_EQ(R2.size(), S2_linear.size());
+
+ const size_t num_capture_channels = R2.size();
+
+ // Estimate the power of the stationary noise in the render signal.
+ UpdateRenderNoisePower(render_buffer);
+
+ // Estimate the residual echo power.
+ if (aec_state.UsableLinearEstimate()) {
+ // When there is saturated echo, assume the same spectral content as is
+ // present in the microphone signal.
+ if (aec_state.SaturatedEcho()) {
+ for (size_t ch = 0; ch < num_capture_channels; ++ch) {
+ std::copy(Y2[ch].begin(), Y2[ch].end(), R2[ch].begin());
+ std::copy(Y2[ch].begin(), Y2[ch].end(), R2_unbounded[ch].begin());
+ }
+ } else {
+ const bool onset_compensated =
+ erle_onset_compensation_in_dominant_nearend_ || !dominant_nearend;
+ LinearEstimate(S2_linear, aec_state.Erle(onset_compensated), R2);
+ LinearEstimate(S2_linear, aec_state.ErleUnbounded(), R2_unbounded);
+ }
+
+ UpdateReverb(ReverbType::kLinear, aec_state, render_buffer,
+ dominant_nearend);
+ AddReverb(R2);
+ AddReverb(R2_unbounded);
+ } else {
+ const float echo_path_gain =
+ GetEchoPathGain(aec_state, /*gain_for_early_reflections=*/true);
+
+ // When there is saturated echo, assume the same spectral content as is
+ // present in the microphone signal.
+ if (aec_state.SaturatedEcho()) {
+ for (size_t ch = 0; ch < num_capture_channels; ++ch) {
+ std::copy(Y2[ch].begin(), Y2[ch].end(), R2[ch].begin());
+ std::copy(Y2[ch].begin(), Y2[ch].end(), R2_unbounded[ch].begin());
+ }
+ } else {
+ // Estimate the echo generating signal power.
+ std::array<float, kFftLengthBy2Plus1> X2;
+ EchoGeneratingPower(num_render_channels_,
+ render_buffer.GetSpectrumBuffer(), config_.echo_model,
+ aec_state.MinDirectPathFilterDelay(), X2);
+ if (!aec_state.UseStationarityProperties()) {
+ ApplyNoiseGate(config_.echo_model, X2);
+ }
+
+ // Subtract the stationary noise power to avoid stationary noise causing
+ // excessive echo suppression.
+ for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
+ X2[k] -= config_.echo_model.stationary_gate_slope * X2_noise_floor_[k];
+ X2[k] = std::max(0.f, X2[k]);
+ }
+
+ NonLinearEstimate(echo_path_gain, X2, R2);
+ NonLinearEstimate(echo_path_gain, X2, R2_unbounded);
+ }
+
+ if (config_.echo_model.model_reverb_in_nonlinear_mode &&
+ !aec_state.TransparentModeActive()) {
+ UpdateReverb(ReverbType::kNonLinear, aec_state, render_buffer,
+ dominant_nearend);
+ AddReverb(R2);
+ AddReverb(R2_unbounded);
+ }
+ }
+
+ if (aec_state.UseStationarityProperties()) {
+ // Scale the echo according to echo audibility.
+ std::array<float, kFftLengthBy2Plus1> residual_scaling;
+ aec_state.GetResidualEchoScaling(residual_scaling);
+ for (size_t ch = 0; ch < num_capture_channels; ++ch) {
+ for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
+ R2[ch][k] *= residual_scaling[k];
+ R2_unbounded[ch][k] *= residual_scaling[k];
+ }
+ }
+ }
+}
+
+void ResidualEchoEstimator::Reset() {
+ echo_reverb_.Reset();
+ X2_noise_floor_counter_.fill(config_.echo_model.noise_floor_hold);
+ X2_noise_floor_.fill(config_.echo_model.min_noise_floor_power);
+}
+
+void ResidualEchoEstimator::UpdateRenderNoisePower(
+ const RenderBuffer& render_buffer) {
+ std::array<float, kFftLengthBy2Plus1> render_power_data;
+ rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> X2 =
+ render_buffer.Spectrum(0);
+ rtc::ArrayView<const float, kFftLengthBy2Plus1> render_power =
+ X2[/*channel=*/0];
+ if (num_render_channels_ > 1) {
+ render_power_data.fill(0.f);
+ for (size_t ch = 0; ch < num_render_channels_; ++ch) {
+ const auto& channel_power = X2[ch];
+ for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
+ render_power_data[k] += channel_power[k];
+ }
+ }
+ render_power = render_power_data;
+ }
+
+ // Estimate the stationary noise power in a minimum statistics manner.
+ for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
+ // Decrease rapidly.
+ if (render_power[k] < X2_noise_floor_[k]) {
+ X2_noise_floor_[k] = render_power[k];
+ X2_noise_floor_counter_[k] = 0;
+ } else {
+ // Increase in a delayed, leaky manner.
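+      // Once the counter has reached noise_floor_hold blocks, let the floor
+      // grow by 10% per block; the max() keeps it at or above
+      // min_noise_floor_power.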
+ if (X2_noise_floor_counter_[k] >=
+ static_cast<int>(config_.echo_model.noise_floor_hold)) {
+ X2_noise_floor_[k] = std::max(X2_noise_floor_[k] * 1.1f,
+ config_.echo_model.min_noise_floor_power);
+ } else {
+ ++X2_noise_floor_counter_[k];
+ }
+ }
+ }
+}
+
+// Updates the reverb estimation.
+void ResidualEchoEstimator::UpdateReverb(ReverbType reverb_type,
+ const AecState& aec_state,
+ const RenderBuffer& render_buffer,
+ bool dominant_nearend) {
+ // Choose reverb partition based on what type of echo power model is used.
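+  // For the linear model the reverb tail is modeled from the first partition
+  // beyond the linear filter; for the nonlinear model it starts right after
+  // the direct-path delay.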
+ const size_t first_reverb_partition =
+ reverb_type == ReverbType::kLinear
+ ? aec_state.FilterLengthBlocks() + 1
+ : aec_state.MinDirectPathFilterDelay() + 1;
+
+ // Compute render power for the reverb.
+ std::array<float, kFftLengthBy2Plus1> render_power_data;
+ rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> X2 =
+ render_buffer.Spectrum(first_reverb_partition);
+ rtc::ArrayView<const float, kFftLengthBy2Plus1> render_power =
+ X2[/*channel=*/0];
+ if (num_render_channels_ > 1) {
+ render_power_data.fill(0.f);
+ for (size_t ch = 0; ch < num_render_channels_; ++ch) {
+ const auto& channel_power = X2[ch];
+ for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
+ render_power_data[k] += channel_power[k];
+ }
+ }
+ render_power = render_power_data;
+ }
+
+ // Update the reverb estimate.
+ float reverb_decay = aec_state.ReverbDecay(/*mild=*/dominant_nearend);
+ if (reverb_type == ReverbType::kLinear) {
+ echo_reverb_.UpdateReverb(
+ render_power, aec_state.GetReverbFrequencyResponse(), reverb_decay);
+ } else {
+ const float echo_path_gain =
+ GetEchoPathGain(aec_state, /*gain_for_early_reflections=*/false);
+ echo_reverb_.UpdateReverbNoFreqShaping(render_power, echo_path_gain,
+ reverb_decay);
+ }
+}
+
+// Adds the estimated power of the reverb to the residual echo power.
+void ResidualEchoEstimator::AddReverb(
+ rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2) const {
+ const size_t num_capture_channels = R2.size();
+
+ // Add the reverb power.
+ rtc::ArrayView<const float, kFftLengthBy2Plus1> reverb_power =
+ echo_reverb_.reverb();
+ for (size_t ch = 0; ch < num_capture_channels; ++ch) {
+ for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
+ R2[ch][k] += reverb_power[k];
+ }
+ }
+}
+
+// Chooses the echo path gain to use.
+float ResidualEchoEstimator::GetEchoPathGain(
+ const AecState& aec_state,
+ bool gain_for_early_reflections) const {
+ float gain_amplitude;
+ if (aec_state.TransparentModeActive()) {
+ gain_amplitude = gain_for_early_reflections
+ ? early_reflections_transparent_mode_gain_
+ : late_reflections_transparent_mode_gain_;
+ } else {
+ gain_amplitude = gain_for_early_reflections
+ ? early_reflections_general_gain_
+ : late_reflections_general_gain_;
+ }
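+  // The configured gains are amplitudes; squaring yields a power gain that
+  // matches the power spectra (X2, R2) this estimator operates on.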
+ return gain_amplitude * gain_amplitude;
+}
+
+} // namespace webrtc
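
For reference, below is a minimal standalone sketch of the minimum-statistics noise-floor tracking performed in ResidualEchoEstimator::UpdateRenderNoisePower() above. NoiseModelConfig, NoiseFloorTracker and the constant values are illustrative stand-ins (assumptions), not part of the WebRTC API; only the per-bin update logic mirrors the code in the diff.

#include <algorithm>
#include <array>
#include <cstddef>
#include <cstdio>

constexpr std::size_t kNumBins = 65;  // kFftLengthBy2Plus1 for the 128-point FFT used by AEC3.

// Illustrative stand-in for the relevant EchoCanceller3Config::EchoModel fields.
struct NoiseModelConfig {
  int noise_floor_hold = 50;
  float min_noise_floor_power = 1638400.f;
};

class NoiseFloorTracker {
 public:
  explicit NoiseFloorTracker(const NoiseModelConfig& config) : config_(config) {
    counter_.fill(config_.noise_floor_hold);
    floor_.fill(config_.min_noise_floor_power);
  }

  // Mirrors the per-bin update in ResidualEchoEstimator::UpdateRenderNoisePower.
  void Update(const std::array<float, kNumBins>& render_power) {
    for (std::size_t k = 0; k < kNumBins; ++k) {
      if (render_power[k] < floor_[k]) {
        // Track drops in render power immediately.
        floor_[k] = render_power[k];
        counter_[k] = 0;
      } else if (counter_[k] >= config_.noise_floor_hold) {
        // After the hold period, let the floor leak upwards by 10% per block.
        floor_[k] = std::max(floor_[k] * 1.1f, config_.min_noise_floor_power);
      } else {
        ++counter_[k];
      }
    }
  }

  float FloorAtBin(std::size_t k) const { return floor_[k]; }

 private:
  NoiseModelConfig config_;
  std::array<int, kNumBins> counter_;
  std::array<float, kNumBins> floor_;
};

int main() {
  NoiseFloorTracker tracker(NoiseModelConfig{});
  std::array<float, kNumBins> render_power;
  render_power.fill(2.0e6f);  // Constant render power, above the initial floor.
  for (int block = 0; block < 100; ++block) {
    tracker.Update(render_power);
  }
  std::printf("Noise floor at bin 0 after 100 blocks: %.0f\n",
              tracker.FloorAtBin(0));
  return 0;
}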