/* * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "modules/audio_processing/aec3/signal_dependent_erle_estimator.h" #include #include #include #include "modules/audio_processing/aec3/spectrum_buffer.h" #include "rtc_base/numerics/safe_minmax.h" namespace webrtc { namespace { constexpr std::array kBandBoundaries = {1, 8, 16, 24, 32, 48, kFftLengthBy2Plus1}; std::array FormSubbandMap() { std::array map_band_to_subband; size_t subband = 1; for (size_t k = 0; k < map_band_to_subband.size(); ++k) { RTC_DCHECK_LT(subband, kBandBoundaries.size()); if (k >= kBandBoundaries[subband]) { subband++; RTC_DCHECK_LT(k, kBandBoundaries[subband]); } map_band_to_subband[k] = subband - 1; } return map_band_to_subband; } // Defines the size in blocks of the sections that are used for dividing the // linear filter. The sections are split in a non-linear manner so that lower // sections that typically represent the direct path have a larger resolution // than the higher sections which typically represent more reverberant acoustic // paths. std::vector DefineFilterSectionSizes(size_t delay_headroom_blocks, size_t num_blocks, size_t num_sections) { size_t filter_length_blocks = num_blocks - delay_headroom_blocks; std::vector section_sizes(num_sections); size_t remaining_blocks = filter_length_blocks; size_t remaining_sections = num_sections; size_t estimator_size = 2; size_t idx = 0; while (remaining_sections > 1 && remaining_blocks > estimator_size * remaining_sections) { RTC_DCHECK_LT(idx, section_sizes.size()); section_sizes[idx] = estimator_size; remaining_blocks -= estimator_size; remaining_sections--; estimator_size *= 2; idx++; } size_t last_groups_size = remaining_blocks / remaining_sections; for (; idx < num_sections; idx++) { section_sizes[idx] = last_groups_size; } section_sizes[num_sections - 1] += remaining_blocks - last_groups_size * remaining_sections; return section_sizes; } // Forms the limits in blocks for each filter section. Those sections // are used for analyzing the echo estimates and investigating which // linear filter sections contribute most to the echo estimate energy. std::vector SetSectionsBoundaries(size_t delay_headroom_blocks, size_t num_blocks, size_t num_sections) { std::vector estimator_boundaries_blocks(num_sections + 1); if (estimator_boundaries_blocks.size() == 2) { estimator_boundaries_blocks[0] = 0; estimator_boundaries_blocks[1] = num_blocks; return estimator_boundaries_blocks; } RTC_DCHECK_GT(estimator_boundaries_blocks.size(), 2); const std::vector section_sizes = DefineFilterSectionSizes(delay_headroom_blocks, num_blocks, estimator_boundaries_blocks.size() - 1); size_t idx = 0; size_t current_size_block = 0; RTC_DCHECK_EQ(section_sizes.size() + 1, estimator_boundaries_blocks.size()); estimator_boundaries_blocks[0] = delay_headroom_blocks; for (size_t k = delay_headroom_blocks; k < num_blocks; ++k) { current_size_block++; if (current_size_block >= section_sizes[idx]) { idx = idx + 1; if (idx == section_sizes.size()) { break; } estimator_boundaries_blocks[idx] = k + 1; current_size_block = 0; } } estimator_boundaries_blocks[section_sizes.size()] = num_blocks; return estimator_boundaries_blocks; } std::array SetMaxErleSubbands(float max_erle_l, float max_erle_h, size_t limit_subband_l) { std::array max_erle; std::fill(max_erle.begin(), max_erle.begin() + limit_subband_l, max_erle_l); std::fill(max_erle.begin() + limit_subband_l, max_erle.end(), max_erle_h); return max_erle; } } // namespace SignalDependentErleEstimator::SignalDependentErleEstimator( const EchoCanceller3Config& config, size_t num_capture_channels) : min_erle_(config.erle.min), num_sections_(config.erle.num_sections), num_blocks_(config.filter.refined.length_blocks), delay_headroom_blocks_(config.delay.delay_headroom_samples / kBlockSize), band_to_subband_(FormSubbandMap()), max_erle_(SetMaxErleSubbands(config.erle.max_l, config.erle.max_h, band_to_subband_[kFftLengthBy2 / 2])), section_boundaries_blocks_(SetSectionsBoundaries(delay_headroom_blocks_, num_blocks_, num_sections_)), use_onset_detection_(config.erle.onset_detection), erle_(num_capture_channels), erle_onset_compensated_(num_capture_channels), S2_section_accum_( num_capture_channels, std::vector>(num_sections_)), erle_estimators_( num_capture_channels, std::vector>(num_sections_)), erle_ref_(num_capture_channels), correction_factors_( num_capture_channels, std::vector>(num_sections_)), num_updates_(num_capture_channels), n_active_sections_(num_capture_channels) { RTC_DCHECK_LE(num_sections_, num_blocks_); RTC_DCHECK_GE(num_sections_, 1); Reset(); } SignalDependentErleEstimator::~SignalDependentErleEstimator() = default; void SignalDependentErleEstimator::Reset() { for (size_t ch = 0; ch < erle_.size(); ++ch) { erle_[ch].fill(min_erle_); erle_onset_compensated_[ch].fill(min_erle_); for (auto& erle_estimator : erle_estimators_[ch]) { erle_estimator.fill(min_erle_); } erle_ref_[ch].fill(min_erle_); for (auto& factor : correction_factors_[ch]) { factor.fill(1.0f); } num_updates_[ch].fill(0); n_active_sections_[ch].fill(0); } } // Updates the Erle estimate by analyzing the current input signals. It takes // the render buffer and the filter frequency response in order to do an // estimation of the number of sections of the linear filter that are needed // for getting the majority of the energy in the echo estimate. Based on that // number of sections, it updates the erle estimation by introducing a // correction factor to the erle that is given as an input to this method. void SignalDependentErleEstimator::Update( const RenderBuffer& render_buffer, rtc::ArrayView>> filter_frequency_responses, rtc::ArrayView X2, rtc::ArrayView> Y2, rtc::ArrayView> E2, rtc::ArrayView> average_erle, rtc::ArrayView> average_erle_onset_compensated, const std::vector& converged_filters) { RTC_DCHECK_GT(num_sections_, 1); // Gets the number of filter sections that are needed for achieving 90 % // of the power spectrum energy of the echo estimate. ComputeNumberOfActiveFilterSections(render_buffer, filter_frequency_responses); // Updates the correction factors that is used for correcting the erle and // adapt it to the particular characteristics of the input signal. UpdateCorrectionFactors(X2, Y2, E2, converged_filters); // Applies the correction factor to the input erle for getting a more refined // erle estimation for the current input signal. for (size_t ch = 0; ch < erle_.size(); ++ch) { for (size_t k = 0; k < kFftLengthBy2; ++k) { RTC_DCHECK_GT(correction_factors_[ch].size(), n_active_sections_[ch][k]); float correction_factor = correction_factors_[ch][n_active_sections_[ch][k]] [band_to_subband_[k]]; erle_[ch][k] = rtc::SafeClamp(average_erle[ch][k] * correction_factor, min_erle_, max_erle_[band_to_subband_[k]]); if (use_onset_detection_) { erle_onset_compensated_[ch][k] = rtc::SafeClamp( average_erle_onset_compensated[ch][k] * correction_factor, min_erle_, max_erle_[band_to_subband_[k]]); } } } } void SignalDependentErleEstimator::Dump( const std::unique_ptr& data_dumper) const { for (auto& erle : erle_estimators_[0]) { data_dumper->DumpRaw("aec3_all_erle", erle); } data_dumper->DumpRaw("aec3_ref_erle", erle_ref_[0]); for (auto& factor : correction_factors_[0]) { data_dumper->DumpRaw("aec3_erle_correction_factor", factor); } } // Estimates for each band the smallest number of sections in the filter that // together constitute 90% of the estimated echo energy. void SignalDependentErleEstimator::ComputeNumberOfActiveFilterSections( const RenderBuffer& render_buffer, rtc::ArrayView>> filter_frequency_responses) { RTC_DCHECK_GT(num_sections_, 1); // Computes an approximation of the power spectrum if the filter would have // been limited to a certain number of filter sections. ComputeEchoEstimatePerFilterSection(render_buffer, filter_frequency_responses); // For each band, computes the number of filter sections that are needed for // achieving the 90 % energy in the echo estimate. ComputeActiveFilterSections(); } void SignalDependentErleEstimator::UpdateCorrectionFactors( rtc::ArrayView X2, rtc::ArrayView> Y2, rtc::ArrayView> E2, const std::vector& converged_filters) { for (size_t ch = 0; ch < converged_filters.size(); ++ch) { if (converged_filters[ch]) { constexpr float kX2BandEnergyThreshold = 44015068.0f; constexpr float kSmthConstantDecreases = 0.1f; constexpr float kSmthConstantIncreases = kSmthConstantDecreases / 2.f; auto subband_powers = [](rtc::ArrayView power_spectrum, rtc::ArrayView power_spectrum_subbands) { for (size_t subband = 0; subband < kSubbands; ++subband) { RTC_DCHECK_LE(kBandBoundaries[subband + 1], power_spectrum.size()); power_spectrum_subbands[subband] = std::accumulate( power_spectrum.begin() + kBandBoundaries[subband], power_spectrum.begin() + kBandBoundaries[subband + 1], 0.f); } }; std::array X2_subbands, E2_subbands, Y2_subbands; subband_powers(X2, X2_subbands); subband_powers(E2[ch], E2_subbands); subband_powers(Y2[ch], Y2_subbands); std::array idx_subbands; for (size_t subband = 0; subband < kSubbands; ++subband) { // When aggregating the number of active sections in the filter for // different bands we choose to take the minimum of all of them. As an // example, if for one of the bands it is the direct path its refined // contributor to the final echo estimate, we consider the direct path // is as well the refined contributor for the subband that contains that // particular band. That aggregate number of sections will be later used // as the identifier of the erle estimator that needs to be updated. RTC_DCHECK_LE(kBandBoundaries[subband + 1], n_active_sections_[ch].size()); idx_subbands[subband] = *std::min_element( n_active_sections_[ch].begin() + kBandBoundaries[subband], n_active_sections_[ch].begin() + kBandBoundaries[subband + 1]); } std::array new_erle; std::array is_erle_updated; is_erle_updated.fill(false); new_erle.fill(0.f); for (size_t subband = 0; subband < kSubbands; ++subband) { if (X2_subbands[subband] > kX2BandEnergyThreshold && E2_subbands[subband] > 0) { new_erle[subband] = Y2_subbands[subband] / E2_subbands[subband]; RTC_DCHECK_GT(new_erle[subband], 0); is_erle_updated[subband] = true; ++num_updates_[ch][subband]; } } for (size_t subband = 0; subband < kSubbands; ++subband) { const size_t idx = idx_subbands[subband]; RTC_DCHECK_LT(idx, erle_estimators_[ch].size()); float alpha = new_erle[subband] > erle_estimators_[ch][idx][subband] ? kSmthConstantIncreases : kSmthConstantDecreases; alpha = static_cast(is_erle_updated[subband]) * alpha; erle_estimators_[ch][idx][subband] += alpha * (new_erle[subband] - erle_estimators_[ch][idx][subband]); erle_estimators_[ch][idx][subband] = rtc::SafeClamp( erle_estimators_[ch][idx][subband], min_erle_, max_erle_[subband]); } for (size_t subband = 0; subband < kSubbands; ++subband) { float alpha = new_erle[subband] > erle_ref_[ch][subband] ? kSmthConstantIncreases : kSmthConstantDecreases; alpha = static_cast(is_erle_updated[subband]) * alpha; erle_ref_[ch][subband] += alpha * (new_erle[subband] - erle_ref_[ch][subband]); erle_ref_[ch][subband] = rtc::SafeClamp(erle_ref_[ch][subband], min_erle_, max_erle_[subband]); } for (size_t subband = 0; subband < kSubbands; ++subband) { constexpr int kNumUpdateThr = 50; if (is_erle_updated[subband] && num_updates_[ch][subband] > kNumUpdateThr) { const size_t idx = idx_subbands[subband]; RTC_DCHECK_GT(erle_ref_[ch][subband], 0.f); // Computes the ratio between the erle that is updated using all the // points and the erle that is updated only on signals that share the // same number of active filter sections. float new_correction_factor = erle_estimators_[ch][idx][subband] / erle_ref_[ch][subband]; correction_factors_[ch][idx][subband] += 0.1f * (new_correction_factor - correction_factors_[ch][idx][subband]); } } } } } void SignalDependentErleEstimator::ComputeEchoEstimatePerFilterSection( const RenderBuffer& render_buffer, rtc::ArrayView>> filter_frequency_responses) { const SpectrumBuffer& spectrum_render_buffer = render_buffer.GetSpectrumBuffer(); const size_t num_render_channels = spectrum_render_buffer.buffer[0].size(); const size_t num_capture_channels = S2_section_accum_.size(); const float one_by_num_render_channels = 1.f / num_render_channels; RTC_DCHECK_EQ(S2_section_accum_.size(), filter_frequency_responses.size()); for (size_t capture_ch = 0; capture_ch < num_capture_channels; ++capture_ch) { RTC_DCHECK_EQ(S2_section_accum_[capture_ch].size() + 1, section_boundaries_blocks_.size()); size_t idx_render = render_buffer.Position(); idx_render = spectrum_render_buffer.OffsetIndex( idx_render, section_boundaries_blocks_[0]); for (size_t section = 0; section < num_sections_; ++section) { std::array X2_section; std::array H2_section; X2_section.fill(0.f); H2_section.fill(0.f); const size_t block_limit = std::min(section_boundaries_blocks_[section + 1], filter_frequency_responses[capture_ch].size()); for (size_t block = section_boundaries_blocks_[section]; block < block_limit; ++block) { for (size_t render_ch = 0; render_ch < spectrum_render_buffer.buffer[idx_render].size(); ++render_ch) { for (size_t k = 0; k < X2_section.size(); ++k) { X2_section[k] += spectrum_render_buffer.buffer[idx_render][render_ch][k] * one_by_num_render_channels; } } std::transform(H2_section.begin(), H2_section.end(), filter_frequency_responses[capture_ch][block].begin(), H2_section.begin(), std::plus()); idx_render = spectrum_render_buffer.IncIndex(idx_render); } std::transform(X2_section.begin(), X2_section.end(), H2_section.begin(), S2_section_accum_[capture_ch][section].begin(), std::multiplies()); } for (size_t section = 1; section < num_sections_; ++section) { std::transform(S2_section_accum_[capture_ch][section - 1].begin(), S2_section_accum_[capture_ch][section - 1].end(), S2_section_accum_[capture_ch][section].begin(), S2_section_accum_[capture_ch][section].begin(), std::plus()); } } } void SignalDependentErleEstimator::ComputeActiveFilterSections() { for (size_t ch = 0; ch < n_active_sections_.size(); ++ch) { std::fill(n_active_sections_[ch].begin(), n_active_sections_[ch].end(), 0); for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { size_t section = num_sections_; float target = 0.9f * S2_section_accum_[ch][num_sections_ - 1][k]; while (section > 0 && S2_section_accum_[ch][section - 1][k] >= target) { n_active_sections_[ch][k] = --section; } } } } } // namespace webrtc