diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
commit | 26a029d407be480d791972afb5975cf62c9360a6 (patch) | |
tree | f435a8308119effd964b339f76abb83a57c29483 /third_party/libwebrtc/modules/audio_processing/agc2/adaptive_digital_gain_controller.cc | |
parent | Initial commit. (diff) | |
download | firefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz firefox-26a029d407be480d791972afb5975cf62c9360a6.zip |
Adding upstream version 124.0.1.upstream/124.0.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/libwebrtc/modules/audio_processing/agc2/adaptive_digital_gain_controller.cc')
-rw-r--r-- | third_party/libwebrtc/modules/audio_processing/agc2/adaptive_digital_gain_controller.cc | 216 |
1 files changed, 216 insertions, 0 deletions
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/adaptive_digital_gain_controller.cc b/third_party/libwebrtc/modules/audio_processing/agc2/adaptive_digital_gain_controller.cc new file mode 100644 index 0000000000..e8edab602c --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/adaptive_digital_gain_controller.cc @@ -0,0 +1,216 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/adaptive_digital_gain_controller.h" + +#include <algorithm> + +#include "common_audio/include/audio_util.h" +#include "modules/audio_processing/agc2/agc2_common.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" +#include "rtc_base/numerics/safe_minmax.h" +#include "system_wrappers/include/metrics.h" + +namespace webrtc { +namespace { + +using AdaptiveDigitalConfig = + AudioProcessing::Config::GainController2::AdaptiveDigital; + +constexpr int kHeadroomHistogramMin = 0; +constexpr int kHeadroomHistogramMax = 50; +constexpr int kGainDbHistogramMax = 30; + +// Computes the gain for `input_level_dbfs` to reach `-config.headroom_db`. +// Clamps the gain in [0, `config.max_gain_db`]. `config.headroom_db` is a +// safety margin to allow transient peaks to exceed the target peak level +// without clipping. +float ComputeGainDb(float input_level_dbfs, + const AdaptiveDigitalConfig& config) { + // If the level is very low, apply the maximum gain. + if (input_level_dbfs < -(config.headroom_db + config.max_gain_db)) { + return config.max_gain_db; + } + // We expect to end up here most of the time: the level is below + // -headroom, but we can boost it to -headroom. + if (input_level_dbfs < -config.headroom_db) { + return -config.headroom_db - input_level_dbfs; + } + // The level is too high and we can't boost. + RTC_DCHECK_GE(input_level_dbfs, -config.headroom_db); + return 0.0f; +} + +// Returns `target_gain_db` if applying such a gain to `input_noise_level_dbfs` +// does not exceed `max_output_noise_level_dbfs`. Otherwise lowers and returns +// `target_gain_db` so that the output noise level equals +// `max_output_noise_level_dbfs`. +float LimitGainByNoise(float target_gain_db, + float input_noise_level_dbfs, + float max_output_noise_level_dbfs, + ApmDataDumper& apm_data_dumper) { + const float max_allowed_gain_db = + max_output_noise_level_dbfs - input_noise_level_dbfs; + apm_data_dumper.DumpRaw("agc2_adaptive_gain_applier_max_allowed_gain_db", + max_allowed_gain_db); + return std::min(target_gain_db, std::max(max_allowed_gain_db, 0.0f)); +} + +float LimitGainByLowConfidence(float target_gain_db, + float last_gain_db, + float limiter_audio_level_dbfs, + bool estimate_is_confident) { + if (estimate_is_confident || + limiter_audio_level_dbfs <= kLimiterThresholdForAgcGainDbfs) { + return target_gain_db; + } + const float limiter_level_dbfs_before_gain = + limiter_audio_level_dbfs - last_gain_db; + + // Compute a new gain so that `limiter_level_dbfs_before_gain` + + // `new_target_gain_db` is not great than `kLimiterThresholdForAgcGainDbfs`. + const float new_target_gain_db = std::max( + kLimiterThresholdForAgcGainDbfs - limiter_level_dbfs_before_gain, 0.0f); + return std::min(new_target_gain_db, target_gain_db); +} + +// Computes how the gain should change during this frame. +// Return the gain difference in db to 'last_gain_db'. +float ComputeGainChangeThisFrameDb(float target_gain_db, + float last_gain_db, + bool gain_increase_allowed, + float max_gain_decrease_db, + float max_gain_increase_db) { + RTC_DCHECK_GT(max_gain_decrease_db, 0); + RTC_DCHECK_GT(max_gain_increase_db, 0); + float target_gain_difference_db = target_gain_db - last_gain_db; + if (!gain_increase_allowed) { + target_gain_difference_db = std::min(target_gain_difference_db, 0.0f); + } + return rtc::SafeClamp(target_gain_difference_db, -max_gain_decrease_db, + max_gain_increase_db); +} + +} // namespace + +AdaptiveDigitalGainController::AdaptiveDigitalGainController( + ApmDataDumper* apm_data_dumper, + const AudioProcessing::Config::GainController2::AdaptiveDigital& config, + int adjacent_speech_frames_threshold) + : apm_data_dumper_(apm_data_dumper), + gain_applier_( + /*hard_clip_samples=*/false, + /*initial_gain_factor=*/DbToRatio(config.initial_gain_db)), + config_(config), + adjacent_speech_frames_threshold_(adjacent_speech_frames_threshold), + max_gain_change_db_per_10ms_(config_.max_gain_change_db_per_second * + kFrameDurationMs / 1000.0f), + calls_since_last_gain_log_(0), + frames_to_gain_increase_allowed_(adjacent_speech_frames_threshold), + last_gain_db_(config_.initial_gain_db) { + RTC_DCHECK_GT(max_gain_change_db_per_10ms_, 0.0f); + RTC_DCHECK_GE(frames_to_gain_increase_allowed_, 1); + RTC_DCHECK_GE(config_.max_output_noise_level_dbfs, -90.0f); + RTC_DCHECK_LE(config_.max_output_noise_level_dbfs, 0.0f); +} + +void AdaptiveDigitalGainController::Process(const FrameInfo& info, + AudioFrameView<float> frame) { + RTC_DCHECK_GE(info.speech_level_dbfs, -150.0f); + RTC_DCHECK_GE(frame.num_channels(), 1); + RTC_DCHECK( + frame.samples_per_channel() == 80 || frame.samples_per_channel() == 160 || + frame.samples_per_channel() == 320 || frame.samples_per_channel() == 480) + << "`frame` does not look like a 10 ms frame for an APM supported sample " + "rate"; + + // Compute the input level used to select the desired gain. + RTC_DCHECK_GT(info.headroom_db, 0.0f); + const float input_level_dbfs = info.speech_level_dbfs + info.headroom_db; + + const float target_gain_db = LimitGainByLowConfidence( + LimitGainByNoise(ComputeGainDb(input_level_dbfs, config_), + info.noise_rms_dbfs, config_.max_output_noise_level_dbfs, + *apm_data_dumper_), + last_gain_db_, info.limiter_envelope_dbfs, info.speech_level_reliable); + + // Forbid increasing the gain until enough adjacent speech frames are + // observed. + bool first_confident_speech_frame = false; + if (info.speech_probability < kVadConfidenceThreshold) { + frames_to_gain_increase_allowed_ = adjacent_speech_frames_threshold_; + } else if (frames_to_gain_increase_allowed_ > 0) { + frames_to_gain_increase_allowed_--; + first_confident_speech_frame = frames_to_gain_increase_allowed_ == 0; + } + apm_data_dumper_->DumpRaw( + "agc2_adaptive_gain_applier_frames_to_gain_increase_allowed", + frames_to_gain_increase_allowed_); + + const bool gain_increase_allowed = frames_to_gain_increase_allowed_ == 0; + + float max_gain_increase_db = max_gain_change_db_per_10ms_; + if (first_confident_speech_frame) { + // No gain increase happened while waiting for a long enough speech + // sequence. Therefore, temporarily allow a faster gain increase. + RTC_DCHECK(gain_increase_allowed); + max_gain_increase_db *= adjacent_speech_frames_threshold_; + } + + const float gain_change_this_frame_db = ComputeGainChangeThisFrameDb( + target_gain_db, last_gain_db_, gain_increase_allowed, + /*max_gain_decrease_db=*/max_gain_change_db_per_10ms_, + max_gain_increase_db); + + apm_data_dumper_->DumpRaw("agc2_adaptive_gain_applier_want_to_change_by_db", + target_gain_db - last_gain_db_); + apm_data_dumper_->DumpRaw("agc2_adaptive_gain_applier_will_change_by_db", + gain_change_this_frame_db); + + // Optimization: avoid calling math functions if gain does not + // change. + if (gain_change_this_frame_db != 0.f) { + gain_applier_.SetGainFactor( + DbToRatio(last_gain_db_ + gain_change_this_frame_db)); + } + + gain_applier_.ApplyGain(frame); + + // Remember that the gain has changed for the next iteration. + last_gain_db_ = last_gain_db_ + gain_change_this_frame_db; + apm_data_dumper_->DumpRaw("agc2_adaptive_gain_applier_applied_gain_db", + last_gain_db_); + + // Log every 10 seconds. + calls_since_last_gain_log_++; + if (calls_since_last_gain_log_ == 1000) { + calls_since_last_gain_log_ = 0; + RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.Agc2.EstimatedSpeechLevel", + -info.speech_level_dbfs, 0, 100, 101); + RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.Agc2.EstimatedNoiseLevel", + -info.noise_rms_dbfs, 0, 100, 101); + RTC_HISTOGRAM_COUNTS_LINEAR( + "WebRTC.Audio.Agc2.Headroom", info.headroom_db, kHeadroomHistogramMin, + kHeadroomHistogramMax, + kHeadroomHistogramMax - kHeadroomHistogramMin + 1); + RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.Agc2.DigitalGainApplied", + last_gain_db_, 0, kGainDbHistogramMax, + kGainDbHistogramMax + 1); + RTC_LOG(LS_INFO) << "AGC2 adaptive digital" + << " | speech_dbfs: " << info.speech_level_dbfs + << " | noise_dbfs: " << info.noise_rms_dbfs + << " | headroom_db: " << info.headroom_db + << " | gain_db: " << last_gain_db_; + } +} + +} // namespace webrtc |