summaryrefslogtreecommitdiffstats
path: root/third_party/libwebrtc/modules/audio_processing/agc2/adaptive_digital_gain_controller.cc
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 00:47:55 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 00:47:55 +0000
commit26a029d407be480d791972afb5975cf62c9360a6 (patch)
treef435a8308119effd964b339f76abb83a57c29483 /third_party/libwebrtc/modules/audio_processing/agc2/adaptive_digital_gain_controller.cc
parentInitial commit. (diff)
downloadfirefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz
firefox-26a029d407be480d791972afb5975cf62c9360a6.zip
Adding upstream version 124.0.1.upstream/124.0.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/libwebrtc/modules/audio_processing/agc2/adaptive_digital_gain_controller.cc')
-rw-r--r--third_party/libwebrtc/modules/audio_processing/agc2/adaptive_digital_gain_controller.cc216
1 files changed, 216 insertions, 0 deletions
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/adaptive_digital_gain_controller.cc b/third_party/libwebrtc/modules/audio_processing/agc2/adaptive_digital_gain_controller.cc
new file mode 100644
index 0000000000..e8edab602c
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/adaptive_digital_gain_controller.cc
@@ -0,0 +1,216 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/adaptive_digital_gain_controller.h"
+
+#include <algorithm>
+
+#include "common_audio/include/audio_util.h"
+#include "modules/audio_processing/agc2/agc2_common.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/numerics/safe_minmax.h"
+#include "system_wrappers/include/metrics.h"
+
+namespace webrtc {
+namespace {
+
+using AdaptiveDigitalConfig =
+ AudioProcessing::Config::GainController2::AdaptiveDigital;
+
+constexpr int kHeadroomHistogramMin = 0;
+constexpr int kHeadroomHistogramMax = 50;
+constexpr int kGainDbHistogramMax = 30;
+
+// Computes the gain for `input_level_dbfs` to reach `-config.headroom_db`.
+// Clamps the gain in [0, `config.max_gain_db`]. `config.headroom_db` is a
+// safety margin to allow transient peaks to exceed the target peak level
+// without clipping.
+float ComputeGainDb(float input_level_dbfs,
+ const AdaptiveDigitalConfig& config) {
+ // If the level is very low, apply the maximum gain.
+ if (input_level_dbfs < -(config.headroom_db + config.max_gain_db)) {
+ return config.max_gain_db;
+ }
+ // We expect to end up here most of the time: the level is below
+ // -headroom, but we can boost it to -headroom.
+ if (input_level_dbfs < -config.headroom_db) {
+ return -config.headroom_db - input_level_dbfs;
+ }
+ // The level is too high and we can't boost.
+ RTC_DCHECK_GE(input_level_dbfs, -config.headroom_db);
+ return 0.0f;
+}
+
+// Returns `target_gain_db` if applying such a gain to `input_noise_level_dbfs`
+// does not exceed `max_output_noise_level_dbfs`. Otherwise lowers and returns
+// `target_gain_db` so that the output noise level equals
+// `max_output_noise_level_dbfs`.
+float LimitGainByNoise(float target_gain_db,
+ float input_noise_level_dbfs,
+ float max_output_noise_level_dbfs,
+ ApmDataDumper& apm_data_dumper) {
+ const float max_allowed_gain_db =
+ max_output_noise_level_dbfs - input_noise_level_dbfs;
+ apm_data_dumper.DumpRaw("agc2_adaptive_gain_applier_max_allowed_gain_db",
+ max_allowed_gain_db);
+ return std::min(target_gain_db, std::max(max_allowed_gain_db, 0.0f));
+}
+
+float LimitGainByLowConfidence(float target_gain_db,
+ float last_gain_db,
+ float limiter_audio_level_dbfs,
+ bool estimate_is_confident) {
+ if (estimate_is_confident ||
+ limiter_audio_level_dbfs <= kLimiterThresholdForAgcGainDbfs) {
+ return target_gain_db;
+ }
+ const float limiter_level_dbfs_before_gain =
+ limiter_audio_level_dbfs - last_gain_db;
+
+ // Compute a new gain so that `limiter_level_dbfs_before_gain` +
+ // `new_target_gain_db` is not great than `kLimiterThresholdForAgcGainDbfs`.
+ const float new_target_gain_db = std::max(
+ kLimiterThresholdForAgcGainDbfs - limiter_level_dbfs_before_gain, 0.0f);
+ return std::min(new_target_gain_db, target_gain_db);
+}
+
+// Computes how the gain should change during this frame.
+// Return the gain difference in db to 'last_gain_db'.
+float ComputeGainChangeThisFrameDb(float target_gain_db,
+ float last_gain_db,
+ bool gain_increase_allowed,
+ float max_gain_decrease_db,
+ float max_gain_increase_db) {
+ RTC_DCHECK_GT(max_gain_decrease_db, 0);
+ RTC_DCHECK_GT(max_gain_increase_db, 0);
+ float target_gain_difference_db = target_gain_db - last_gain_db;
+ if (!gain_increase_allowed) {
+ target_gain_difference_db = std::min(target_gain_difference_db, 0.0f);
+ }
+ return rtc::SafeClamp(target_gain_difference_db, -max_gain_decrease_db,
+ max_gain_increase_db);
+}
+
+} // namespace
+
+AdaptiveDigitalGainController::AdaptiveDigitalGainController(
+ ApmDataDumper* apm_data_dumper,
+ const AudioProcessing::Config::GainController2::AdaptiveDigital& config,
+ int adjacent_speech_frames_threshold)
+ : apm_data_dumper_(apm_data_dumper),
+ gain_applier_(
+ /*hard_clip_samples=*/false,
+ /*initial_gain_factor=*/DbToRatio(config.initial_gain_db)),
+ config_(config),
+ adjacent_speech_frames_threshold_(adjacent_speech_frames_threshold),
+ max_gain_change_db_per_10ms_(config_.max_gain_change_db_per_second *
+ kFrameDurationMs / 1000.0f),
+ calls_since_last_gain_log_(0),
+ frames_to_gain_increase_allowed_(adjacent_speech_frames_threshold),
+ last_gain_db_(config_.initial_gain_db) {
+ RTC_DCHECK_GT(max_gain_change_db_per_10ms_, 0.0f);
+ RTC_DCHECK_GE(frames_to_gain_increase_allowed_, 1);
+ RTC_DCHECK_GE(config_.max_output_noise_level_dbfs, -90.0f);
+ RTC_DCHECK_LE(config_.max_output_noise_level_dbfs, 0.0f);
+}
+
+void AdaptiveDigitalGainController::Process(const FrameInfo& info,
+ AudioFrameView<float> frame) {
+ RTC_DCHECK_GE(info.speech_level_dbfs, -150.0f);
+ RTC_DCHECK_GE(frame.num_channels(), 1);
+ RTC_DCHECK(
+ frame.samples_per_channel() == 80 || frame.samples_per_channel() == 160 ||
+ frame.samples_per_channel() == 320 || frame.samples_per_channel() == 480)
+ << "`frame` does not look like a 10 ms frame for an APM supported sample "
+ "rate";
+
+ // Compute the input level used to select the desired gain.
+ RTC_DCHECK_GT(info.headroom_db, 0.0f);
+ const float input_level_dbfs = info.speech_level_dbfs + info.headroom_db;
+
+ const float target_gain_db = LimitGainByLowConfidence(
+ LimitGainByNoise(ComputeGainDb(input_level_dbfs, config_),
+ info.noise_rms_dbfs, config_.max_output_noise_level_dbfs,
+ *apm_data_dumper_),
+ last_gain_db_, info.limiter_envelope_dbfs, info.speech_level_reliable);
+
+ // Forbid increasing the gain until enough adjacent speech frames are
+ // observed.
+ bool first_confident_speech_frame = false;
+ if (info.speech_probability < kVadConfidenceThreshold) {
+ frames_to_gain_increase_allowed_ = adjacent_speech_frames_threshold_;
+ } else if (frames_to_gain_increase_allowed_ > 0) {
+ frames_to_gain_increase_allowed_--;
+ first_confident_speech_frame = frames_to_gain_increase_allowed_ == 0;
+ }
+ apm_data_dumper_->DumpRaw(
+ "agc2_adaptive_gain_applier_frames_to_gain_increase_allowed",
+ frames_to_gain_increase_allowed_);
+
+ const bool gain_increase_allowed = frames_to_gain_increase_allowed_ == 0;
+
+ float max_gain_increase_db = max_gain_change_db_per_10ms_;
+ if (first_confident_speech_frame) {
+ // No gain increase happened while waiting for a long enough speech
+ // sequence. Therefore, temporarily allow a faster gain increase.
+ RTC_DCHECK(gain_increase_allowed);
+ max_gain_increase_db *= adjacent_speech_frames_threshold_;
+ }
+
+ const float gain_change_this_frame_db = ComputeGainChangeThisFrameDb(
+ target_gain_db, last_gain_db_, gain_increase_allowed,
+ /*max_gain_decrease_db=*/max_gain_change_db_per_10ms_,
+ max_gain_increase_db);
+
+ apm_data_dumper_->DumpRaw("agc2_adaptive_gain_applier_want_to_change_by_db",
+ target_gain_db - last_gain_db_);
+ apm_data_dumper_->DumpRaw("agc2_adaptive_gain_applier_will_change_by_db",
+ gain_change_this_frame_db);
+
+ // Optimization: avoid calling math functions if gain does not
+ // change.
+ if (gain_change_this_frame_db != 0.f) {
+ gain_applier_.SetGainFactor(
+ DbToRatio(last_gain_db_ + gain_change_this_frame_db));
+ }
+
+ gain_applier_.ApplyGain(frame);
+
+ // Remember that the gain has changed for the next iteration.
+ last_gain_db_ = last_gain_db_ + gain_change_this_frame_db;
+ apm_data_dumper_->DumpRaw("agc2_adaptive_gain_applier_applied_gain_db",
+ last_gain_db_);
+
+ // Log every 10 seconds.
+ calls_since_last_gain_log_++;
+ if (calls_since_last_gain_log_ == 1000) {
+ calls_since_last_gain_log_ = 0;
+ RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.Agc2.EstimatedSpeechLevel",
+ -info.speech_level_dbfs, 0, 100, 101);
+ RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.Agc2.EstimatedNoiseLevel",
+ -info.noise_rms_dbfs, 0, 100, 101);
+ RTC_HISTOGRAM_COUNTS_LINEAR(
+ "WebRTC.Audio.Agc2.Headroom", info.headroom_db, kHeadroomHistogramMin,
+ kHeadroomHistogramMax,
+ kHeadroomHistogramMax - kHeadroomHistogramMin + 1);
+ RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.Agc2.DigitalGainApplied",
+ last_gain_db_, 0, kGainDbHistogramMax,
+ kGainDbHistogramMax + 1);
+ RTC_LOG(LS_INFO) << "AGC2 adaptive digital"
+ << " | speech_dbfs: " << info.speech_level_dbfs
+ << " | noise_dbfs: " << info.noise_rms_dbfs
+ << " | headroom_db: " << info.headroom_db
+ << " | gain_db: " << last_gain_db_;
+ }
+}
+
+} // namespace webrtc