diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
commit | 26a029d407be480d791972afb5975cf62c9360a6 (patch) | |
tree | f435a8308119effd964b339f76abb83a57c29483 /third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor.cc | |
parent | Initial commit. (diff) | |
download | firefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz firefox-26a029d407be480d791972afb5975cf62c9360a6.zip |
Adding upstream version 124.0.1.upstream/124.0.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor.cc')
-rw-r--r-- | third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor.cc | 384 |
1 files changed, 384 insertions, 0 deletions
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor.cc b/third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor.cc new file mode 100644 index 0000000000..fd759c63e8 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor.cc @@ -0,0 +1,384 @@ +/* + * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/clipping_predictor.h" + +#include <algorithm> +#include <memory> + +#include "common_audio/include/audio_util.h" +#include "modules/audio_processing/agc2/clipping_predictor_level_buffer.h" +#include "modules/audio_processing/agc2/gain_map_internal.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" +#include "rtc_base/numerics/safe_minmax.h" + +namespace webrtc { +namespace { + +constexpr int kClippingPredictorMaxGainChange = 15; + +// Returns an input volume in the [`min_input_volume`, `max_input_volume`] range +// that reduces `gain_error_db`, which is a gain error estimated when +// `input_volume` was applied, according to a fixed gain map. +int ComputeVolumeUpdate(int gain_error_db, + int input_volume, + int min_input_volume, + int max_input_volume) { + RTC_DCHECK_GE(input_volume, 0); + RTC_DCHECK_LE(input_volume, max_input_volume); + if (gain_error_db == 0) { + return input_volume; + } + int new_volume = input_volume; + if (gain_error_db > 0) { + while (kGainMap[new_volume] - kGainMap[input_volume] < gain_error_db && + new_volume < max_input_volume) { + ++new_volume; + } + } else { + while (kGainMap[new_volume] - kGainMap[input_volume] > gain_error_db && + new_volume > min_input_volume) { + --new_volume; + } + } + return new_volume; +} + +float ComputeCrestFactor(const ClippingPredictorLevelBuffer::Level& level) { + const float crest_factor = + FloatS16ToDbfs(level.max) - FloatS16ToDbfs(std::sqrt(level.average)); + return crest_factor; +} + +// Crest factor-based clipping prediction and clipped level step estimation. +class ClippingEventPredictor : public ClippingPredictor { + public: + // ClippingEventPredictor with `num_channels` channels (limited to values + // higher than zero); window size `window_length` and reference window size + // `reference_window_length` (both referring to the number of frames in the + // respective sliding windows and limited to values higher than zero); + // reference window delay `reference_window_delay` (delay in frames, limited + // to values zero and higher with an additional requirement of + // `window_length` < `reference_window_length` + reference_window_delay`); + // and an estimation peak threshold `clipping_threshold` and a crest factor + // drop threshold `crest_factor_margin` (both in dB). + ClippingEventPredictor(int num_channels, + int window_length, + int reference_window_length, + int reference_window_delay, + float clipping_threshold, + float crest_factor_margin) + : window_length_(window_length), + reference_window_length_(reference_window_length), + reference_window_delay_(reference_window_delay), + clipping_threshold_(clipping_threshold), + crest_factor_margin_(crest_factor_margin) { + RTC_DCHECK_GT(num_channels, 0); + RTC_DCHECK_GT(window_length, 0); + RTC_DCHECK_GT(reference_window_length, 0); + RTC_DCHECK_GE(reference_window_delay, 0); + RTC_DCHECK_GT(reference_window_length + reference_window_delay, + window_length); + const int buffer_length = GetMinFramesProcessed(); + RTC_DCHECK_GT(buffer_length, 0); + for (int i = 0; i < num_channels; ++i) { + ch_buffers_.push_back( + std::make_unique<ClippingPredictorLevelBuffer>(buffer_length)); + } + } + + ClippingEventPredictor(const ClippingEventPredictor&) = delete; + ClippingEventPredictor& operator=(const ClippingEventPredictor&) = delete; + ~ClippingEventPredictor() {} + + void Reset() { + const int num_channels = ch_buffers_.size(); + for (int i = 0; i < num_channels; ++i) { + ch_buffers_[i]->Reset(); + } + } + + // Analyzes a frame of audio and stores the framewise metrics in + // `ch_buffers_`. + void Analyze(const AudioFrameView<const float>& frame) { + const int num_channels = frame.num_channels(); + RTC_DCHECK_EQ(num_channels, ch_buffers_.size()); + const int samples_per_channel = frame.samples_per_channel(); + RTC_DCHECK_GT(samples_per_channel, 0); + for (int channel = 0; channel < num_channels; ++channel) { + float sum_squares = 0.0f; + float peak = 0.0f; + for (const auto& sample : frame.channel(channel)) { + sum_squares += sample * sample; + peak = std::max(std::fabs(sample), peak); + } + ch_buffers_[channel]->Push( + {sum_squares / static_cast<float>(samples_per_channel), peak}); + } + } + + // Estimates the analog gain adjustment for channel `channel` using a + // sliding window over the frame-wise metrics in `ch_buffers_`. Returns an + // estimate for the clipped level step equal to `default_clipped_level_step_` + // if at least `GetMinFramesProcessed()` frames have been processed since the + // last reset and a clipping event is predicted. `level`, `min_mic_level`, and + // `max_mic_level` are limited to [0, 255] and `default_step` to [1, 255]. + absl::optional<int> EstimateClippedLevelStep(int channel, + int level, + int default_step, + int min_mic_level, + int max_mic_level) const { + RTC_CHECK_GE(channel, 0); + RTC_CHECK_LT(channel, ch_buffers_.size()); + RTC_DCHECK_GE(level, 0); + RTC_DCHECK_LE(level, 255); + RTC_DCHECK_GT(default_step, 0); + RTC_DCHECK_LE(default_step, 255); + RTC_DCHECK_GE(min_mic_level, 0); + RTC_DCHECK_LE(min_mic_level, 255); + RTC_DCHECK_GE(max_mic_level, 0); + RTC_DCHECK_LE(max_mic_level, 255); + if (level <= min_mic_level) { + return absl::nullopt; + } + if (PredictClippingEvent(channel)) { + const int new_level = + rtc::SafeClamp(level - default_step, min_mic_level, max_mic_level); + const int step = level - new_level; + if (step > 0) { + return step; + } + } + return absl::nullopt; + } + + private: + int GetMinFramesProcessed() const { + return reference_window_delay_ + reference_window_length_; + } + + // Predicts clipping events based on the processed audio frames. Returns + // true if a clipping event is likely. + bool PredictClippingEvent(int channel) const { + const auto metrics = + ch_buffers_[channel]->ComputePartialMetrics(0, window_length_); + if (!metrics.has_value() || + !(FloatS16ToDbfs(metrics.value().max) > clipping_threshold_)) { + return false; + } + const auto reference_metrics = ch_buffers_[channel]->ComputePartialMetrics( + reference_window_delay_, reference_window_length_); + if (!reference_metrics.has_value()) { + return false; + } + const float crest_factor = ComputeCrestFactor(metrics.value()); + const float reference_crest_factor = + ComputeCrestFactor(reference_metrics.value()); + if (crest_factor < reference_crest_factor - crest_factor_margin_) { + return true; + } + return false; + } + + std::vector<std::unique_ptr<ClippingPredictorLevelBuffer>> ch_buffers_; + const int window_length_; + const int reference_window_length_; + const int reference_window_delay_; + const float clipping_threshold_; + const float crest_factor_margin_; +}; + +// Performs crest factor-based clipping peak prediction. +class ClippingPeakPredictor : public ClippingPredictor { + public: + // Ctor. ClippingPeakPredictor with `num_channels` channels (limited to values + // higher than zero); window size `window_length` and reference window size + // `reference_window_length` (both referring to the number of frames in the + // respective sliding windows and limited to values higher than zero); + // reference window delay `reference_window_delay` (delay in frames, limited + // to values zero and higher with an additional requirement of + // `window_length` < `reference_window_length` + reference_window_delay`); + // and a clipping prediction threshold `clipping_threshold` (in dB). Adaptive + // clipped level step estimation is used if `adaptive_step_estimation` is + // true. + explicit ClippingPeakPredictor(int num_channels, + int window_length, + int reference_window_length, + int reference_window_delay, + int clipping_threshold, + bool adaptive_step_estimation) + : window_length_(window_length), + reference_window_length_(reference_window_length), + reference_window_delay_(reference_window_delay), + clipping_threshold_(clipping_threshold), + adaptive_step_estimation_(adaptive_step_estimation) { + RTC_DCHECK_GT(num_channels, 0); + RTC_DCHECK_GT(window_length, 0); + RTC_DCHECK_GT(reference_window_length, 0); + RTC_DCHECK_GE(reference_window_delay, 0); + RTC_DCHECK_GT(reference_window_length + reference_window_delay, + window_length); + const int buffer_length = GetMinFramesProcessed(); + RTC_DCHECK_GT(buffer_length, 0); + for (int i = 0; i < num_channels; ++i) { + ch_buffers_.push_back( + std::make_unique<ClippingPredictorLevelBuffer>(buffer_length)); + } + } + + ClippingPeakPredictor(const ClippingPeakPredictor&) = delete; + ClippingPeakPredictor& operator=(const ClippingPeakPredictor&) = delete; + ~ClippingPeakPredictor() {} + + void Reset() { + const int num_channels = ch_buffers_.size(); + for (int i = 0; i < num_channels; ++i) { + ch_buffers_[i]->Reset(); + } + } + + // Analyzes a frame of audio and stores the framewise metrics in + // `ch_buffers_`. + void Analyze(const AudioFrameView<const float>& frame) { + const int num_channels = frame.num_channels(); + RTC_DCHECK_EQ(num_channels, ch_buffers_.size()); + const int samples_per_channel = frame.samples_per_channel(); + RTC_DCHECK_GT(samples_per_channel, 0); + for (int channel = 0; channel < num_channels; ++channel) { + float sum_squares = 0.0f; + float peak = 0.0f; + for (const auto& sample : frame.channel(channel)) { + sum_squares += sample * sample; + peak = std::max(std::fabs(sample), peak); + } + ch_buffers_[channel]->Push( + {sum_squares / static_cast<float>(samples_per_channel), peak}); + } + } + + // Estimates the analog gain adjustment for channel `channel` using a + // sliding window over the frame-wise metrics in `ch_buffers_`. Returns an + // estimate for the clipped level step (equal to + // `default_clipped_level_step_` if `adaptive_estimation_` is false) if at + // least `GetMinFramesProcessed()` frames have been processed since the last + // reset and a clipping event is predicted. `level`, `min_mic_level`, and + // `max_mic_level` are limited to [0, 255] and `default_step` to [1, 255]. + absl::optional<int> EstimateClippedLevelStep(int channel, + int level, + int default_step, + int min_mic_level, + int max_mic_level) const { + RTC_DCHECK_GE(channel, 0); + RTC_DCHECK_LT(channel, ch_buffers_.size()); + RTC_DCHECK_GE(level, 0); + RTC_DCHECK_LE(level, 255); + RTC_DCHECK_GT(default_step, 0); + RTC_DCHECK_LE(default_step, 255); + RTC_DCHECK_GE(min_mic_level, 0); + RTC_DCHECK_LE(min_mic_level, 255); + RTC_DCHECK_GE(max_mic_level, 0); + RTC_DCHECK_LE(max_mic_level, 255); + if (level <= min_mic_level) { + return absl::nullopt; + } + absl::optional<float> estimate_db = EstimatePeakValue(channel); + if (estimate_db.has_value() && estimate_db.value() > clipping_threshold_) { + int step = 0; + if (!adaptive_step_estimation_) { + step = default_step; + } else { + const int estimated_gain_change = + rtc::SafeClamp(-static_cast<int>(std::ceil(estimate_db.value())), + -kClippingPredictorMaxGainChange, 0); + step = + std::max(level - ComputeVolumeUpdate(estimated_gain_change, level, + min_mic_level, max_mic_level), + default_step); + } + const int new_level = + rtc::SafeClamp(level - step, min_mic_level, max_mic_level); + if (level > new_level) { + return level - new_level; + } + } + return absl::nullopt; + } + + private: + int GetMinFramesProcessed() { + return reference_window_delay_ + reference_window_length_; + } + + // Predicts clipping sample peaks based on the processed audio frames. + // Returns the estimated peak value if clipping is predicted. Otherwise + // returns absl::nullopt. + absl::optional<float> EstimatePeakValue(int channel) const { + const auto reference_metrics = ch_buffers_[channel]->ComputePartialMetrics( + reference_window_delay_, reference_window_length_); + if (!reference_metrics.has_value()) { + return absl::nullopt; + } + const auto metrics = + ch_buffers_[channel]->ComputePartialMetrics(0, window_length_); + if (!metrics.has_value() || + !(FloatS16ToDbfs(metrics.value().max) > clipping_threshold_)) { + return absl::nullopt; + } + const float reference_crest_factor = + ComputeCrestFactor(reference_metrics.value()); + const float& mean_squares = metrics.value().average; + const float projected_peak = + reference_crest_factor + FloatS16ToDbfs(std::sqrt(mean_squares)); + return projected_peak; + } + + std::vector<std::unique_ptr<ClippingPredictorLevelBuffer>> ch_buffers_; + const int window_length_; + const int reference_window_length_; + const int reference_window_delay_; + const int clipping_threshold_; + const bool adaptive_step_estimation_; +}; + +} // namespace + +std::unique_ptr<ClippingPredictor> CreateClippingPredictor( + int num_channels, + const AudioProcessing::Config::GainController1::AnalogGainController:: + ClippingPredictor& config) { + if (!config.enabled) { + RTC_LOG(LS_INFO) << "[AGC2] Clipping prediction disabled."; + return nullptr; + } + RTC_LOG(LS_INFO) << "[AGC2] Clipping prediction enabled."; + using ClippingPredictorMode = AudioProcessing::Config::GainController1:: + AnalogGainController::ClippingPredictor::Mode; + switch (config.mode) { + case ClippingPredictorMode::kClippingEventPrediction: + return std::make_unique<ClippingEventPredictor>( + num_channels, config.window_length, config.reference_window_length, + config.reference_window_delay, config.clipping_threshold, + config.crest_factor_margin); + case ClippingPredictorMode::kAdaptiveStepClippingPeakPrediction: + return std::make_unique<ClippingPeakPredictor>( + num_channels, config.window_length, config.reference_window_length, + config.reference_window_delay, config.clipping_threshold, + /*adaptive_step_estimation=*/true); + case ClippingPredictorMode::kFixedStepClippingPeakPrediction: + return std::make_unique<ClippingPeakPredictor>( + num_channels, config.window_length, config.reference_window_length, + config.reference_window_delay, config.clipping_threshold, + /*adaptive_step_estimation=*/false); + } + RTC_DCHECK_NOTREACHED(); +} + +} // namespace webrtc |