summaryrefslogtreecommitdiffstats
path: root/third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor.cc
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor.cc')
-rw-r--r--third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor.cc384
1 files changed, 384 insertions, 0 deletions
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor.cc b/third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor.cc
new file mode 100644
index 0000000000..fd759c63e8
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/clipping_predictor.cc
@@ -0,0 +1,384 @@
+/*
+ * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/clipping_predictor.h"
+
+#include <algorithm>
+#include <memory>
+
+#include "common_audio/include/audio_util.h"
+#include "modules/audio_processing/agc2/clipping_predictor_level_buffer.h"
+#include "modules/audio_processing/agc2/gain_map_internal.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/numerics/safe_minmax.h"
+
+namespace webrtc {
+namespace {
+
+constexpr int kClippingPredictorMaxGainChange = 15;
+
+// Searches the fixed gain map `kGainMap` for an input volume that compensates
+// `gain_error_db`, i.e., the gain error estimated while `input_volume` was
+// applied. The search starts at `input_volume`, moves upwards for a positive
+// error and downwards for a negative one, and never steps above
+// `max_input_volume` or below `min_input_volume`. A zero error returns
+// `input_volume` unchanged.
+int ComputeVolumeUpdate(int gain_error_db,
+                        int input_volume,
+                        int min_input_volume,
+                        int max_input_volume) {
+  RTC_DCHECK_GE(input_volume, 0);
+  RTC_DCHECK_LE(input_volume, max_input_volume);
+  if (gain_error_db == 0) {
+    return input_volume;
+  }
+  // Gain map entry for the starting volume; all changes are relative to it.
+  const auto reference_gain = kGainMap[input_volume];
+  const auto gain_change = [reference_gain](int volume) {
+    return kGainMap[volume] - reference_gain;
+  };
+  int candidate = input_volume;
+  if (gain_error_db > 0) {
+    // Step up until the gain increase covers the error or the cap is hit.
+    while (gain_change(candidate) < gain_error_db &&
+           candidate < max_input_volume) {
+      ++candidate;
+    }
+    return candidate;
+  }
+  // Step down until the gain decrease covers the error or the floor is hit.
+  while (gain_change(candidate) > gain_error_db &&
+         candidate > min_input_volume) {
+    --candidate;
+  }
+  return candidate;
+}
+
+// Returns the crest factor of `level`: the difference between the value of
+// `FloatS16ToDbfs()` for the peak (`level.max`) and for the square root of
+// `level.average`.
+float ComputeCrestFactor(const ClippingPredictorLevelBuffer::Level& level) {
+  const float peak_dbfs = FloatS16ToDbfs(level.max);
+  const float rms_dbfs = FloatS16ToDbfs(std::sqrt(level.average));
+  return peak_dbfs - rms_dbfs;
+}
+
+// Crest factor-based clipping prediction and clipped level step estimation.
+// A clipping event is predicted for a channel when the peak of the analysis
+// window exceeds `clipping_threshold` and the crest factor of the analysis
+// window is more than `crest_factor_margin` below the crest factor of the
+// reference window.
+class ClippingEventPredictor : public ClippingPredictor {
+ public:
+  // ClippingEventPredictor with `num_channels` channels (limited to values
+  // higher than zero); window size `window_length` and reference window size
+  // `reference_window_length` (both referring to the number of frames in the
+  // respective sliding windows and limited to values higher than zero);
+  // reference window delay `reference_window_delay` (delay in frames, limited
+  // to values zero and higher with an additional requirement of
+  // `window_length` < `reference_window_length` + `reference_window_delay`);
+  // and an estimation peak threshold `clipping_threshold` and a crest factor
+  // drop threshold `crest_factor_margin` (both in dB).
+  ClippingEventPredictor(int num_channels,
+                         int window_length,
+                         int reference_window_length,
+                         int reference_window_delay,
+                         float clipping_threshold,
+                         float crest_factor_margin)
+      : window_length_(window_length),
+        reference_window_length_(reference_window_length),
+        reference_window_delay_(reference_window_delay),
+        clipping_threshold_(clipping_threshold),
+        crest_factor_margin_(crest_factor_margin) {
+    RTC_DCHECK_GT(num_channels, 0);
+    RTC_DCHECK_GT(window_length, 0);
+    RTC_DCHECK_GT(reference_window_length, 0);
+    RTC_DCHECK_GE(reference_window_delay, 0);
+    RTC_DCHECK_GT(reference_window_length + reference_window_delay,
+                  window_length);
+    // Each per-channel buffer is sized to hold the delayed reference window.
+    const int buffer_length = GetMinFramesProcessed();
+    RTC_DCHECK_GT(buffer_length, 0);
+    for (int i = 0; i < num_channels; ++i) {
+      ch_buffers_.push_back(
+          std::make_unique<ClippingPredictorLevelBuffer>(buffer_length));
+    }
+  }
+
+  ClippingEventPredictor(const ClippingEventPredictor&) = delete;
+  ClippingEventPredictor& operator=(const ClippingEventPredictor&) = delete;
+  ~ClippingEventPredictor() override = default;
+
+  // Resets the level buffer of every channel.
+  void Reset() override {
+    for (const auto& buffer : ch_buffers_) {
+      buffer->Reset();
+    }
+  }
+
+  // Analyzes a frame of audio and stores the framewise metrics (mean of the
+  // squared samples and absolute peak) of each channel in `ch_buffers_`.
+  // `frame` must have as many channels as the predictor was created with.
+  void Analyze(const AudioFrameView<const float>& frame) override {
+    const int num_channels = frame.num_channels();
+    RTC_DCHECK_EQ(num_channels, ch_buffers_.size());
+    const int samples_per_channel = frame.samples_per_channel();
+    RTC_DCHECK_GT(samples_per_channel, 0);
+    for (int channel = 0; channel < num_channels; ++channel) {
+      float sum_squares = 0.0f;
+      float peak = 0.0f;
+      for (const auto& sample : frame.channel(channel)) {
+        sum_squares += sample * sample;
+        peak = std::max(std::fabs(sample), peak);
+      }
+      ch_buffers_[channel]->Push(
+          {sum_squares / static_cast<float>(samples_per_channel), peak});
+    }
+  }
+
+  // Estimates the analog gain adjustment for channel `channel` using a
+  // sliding window over the frame-wise metrics in `ch_buffers_`. If a clipping
+  // event is predicted, returns the difference between `level` and
+  // `level - default_step` clamped to [`min_mic_level`, `max_mic_level`],
+  // provided that difference is positive. Returns absl::nullopt if `level` is
+  // not above `min_mic_level`, if no clipping event is predicted, or if the
+  // resulting step is not positive. `level`, `min_mic_level`, and
+  // `max_mic_level` are limited to [0, 255] and `default_step` to [1, 255].
+  absl::optional<int> EstimateClippedLevelStep(
+      int channel,
+      int level,
+      int default_step,
+      int min_mic_level,
+      int max_mic_level) const override {
+    RTC_CHECK_GE(channel, 0);
+    RTC_CHECK_LT(channel, ch_buffers_.size());
+    RTC_DCHECK_GE(level, 0);
+    RTC_DCHECK_LE(level, 255);
+    RTC_DCHECK_GT(default_step, 0);
+    RTC_DCHECK_LE(default_step, 255);
+    RTC_DCHECK_GE(min_mic_level, 0);
+    RTC_DCHECK_LE(min_mic_level, 255);
+    RTC_DCHECK_GE(max_mic_level, 0);
+    RTC_DCHECK_LE(max_mic_level, 255);
+    // The level cannot be lowered any further.
+    if (level <= min_mic_level) {
+      return absl::nullopt;
+    }
+    if (PredictClippingEvent(channel)) {
+      const int new_level =
+          rtc::SafeClamp(level - default_step, min_mic_level, max_mic_level);
+      const int step = level - new_level;
+      if (step > 0) {
+        return step;
+      }
+    }
+    return absl::nullopt;
+  }
+
+ private:
+  // Returns the length, in frames, used for the per-channel level buffers:
+  // the reference window length plus its delay.
+  int GetMinFramesProcessed() const {
+    return reference_window_delay_ + reference_window_length_;
+  }
+
+  // Predicts clipping events based on the processed audio frames. Returns
+  // true if a clipping event is likely, i.e., if metrics are available for
+  // both windows, the analysis window peak is above `clipping_threshold_`, and
+  // the analysis window crest factor is more than `crest_factor_margin_` below
+  // the reference window crest factor.
+  bool PredictClippingEvent(int channel) const {
+    const auto metrics =
+        ch_buffers_[channel]->ComputePartialMetrics(0, window_length_);
+    // The negated `>` also rejects a NaN peak level.
+    if (!metrics.has_value() ||
+        !(FloatS16ToDbfs(metrics.value().max) > clipping_threshold_)) {
+      return false;
+    }
+    const auto reference_metrics = ch_buffers_[channel]->ComputePartialMetrics(
+        reference_window_delay_, reference_window_length_);
+    if (!reference_metrics.has_value()) {
+      return false;
+    }
+    const float crest_factor = ComputeCrestFactor(metrics.value());
+    const float reference_crest_factor =
+        ComputeCrestFactor(reference_metrics.value());
+    return crest_factor < reference_crest_factor - crest_factor_margin_;
+  }
+
+  std::vector<std::unique_ptr<ClippingPredictorLevelBuffer>> ch_buffers_;
+  const int window_length_;
+  const int reference_window_length_;
+  const int reference_window_delay_;
+  const float clipping_threshold_;
+  const float crest_factor_margin_;
+};
+
+// Performs crest factor-based clipping peak prediction. The peak of the
+// analysis window is projected by adding the crest factor of the reference
+// window to the RMS level of the analysis window; clipping is predicted when
+// the projected peak exceeds `clipping_threshold`.
+class ClippingPeakPredictor : public ClippingPredictor {
+ public:
+  // Ctor. ClippingPeakPredictor with `num_channels` channels (limited to values
+  // higher than zero); window size `window_length` and reference window size
+  // `reference_window_length` (both referring to the number of frames in the
+  // respective sliding windows and limited to values higher than zero);
+  // reference window delay `reference_window_delay` (delay in frames, limited
+  // to values zero and higher with an additional requirement of
+  // `window_length` < `reference_window_length` + `reference_window_delay`);
+  // and a clipping prediction threshold `clipping_threshold` (in dB). Adaptive
+  // clipped level step estimation is used if `adaptive_step_estimation` is
+  // true.
+  // `clipping_threshold` is a float so that fractional thresholds are not
+  // truncated before being compared against float dBFS values.
+  explicit ClippingPeakPredictor(int num_channels,
+                                 int window_length,
+                                 int reference_window_length,
+                                 int reference_window_delay,
+                                 float clipping_threshold,
+                                 bool adaptive_step_estimation)
+      : window_length_(window_length),
+        reference_window_length_(reference_window_length),
+        reference_window_delay_(reference_window_delay),
+        clipping_threshold_(clipping_threshold),
+        adaptive_step_estimation_(adaptive_step_estimation) {
+    RTC_DCHECK_GT(num_channels, 0);
+    RTC_DCHECK_GT(window_length, 0);
+    RTC_DCHECK_GT(reference_window_length, 0);
+    RTC_DCHECK_GE(reference_window_delay, 0);
+    RTC_DCHECK_GT(reference_window_length + reference_window_delay,
+                  window_length);
+    // Each per-channel buffer is sized to hold the delayed reference window.
+    const int buffer_length = GetMinFramesProcessed();
+    RTC_DCHECK_GT(buffer_length, 0);
+    for (int i = 0; i < num_channels; ++i) {
+      ch_buffers_.push_back(
+          std::make_unique<ClippingPredictorLevelBuffer>(buffer_length));
+    }
+  }
+
+  ClippingPeakPredictor(const ClippingPeakPredictor&) = delete;
+  ClippingPeakPredictor& operator=(const ClippingPeakPredictor&) = delete;
+  ~ClippingPeakPredictor() override = default;
+
+  // Resets the level buffer of every channel.
+  void Reset() override {
+    for (const auto& buffer : ch_buffers_) {
+      buffer->Reset();
+    }
+  }
+
+  // Analyzes a frame of audio and stores the framewise metrics (mean of the
+  // squared samples and absolute peak) of each channel in `ch_buffers_`.
+  // `frame` must have as many channels as the predictor was created with.
+  void Analyze(const AudioFrameView<const float>& frame) override {
+    const int num_channels = frame.num_channels();
+    RTC_DCHECK_EQ(num_channels, ch_buffers_.size());
+    const int samples_per_channel = frame.samples_per_channel();
+    RTC_DCHECK_GT(samples_per_channel, 0);
+    for (int channel = 0; channel < num_channels; ++channel) {
+      float sum_squares = 0.0f;
+      float peak = 0.0f;
+      for (const auto& sample : frame.channel(channel)) {
+        sum_squares += sample * sample;
+        peak = std::max(std::fabs(sample), peak);
+      }
+      ch_buffers_[channel]->Push(
+          {sum_squares / static_cast<float>(samples_per_channel), peak});
+    }
+  }
+
+  // Estimates the analog gain adjustment for channel `channel` using a
+  // sliding window over the frame-wise metrics in `ch_buffers_`. If the
+  // projected peak exceeds `clipping_threshold_`, a step is chosen:
+  // `default_step` when `adaptive_step_estimation_` is false; otherwise the
+  // larger of `default_step` and the volume decrease that the gain map
+  // suggests for the projected peak (the gain change is limited to
+  // [-`kClippingPredictorMaxGainChange`, 0] dB). The returned value is the
+  // difference between `level` and `level - step` clamped to
+  // [`min_mic_level`, `max_mic_level`]. Returns absl::nullopt if `level` is
+  // not above `min_mic_level`, if no clipping is predicted, or if the clamped
+  // level is not below `level`. `level`, `min_mic_level`, and `max_mic_level`
+  // are limited to [0, 255] and `default_step` to [1, 255].
+  absl::optional<int> EstimateClippedLevelStep(
+      int channel,
+      int level,
+      int default_step,
+      int min_mic_level,
+      int max_mic_level) const override {
+    // `channel` indexes `ch_buffers_`, so it is checked in all build modes,
+    // matching ClippingEventPredictor.
+    RTC_CHECK_GE(channel, 0);
+    RTC_CHECK_LT(channel, ch_buffers_.size());
+    RTC_DCHECK_GE(level, 0);
+    RTC_DCHECK_LE(level, 255);
+    RTC_DCHECK_GT(default_step, 0);
+    RTC_DCHECK_LE(default_step, 255);
+    RTC_DCHECK_GE(min_mic_level, 0);
+    RTC_DCHECK_LE(min_mic_level, 255);
+    RTC_DCHECK_GE(max_mic_level, 0);
+    RTC_DCHECK_LE(max_mic_level, 255);
+    // The level cannot be lowered any further.
+    if (level <= min_mic_level) {
+      return absl::nullopt;
+    }
+    const absl::optional<float> estimate_db = EstimatePeakValue(channel);
+    if (estimate_db.has_value() && estimate_db.value() > clipping_threshold_) {
+      int step = 0;
+      if (!adaptive_step_estimation_) {
+        step = default_step;
+      } else {
+        // Attenuate by the projected peak level rounded up, capped at
+        // `kClippingPredictorMaxGainChange` dB and never amplifying.
+        const int estimated_gain_change =
+            rtc::SafeClamp(-static_cast<int>(std::ceil(estimate_db.value())),
+                           -kClippingPredictorMaxGainChange, 0);
+        step =
+            std::max(level - ComputeVolumeUpdate(estimated_gain_change, level,
+                                                 min_mic_level, max_mic_level),
+                     default_step);
+      }
+      const int new_level =
+          rtc::SafeClamp(level - step, min_mic_level, max_mic_level);
+      if (level > new_level) {
+        return level - new_level;
+      }
+    }
+    return absl::nullopt;
+  }
+
+ private:
+  // Returns the length, in frames, used for the per-channel level buffers:
+  // the reference window length plus its delay.
+  int GetMinFramesProcessed() const {
+    return reference_window_delay_ + reference_window_length_;
+  }
+
+  // Projects the sample peak of channel `channel` based on the processed
+  // audio frames. Returns absl::nullopt if metrics are unavailable for either
+  // window or if the analysis window peak is not above `clipping_threshold_`.
+  // Otherwise returns the reference window crest factor added to the RMS
+  // level (in dBFS) of the analysis window.
+  absl::optional<float> EstimatePeakValue(int channel) const {
+    const auto reference_metrics = ch_buffers_[channel]->ComputePartialMetrics(
+        reference_window_delay_, reference_window_length_);
+    if (!reference_metrics.has_value()) {
+      return absl::nullopt;
+    }
+    const auto metrics =
+        ch_buffers_[channel]->ComputePartialMetrics(0, window_length_);
+    // The negated `>` also rejects a NaN peak level.
+    if (!metrics.has_value() ||
+        !(FloatS16ToDbfs(metrics.value().max) > clipping_threshold_)) {
+      return absl::nullopt;
+    }
+    const float reference_crest_factor =
+        ComputeCrestFactor(reference_metrics.value());
+    const float mean_squares = metrics.value().average;
+    const float projected_peak =
+        reference_crest_factor + FloatS16ToDbfs(std::sqrt(mean_squares));
+    return projected_peak;
+  }
+
+  std::vector<std::unique_ptr<ClippingPredictorLevelBuffer>> ch_buffers_;
+  const int window_length_;
+  const int reference_window_length_;
+  const int reference_window_delay_;
+  const float clipping_threshold_;
+  const bool adaptive_step_estimation_;
+};
+
+} // namespace
+
+// Creates the clipping predictor selected by `config.mode` for `num_channels`
+// channels, configured with the window lengths, delay and thresholds found in
+// `config`. Returns nullptr if `config.enabled` is false or if `config.mode`
+// does not match any known mode (the latter also triggers a DCHECK failure).
+std::unique_ptr<ClippingPredictor> CreateClippingPredictor(
+    int num_channels,
+    const AudioProcessing::Config::GainController1::AnalogGainController::
+        ClippingPredictor& config) {
+  if (!config.enabled) {
+    RTC_LOG(LS_INFO) << "[AGC2] Clipping prediction disabled.";
+    return nullptr;
+  }
+  RTC_LOG(LS_INFO) << "[AGC2] Clipping prediction enabled.";
+  using ClippingPredictorMode = AudioProcessing::Config::GainController1::
+      AnalogGainController::ClippingPredictor::Mode;
+  switch (config.mode) {
+    case ClippingPredictorMode::kClippingEventPrediction:
+      return std::make_unique<ClippingEventPredictor>(
+          num_channels, config.window_length, config.reference_window_length,
+          config.reference_window_delay, config.clipping_threshold,
+          config.crest_factor_margin);
+    case ClippingPredictorMode::kAdaptiveStepClippingPeakPrediction:
+      return std::make_unique<ClippingPeakPredictor>(
+          num_channels, config.window_length, config.reference_window_length,
+          config.reference_window_delay, config.clipping_threshold,
+          /*adaptive_step_estimation=*/true);
+    case ClippingPredictorMode::kFixedStepClippingPeakPrediction:
+      return std::make_unique<ClippingPeakPredictor>(
+          num_channels, config.window_length, config.reference_window_length,
+          config.reference_window_delay, config.clipping_threshold,
+          /*adaptive_step_estimation=*/false);
+  }
+  RTC_DCHECK_NOTREACHED();
+  // Reached only with an out-of-range `config.mode` when DCHECKs are compiled
+  // out; return a defined value instead of running off the end of a non-void
+  // function.
+  return nullptr;
+}
+
+} // namespace webrtc