/*
 *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "modules/audio_processing/agc2/saturation_protector.h"

#include <algorithm>
#include <memory>

#include "modules/audio_processing/agc2/agc2_common.h"
#include "modules/audio_processing/agc2/saturation_protector_buffer.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"
#include "rtc_base/checks.h"
#include "rtc_base/numerics/safe_minmax.h"

namespace webrtc {
namespace {

// Accumulated frame time that must be exceeded before the running max peak is
// pushed into the peak delay buffer.
constexpr int kPeakEnveloperSuperFrameLengthMs = 400;
// Lower and upper bounds applied to the headroom after every update.
constexpr float kMinMarginDb = 12.0f;
constexpr float kMaxMarginDb = 25.0f;
// Smoothing coefficients applied to the previous headroom value: `kAttack` is
// used when the peak-to-speech-level difference is above the current headroom,
// `kDecay` otherwise.
constexpr float kAttack = 0.9988493699365052f;
constexpr float kDecay = 0.9997697679981565f;

// Saturation protector state. Defined outside of `SaturationProtectorImpl` to
// implement check-point and restore ops.
struct SaturationProtectorState {
  // Member-wise equality; floats are compared exactly.
  bool operator==(const SaturationProtectorState& s) const {
    return headroom_db == s.headroom_db &&
           peak_delay_buffer == s.peak_delay_buffer &&
           max_peaks_dbfs == s.max_peaks_dbfs &&
           time_since_push_ms == s.time_since_push_ms;
  }
  bool operator!=(const SaturationProtectorState& s) const {
    return !(*this == s);
  }

  // Current headroom estimate.
  float headroom_db;
  // Ring buffer of super-frame max peaks; its front item is used as the
  // delayed peak.
  SaturationProtectorBuffer peak_delay_buffer;
  // Max peak observed since the last ring buffer push operation.
  float max_peaks_dbfs;
  int time_since_push_ms;  // Time since the last ring buffer push operation.
};

// Resets the saturation protector state.
void ResetSaturationProtectorState(float initial_headroom_db, SaturationProtectorState& state) { state.headroom_db = initial_headroom_db; state.peak_delay_buffer.Reset(); state.max_peaks_dbfs = kMinLevelDbfs; state.time_since_push_ms = 0; } // Updates `state` by analyzing the estimated speech level `speech_level_dbfs` // and the peak level `peak_dbfs` for an observed frame. `state` must not be // modified without calling this function. void UpdateSaturationProtectorState(float peak_dbfs, float speech_level_dbfs, SaturationProtectorState& state) { // Get the max peak over `kPeakEnveloperSuperFrameLengthMs` ms. state.max_peaks_dbfs = std::max(state.max_peaks_dbfs, peak_dbfs); state.time_since_push_ms += kFrameDurationMs; if (rtc::SafeGt(state.time_since_push_ms, kPeakEnveloperSuperFrameLengthMs)) { // Push `max_peaks_dbfs` back into the ring buffer. state.peak_delay_buffer.PushBack(state.max_peaks_dbfs); // Reset. state.max_peaks_dbfs = kMinLevelDbfs; state.time_since_push_ms = 0; } // Update the headroom by comparing the estimated speech level and the delayed // max speech peak. const float delayed_peak_dbfs = state.peak_delay_buffer.Front().value_or(state.max_peaks_dbfs); const float difference_db = delayed_peak_dbfs - speech_level_dbfs; if (difference_db > state.headroom_db) { // Attack. state.headroom_db = state.headroom_db * kAttack + difference_db * (1.0f - kAttack); } else { // Decay. state.headroom_db = state.headroom_db * kDecay + difference_db * (1.0f - kDecay); } state.headroom_db = rtc::SafeClamp(state.headroom_db, kMinMarginDb, kMaxMarginDb); } // Saturation protector which recommends a headroom based on the recent peaks. 
class SaturationProtectorImpl : public SaturationProtector { public: explicit SaturationProtectorImpl(float initial_headroom_db, int adjacent_speech_frames_threshold, ApmDataDumper* apm_data_dumper) : apm_data_dumper_(apm_data_dumper), initial_headroom_db_(initial_headroom_db), adjacent_speech_frames_threshold_(adjacent_speech_frames_threshold) { Reset(); } SaturationProtectorImpl(const SaturationProtectorImpl&) = delete; SaturationProtectorImpl& operator=(const SaturationProtectorImpl&) = delete; ~SaturationProtectorImpl() = default; float HeadroomDb() override { return headroom_db_; } void Analyze(float speech_probability, float peak_dbfs, float speech_level_dbfs) override { if (speech_probability < kVadConfidenceThreshold) { // Not a speech frame. if (adjacent_speech_frames_threshold_ > 1) { // When two or more adjacent speech frames are required in order to // update the state, we need to decide whether to discard or confirm the // updates based on the speech sequence length. if (num_adjacent_speech_frames_ >= adjacent_speech_frames_threshold_) { // First non-speech frame after a long enough sequence of speech // frames. Update the reliable state. reliable_state_ = preliminary_state_; } else if (num_adjacent_speech_frames_ > 0) { // First non-speech frame after a too short sequence of speech frames. // Reset to the last reliable state. preliminary_state_ = reliable_state_; } } num_adjacent_speech_frames_ = 0; } else { // Speech frame observed. num_adjacent_speech_frames_++; // Update preliminary level estimate. UpdateSaturationProtectorState(peak_dbfs, speech_level_dbfs, preliminary_state_); if (num_adjacent_speech_frames_ >= adjacent_speech_frames_threshold_) { // `preliminary_state_` is now reliable. Update the headroom. 
headroom_db_ = preliminary_state_.headroom_db; } } DumpDebugData(); } void Reset() override { num_adjacent_speech_frames_ = 0; headroom_db_ = initial_headroom_db_; ResetSaturationProtectorState(initial_headroom_db_, preliminary_state_); ResetSaturationProtectorState(initial_headroom_db_, reliable_state_); } private: void DumpDebugData() { apm_data_dumper_->DumpRaw( "agc2_saturation_protector_preliminary_max_peak_dbfs", preliminary_state_.max_peaks_dbfs); apm_data_dumper_->DumpRaw( "agc2_saturation_protector_reliable_max_peak_dbfs", reliable_state_.max_peaks_dbfs); } ApmDataDumper* const apm_data_dumper_; const float initial_headroom_db_; const int adjacent_speech_frames_threshold_; int num_adjacent_speech_frames_; float headroom_db_; SaturationProtectorState preliminary_state_; SaturationProtectorState reliable_state_; }; } // namespace std::unique_ptr CreateSaturationProtector( float initial_headroom_db, int adjacent_speech_frames_threshold, ApmDataDumper* apm_data_dumper) { return std::make_unique( initial_headroom_db, adjacent_speech_frames_threshold, apm_data_dumper); } } // namespace webrtc