/*
 *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "modules/audio_processing/agc2/speech_level_estimator.h"

// Needed for `std::unique_ptr` / `std::make_unique` used by the test fixture.
#include <memory>

#include "modules/audio_processing/agc2/agc2_common.h"
#include "modules/audio_processing/include/audio_processing.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"
#include "rtc_base/gunit.h"

namespace webrtc {
namespace {

using AdaptiveDigitalConfig =
    AudioProcessing::Config::GainController2::AdaptiveDigital;

// Number of speech frames that the level estimator must observe in order to
// become confident about the estimated level.
constexpr int kNumFramesToConfidence =
    kLevelEstimatorTimeToConfidenceMs / kFrameDurationMs;
// The integer division above must not truncate to zero, otherwise the tests
// below would run the estimator for zero iterations.
static_assert(kNumFramesToConfidence > 0, "");

// Maximum absolute difference, in dB, tolerated by the convergence speed
// tests between the estimated level and the input RMS level.
constexpr float kConvergenceSpeedTestsLevelTolerance = 0.5f;

// Calls `level_estimator.Update()` `num_iterations` times, each time with the
// same `rms_dbfs`, `peak_dbfs` and `speech_probability` values. Does nothing
// if `num_iterations` is not positive.
void RunOnConstantLevel(int num_iterations,
                        float rms_dbfs,
                        float peak_dbfs,
                        float speech_probability,
                        SpeechLevelEstimator& level_estimator) {
  for (int i = 0; i < num_iterations; ++i) {
    level_estimator.Update(rms_dbfs, peak_dbfs, speech_probability);
  }
}

// Speech probabilities used by the tests.
constexpr float kNoSpeechProbability = 0.0f;
// Half of `kVadConfidenceThreshold`.
constexpr float kLowSpeechProbability = kVadConfidenceThreshold / 2.0f;
constexpr float kMaxSpeechProbability = 1.0f;

// Level estimator with data dumper.
struct TestLevelEstimator { explicit TestLevelEstimator(int adjacent_speech_frames_threshold) : data_dumper(0), estimator(std::make_unique( &data_dumper, AdaptiveDigitalConfig{}, adjacent_speech_frames_threshold)), initial_speech_level_dbfs(estimator->level_dbfs()), level_rms_dbfs(initial_speech_level_dbfs / 2.0f), level_peak_dbfs(initial_speech_level_dbfs / 3.0f) { RTC_DCHECK_LT(level_rms_dbfs, level_peak_dbfs); RTC_DCHECK_LT(initial_speech_level_dbfs, level_rms_dbfs); RTC_DCHECK_GT(level_rms_dbfs - initial_speech_level_dbfs, 5.0f) << "Adjust `level_rms_dbfs` so that the difference from the initial " "level is wide enough for the tests"; } ApmDataDumper data_dumper; std::unique_ptr estimator; const float initial_speech_level_dbfs; const float level_rms_dbfs; const float level_peak_dbfs; }; // Checks that the level estimator converges to a constant input speech level. TEST(GainController2SpeechLevelEstimator, LevelStabilizes) { TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1); RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence, level_estimator.level_rms_dbfs, level_estimator.level_peak_dbfs, kMaxSpeechProbability, *level_estimator.estimator); const float estimated_level_dbfs = level_estimator.estimator->level_dbfs(); RunOnConstantLevel(/*num_iterations=*/1, level_estimator.level_rms_dbfs, level_estimator.level_peak_dbfs, kMaxSpeechProbability, *level_estimator.estimator); EXPECT_NEAR(level_estimator.estimator->level_dbfs(), estimated_level_dbfs, 0.1f); } // Checks that the level controller does not become confident when too few // speech frames are observed. 
TEST(GainController2SpeechLevelEstimator, IsNotConfident) {
  TestLevelEstimator fixture(/*adjacent_speech_frames_threshold=*/1);
  // Only half of the frames required to reach confidence are provided.
  constexpr int kTooFewFrames = kNumFramesToConfidence / 2;
  RunOnConstantLevel(kTooFewFrames, fixture.level_rms_dbfs,
                     fixture.level_peak_dbfs, kMaxSpeechProbability,
                     *fixture.estimator);
  EXPECT_FALSE(fixture.estimator->is_confident());
}

// Verifies that observing `kNumFramesToConfidence` speech frames is enough for
// the estimator to report confidence.
TEST(GainController2SpeechLevelEstimator, IsConfident) {
  TestLevelEstimator fixture(/*adjacent_speech_frames_threshold=*/1);
  SpeechLevelEstimator& estimator = *fixture.estimator;
  RunOnConstantLevel(kNumFramesToConfidence, fixture.level_rms_dbfs,
                     fixture.level_peak_dbfs, kMaxSpeechProbability,
                     estimator);
  EXPECT_TRUE(estimator.is_confident());
}

// Verifies that frames flagged as non-speech leave the estimated level
// untouched, whatever their level is.
TEST(GainController2SpeechLevelEstimator, EstimatorIgnoresNonSpeechFrames) {
  TestLevelEstimator fixture(/*adjacent_speech_frames_threshold=*/1);
  SpeechLevelEstimator& estimator = *fixture.estimator;

  // Speech phase: let the estimator observe enough speech frames.
  RunOnConstantLevel(kNumFramesToConfidence, fixture.level_rms_dbfs,
                     fixture.level_peak_dbfs, kMaxSpeechProbability,
                     estimator);
  const float level_after_speech_dbfs = estimator.level_dbfs();

  // Non-speech phase: full-scale frames with zero speech probability.
  constexpr float kFullScaleDbfs = 0.0f;
  RunOnConstantLevel(kNumFramesToConfidence, /*rms_dbfs=*/kFullScaleDbfs,
                     /*peak_dbfs=*/kFullScaleDbfs, kNoSpeechProbability,
                     estimator);

  // The estimate must be the same as before the non-speech phase.
  EXPECT_FLOAT_EQ(estimator.level_dbfs(), level_after_speech_dbfs);
}

// Checks the convergence speed of the estimator before it becomes confident.
TEST(GainController2SpeechLevelEstimator, ConvergenceSpeedBeforeConfidence) { TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1); RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence, level_estimator.level_rms_dbfs, level_estimator.level_peak_dbfs, kMaxSpeechProbability, *level_estimator.estimator); EXPECT_NEAR(level_estimator.estimator->level_dbfs(), level_estimator.level_rms_dbfs, kConvergenceSpeedTestsLevelTolerance); } // Checks the convergence speed of the estimator after it becomes confident. TEST(GainController2SpeechLevelEstimator, ConvergenceSpeedAfterConfidence) { TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1); // Reach confidence using the initial level estimate. RunOnConstantLevel( /*num_iterations=*/kNumFramesToConfidence, /*rms_dbfs=*/level_estimator.initial_speech_level_dbfs, /*peak_dbfs=*/level_estimator.initial_speech_level_dbfs + 6.0f, kMaxSpeechProbability, *level_estimator.estimator); // No estimate change should occur, but confidence is achieved. ASSERT_FLOAT_EQ(level_estimator.estimator->level_dbfs(), level_estimator.initial_speech_level_dbfs); ASSERT_TRUE(level_estimator.estimator->is_confident()); // After confidence. constexpr float kConvergenceTimeAfterConfidenceNumFrames = 600; // 6 seconds. 
static_assert( kConvergenceTimeAfterConfidenceNumFrames > kNumFramesToConfidence, ""); RunOnConstantLevel( /*num_iterations=*/kConvergenceTimeAfterConfidenceNumFrames, level_estimator.level_rms_dbfs, level_estimator.level_peak_dbfs, kMaxSpeechProbability, *level_estimator.estimator); EXPECT_NEAR(level_estimator.estimator->level_dbfs(), level_estimator.level_rms_dbfs, kConvergenceSpeedTestsLevelTolerance); } class SpeechLevelEstimatorParametrization : public ::testing::TestWithParam { protected: int adjacent_speech_frames_threshold() const { return GetParam(); } }; TEST_P(SpeechLevelEstimatorParametrization, DoNotAdaptToShortSpeechSegments) { TestLevelEstimator level_estimator(adjacent_speech_frames_threshold()); const float initial_level = level_estimator.estimator->level_dbfs(); ASSERT_LT(initial_level, level_estimator.level_peak_dbfs); for (int i = 0; i < adjacent_speech_frames_threshold() - 1; ++i) { SCOPED_TRACE(i); level_estimator.estimator->Update(level_estimator.level_rms_dbfs, level_estimator.level_peak_dbfs, kMaxSpeechProbability); EXPECT_EQ(initial_level, level_estimator.estimator->level_dbfs()); } level_estimator.estimator->Update(level_estimator.level_rms_dbfs, level_estimator.level_peak_dbfs, kLowSpeechProbability); EXPECT_EQ(initial_level, level_estimator.estimator->level_dbfs()); } TEST_P(SpeechLevelEstimatorParametrization, AdaptToEnoughSpeechSegments) { TestLevelEstimator level_estimator(adjacent_speech_frames_threshold()); const float initial_level = level_estimator.estimator->level_dbfs(); ASSERT_LT(initial_level, level_estimator.level_peak_dbfs); for (int i = 0; i < adjacent_speech_frames_threshold(); ++i) { level_estimator.estimator->Update(level_estimator.level_rms_dbfs, level_estimator.level_peak_dbfs, kMaxSpeechProbability); } EXPECT_LT(initial_level, level_estimator.estimator->level_dbfs()); } INSTANTIATE_TEST_SUITE_P(GainController2, SpeechLevelEstimatorParametrization, ::testing::Values(1, 9, 17)); } // namespace } // namespace webrtc