/*
 *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "modules/audio_processing/vad/voice_activity_detector.h"

#include <algorithm>

#include "rtc_base/checks.h"

namespace webrtc {
namespace {

// Channel count handed to the resampler when it is (re)configured.
constexpr size_t kNumChannels = 1;
// Initial value of `last_voice_probability_`, used until a chunk has produced
// at least one frame of features.
constexpr double kDefaultVoiceValue = 1.0;
// Starting probability written to every frame before the two VADs refine it.
constexpr double kNeutralProbability = 0.5;
// Probability assigned to every frame when the extracted features are flagged
// as silence.
constexpr double kLowProbability = 0.01;

}  // namespace

// Starts with `last_voice_probability_` at `kDefaultVoiceValue` and creates
// the standalone VAD instance via StandaloneVad::Create().
VoiceActivityDetector::VoiceActivityDetector()
    : last_voice_probability_(kDefaultVoiceValue),
      standalone_vad_(StandaloneVad::Create()) {}

VoiceActivityDetector::~VoiceActivityDetector() = default;

// Processes one chunk of audio and refreshes the per-frame voice
// probabilities and RMS values.
//
// Because ISAC has a different chunk length, it updates
// `chunkwise_voice_probabilities_` and `chunkwise_rms_` when there is new data.
// Otherwise it clears them.
//
// `audio` points to `length` samples at `sample_rate_hz`; `length` is
// debug-checked to equal `sample_rate_hz / 100`. When `sample_rate_hz` differs
// from `kSampleRateHz`, the audio is first resampled into `resampled_`.
// `last_voice_probability_` is only updated when at least one frame of
// features was produced.
void VoiceActivityDetector::ProcessChunk(const int16_t* audio,
                                         size_t length,
                                         int sample_rate_hz) {
  RTC_DCHECK_EQ(length, sample_rate_hz / 100);
  // TODO(bugs.webrtc.org/7494): Remove resampling and force 16 kHz audio.
  // Resample to the required rate.
  const int16_t* resampled_ptr = audio;
  if (sample_rate_hz != kSampleRateHz) {
    RTC_CHECK_EQ(
        resampler_.ResetIfNeeded(sample_rate_hz, kSampleRateHz, kNumChannels),
        0);
    // NOTE(review): `length` is passed as the last argument and is expected to
    // equal `kLength10Ms` right after this call, so it is presumably
    // overwritten with the number of resampled samples - confirm against the
    // Resampler API. The return value of Push() is not checked here.
    resampler_.Push(audio, length, resampled_, kLength10Ms, length);
    resampled_ptr = resampled_;
  }
  RTC_DCHECK_EQ(length, kLength10Ms);

  // Each chunk needs to be passed into `standalone_vad_`, because internally it
  // buffers the audio and processes it all at once when GetActivity() is
  // called.
  RTC_CHECK_EQ(standalone_vad_->AddAudio(resampled_ptr, length), 0);

  audio_processing_.ExtractFeatures(resampled_ptr, length, &features_);

  // Size both per-frame outputs to the number of frames just extracted; this
  // empties them when no frame was produced for this chunk.
  chunkwise_voice_probabilities_.resize(features_.num_frames);
  chunkwise_rms_.resize(features_.num_frames);
  std::copy(features_.rms, features_.rms + chunkwise_rms_.size(),
            chunkwise_rms_.begin());

  if (features_.num_frames > 0) {
    if (features_.silence) {
      // The other features are invalid, so set the voice probabilities to an
      // arbitrary low value.
      std::fill(chunkwise_voice_probabilities_.begin(),
                chunkwise_voice_probabilities_.end(), kLowProbability);
    } else {
      // Seed every frame with a neutral probability, then let the standalone
      // VAD and the pitch-based VAD update the values in place, in that order.
      std::fill(chunkwise_voice_probabilities_.begin(),
                chunkwise_voice_probabilities_.end(), kNeutralProbability);
      RTC_CHECK_GE(
          standalone_vad_->GetActivity(&chunkwise_voice_probabilities_[0],
                                       chunkwise_voice_probabilities_.size()),
          0);
      RTC_CHECK_GE(pitch_based_vad_.VoicingProbability(
                       features_, &chunkwise_voice_probabilities_[0]),
                   0);
    }
    // Remember the probability of the most recent frame.
    last_voice_probability_ = chunkwise_voice_probabilities_.back();
  }
}

}  // namespace webrtc