summaryrefslogtreecommitdiffstats
path: root/third_party/libwebrtc/modules/audio_processing/vad/pitch_based_vad.cc
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 00:47:55 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 00:47:55 +0000
commit26a029d407be480d791972afb5975cf62c9360a6 (patch)
treef435a8308119effd964b339f76abb83a57c29483 /third_party/libwebrtc/modules/audio_processing/vad/pitch_based_vad.cc
parentInitial commit. (diff)
downloadfirefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz
firefox-26a029d407be480d791972afb5975cf62c9360a6.zip
Adding upstream version 124.0.1.upstream/124.0.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/libwebrtc/modules/audio_processing/vad/pitch_based_vad.cc')
-rw-r--r--third_party/libwebrtc/modules/audio_processing/vad/pitch_based_vad.cc120
1 files changed, 120 insertions, 0 deletions
diff --git a/third_party/libwebrtc/modules/audio_processing/vad/pitch_based_vad.cc b/third_party/libwebrtc/modules/audio_processing/vad/pitch_based_vad.cc
new file mode 100644
index 0000000000..68e60dc66a
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/vad/pitch_based_vad.cc
@@ -0,0 +1,120 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/vad/pitch_based_vad.h"
+
+#include <string.h>
+
+#include "modules/audio_processing/vad/common.h"
+#include "modules/audio_processing/vad/noise_gmm_tables.h"
+#include "modules/audio_processing/vad/vad_circular_buffer.h"
+#include "modules/audio_processing/vad/voice_gmm_tables.h"
+
+namespace webrtc {
+
+static_assert(kNoiseGmmDim == kVoiceGmmDim,
+ "noise and voice gmm dimension not equal");
+
+// These values should match MATLAB counterparts for unit-tests to pass.
+static const int kPosteriorHistorySize = 500; // 5 sec of 10 ms frames.
+static const double kInitialPriorProbability = 0.3;
+static const int kTransientWidthThreshold = 7;
+static const double kLowProbabilityThreshold = 0.2;
+
+static double LimitProbability(double p) {
+ const double kLimHigh = 0.99;
+ const double kLimLow = 0.01;
+
+ if (p > kLimHigh)
+ p = kLimHigh;
+ else if (p < kLimLow)
+ p = kLimLow;
+ return p;
+}
+
+PitchBasedVad::PitchBasedVad()
+ : p_prior_(kInitialPriorProbability),
+ circular_buffer_(VadCircularBuffer::Create(kPosteriorHistorySize)) {
+ // Setup noise GMM.
+ noise_gmm_.dimension = kNoiseGmmDim;
+ noise_gmm_.num_mixtures = kNoiseGmmNumMixtures;
+ noise_gmm_.weight = kNoiseGmmWeights;
+ noise_gmm_.mean = &kNoiseGmmMean[0][0];
+ noise_gmm_.covar_inverse = &kNoiseGmmCovarInverse[0][0][0];
+
+ // Setup voice GMM.
+ voice_gmm_.dimension = kVoiceGmmDim;
+ voice_gmm_.num_mixtures = kVoiceGmmNumMixtures;
+ voice_gmm_.weight = kVoiceGmmWeights;
+ voice_gmm_.mean = &kVoiceGmmMean[0][0];
+ voice_gmm_.covar_inverse = &kVoiceGmmCovarInverse[0][0][0];
+}
+
+PitchBasedVad::~PitchBasedVad() {}
+
+int PitchBasedVad::VoicingProbability(const AudioFeatures& features,
+ double* p_combined) {
+ double p;
+ double gmm_features[3];
+ double pdf_features_given_voice;
+ double pdf_features_given_noise;
+ // These limits are the same in matlab implementation 'VoicingProbGMM().'
+ const double kLimLowLogPitchGain = -2.0;
+ const double kLimHighLogPitchGain = -0.9;
+ const double kLimLowSpectralPeak = 200;
+ const double kLimHighSpectralPeak = 2000;
+ const double kEps = 1e-12;
+ for (size_t n = 0; n < features.num_frames; n++) {
+ gmm_features[0] = features.log_pitch_gain[n];
+ gmm_features[1] = features.spectral_peak[n];
+ gmm_features[2] = features.pitch_lag_hz[n];
+
+ pdf_features_given_voice = EvaluateGmm(gmm_features, voice_gmm_);
+ pdf_features_given_noise = EvaluateGmm(gmm_features, noise_gmm_);
+
+ if (features.spectral_peak[n] < kLimLowSpectralPeak ||
+ features.spectral_peak[n] > kLimHighSpectralPeak ||
+ features.log_pitch_gain[n] < kLimLowLogPitchGain) {
+ pdf_features_given_voice = kEps * pdf_features_given_noise;
+ } else if (features.log_pitch_gain[n] > kLimHighLogPitchGain) {
+ pdf_features_given_noise = kEps * pdf_features_given_voice;
+ }
+
+ p = p_prior_ * pdf_features_given_voice /
+ (pdf_features_given_voice * p_prior_ +
+ pdf_features_given_noise * (1 - p_prior_));
+
+ p = LimitProbability(p);
+
+ // Combine pitch-based probability with standalone probability, before
+ // updating prior probabilities.
+ double prod_active = p * p_combined[n];
+ double prod_inactive = (1 - p) * (1 - p_combined[n]);
+ p_combined[n] = prod_active / (prod_active + prod_inactive);
+
+ if (UpdatePrior(p_combined[n]) < 0)
+ return -1;
+ // Limit prior probability. With a zero prior probability the posterior
+ // probability is always zero.
+ p_prior_ = LimitProbability(p_prior_);
+ }
+ return 0;
+}
+
+int PitchBasedVad::UpdatePrior(double p) {
+ circular_buffer_->Insert(p);
+ if (circular_buffer_->RemoveTransient(kTransientWidthThreshold,
+ kLowProbabilityThreshold) < 0)
+ return -1;
+ p_prior_ = circular_buffer_->Mean();
+ return 0;
+}
+
+} // namespace webrtc