summaryrefslogtreecommitdiffstats
path: root/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features_internal.cc
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features_internal.cc')
-rw-r--r--third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features_internal.cc188
1 files changed, 188 insertions, 0 deletions
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features_internal.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features_internal.cc
new file mode 100644
index 0000000000..a10b0f7ec9
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features_internal.cc
@@ -0,0 +1,188 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/rnn_vad/spectral_features_internal.h"
+
+#include <algorithm>
+#include <cmath>
+#include <cstddef>
+
+#include "rtc_base/checks.h"
+#include "rtc_base/numerics/safe_compare.h"
+
+namespace webrtc {
+namespace rnn_vad {
+namespace {
+
+// Weights for each FFT coefficient for each Opus band (Nyquist frequency
+// excluded). The size of each band is specified in
+// `kOpusScaleNumBins24kHz20ms`.
+constexpr std::array<float, kFrameSize20ms24kHz / 2> kOpusBandWeights24kHz20ms =
+ {{
+ 0.f, 0.25f, 0.5f, 0.75f, // Band 0
+ 0.f, 0.25f, 0.5f, 0.75f, // Band 1
+ 0.f, 0.25f, 0.5f, 0.75f, // Band 2
+ 0.f, 0.25f, 0.5f, 0.75f, // Band 3
+ 0.f, 0.25f, 0.5f, 0.75f, // Band 4
+ 0.f, 0.25f, 0.5f, 0.75f, // Band 5
+ 0.f, 0.25f, 0.5f, 0.75f, // Band 6
+ 0.f, 0.25f, 0.5f, 0.75f, // Band 7
+ 0.f, 0.125f, 0.25f, 0.375f, 0.5f,
+ 0.625f, 0.75f, 0.875f, // Band 8
+ 0.f, 0.125f, 0.25f, 0.375f, 0.5f,
+ 0.625f, 0.75f, 0.875f, // Band 9
+ 0.f, 0.125f, 0.25f, 0.375f, 0.5f,
+ 0.625f, 0.75f, 0.875f, // Band 10
+ 0.f, 0.125f, 0.25f, 0.375f, 0.5f,
+ 0.625f, 0.75f, 0.875f, // Band 11
+ 0.f, 0.0625f, 0.125f, 0.1875f, 0.25f,
+ 0.3125f, 0.375f, 0.4375f, 0.5f, 0.5625f,
+ 0.625f, 0.6875f, 0.75f, 0.8125f, 0.875f,
+ 0.9375f, // Band 12
+ 0.f, 0.0625f, 0.125f, 0.1875f, 0.25f,
+ 0.3125f, 0.375f, 0.4375f, 0.5f, 0.5625f,
+ 0.625f, 0.6875f, 0.75f, 0.8125f, 0.875f,
+ 0.9375f, // Band 13
+ 0.f, 0.0625f, 0.125f, 0.1875f, 0.25f,
+ 0.3125f, 0.375f, 0.4375f, 0.5f, 0.5625f,
+ 0.625f, 0.6875f, 0.75f, 0.8125f, 0.875f,
+ 0.9375f, // Band 14
+ 0.f, 0.0416667f, 0.0833333f, 0.125f, 0.166667f,
+ 0.208333f, 0.25f, 0.291667f, 0.333333f, 0.375f,
+ 0.416667f, 0.458333f, 0.5f, 0.541667f, 0.583333f,
+ 0.625f, 0.666667f, 0.708333f, 0.75f, 0.791667f,
+ 0.833333f, 0.875f, 0.916667f, 0.958333f, // Band 15
+ 0.f, 0.0416667f, 0.0833333f, 0.125f, 0.166667f,
+ 0.208333f, 0.25f, 0.291667f, 0.333333f, 0.375f,
+ 0.416667f, 0.458333f, 0.5f, 0.541667f, 0.583333f,
+ 0.625f, 0.666667f, 0.708333f, 0.75f, 0.791667f,
+ 0.833333f, 0.875f, 0.916667f, 0.958333f, // Band 16
+ 0.f, 0.03125f, 0.0625f, 0.09375f, 0.125f,
+ 0.15625f, 0.1875f, 0.21875f, 0.25f, 0.28125f,
+ 0.3125f, 0.34375f, 0.375f, 0.40625f, 0.4375f,
+ 0.46875f, 0.5f, 0.53125f, 0.5625f, 0.59375f,
+ 0.625f, 0.65625f, 0.6875f, 0.71875f, 0.75f,
+ 0.78125f, 0.8125f, 0.84375f, 0.875f, 0.90625f,
+ 0.9375f, 0.96875f, // Band 17
+ 0.f, 0.0208333f, 0.0416667f, 0.0625f, 0.0833333f,
+ 0.104167f, 0.125f, 0.145833f, 0.166667f, 0.1875f,
+ 0.208333f, 0.229167f, 0.25f, 0.270833f, 0.291667f,
+ 0.3125f, 0.333333f, 0.354167f, 0.375f, 0.395833f,
+ 0.416667f, 0.4375f, 0.458333f, 0.479167f, 0.5f,
+ 0.520833f, 0.541667f, 0.5625f, 0.583333f, 0.604167f,
+ 0.625f, 0.645833f, 0.666667f, 0.6875f, 0.708333f,
+ 0.729167f, 0.75f, 0.770833f, 0.791667f, 0.8125f,
+ 0.833333f, 0.854167f, 0.875f, 0.895833f, 0.916667f,
+ 0.9375f, 0.958333f, 0.979167f // Band 18
+ }};
+
+} // namespace
+
+SpectralCorrelator::SpectralCorrelator()
+ : weights_(kOpusBandWeights24kHz20ms.begin(),
+ kOpusBandWeights24kHz20ms.end()) {}
+
+SpectralCorrelator::~SpectralCorrelator() = default;
+
+void SpectralCorrelator::ComputeAutoCorrelation(
+ rtc::ArrayView<const float> x,
+ rtc::ArrayView<float, kOpusBands24kHz> auto_corr) const {
+ ComputeCrossCorrelation(x, x, auto_corr);
+}
+
+void SpectralCorrelator::ComputeCrossCorrelation(
+ rtc::ArrayView<const float> x,
+ rtc::ArrayView<const float> y,
+ rtc::ArrayView<float, kOpusBands24kHz> cross_corr) const {
+ RTC_DCHECK_EQ(x.size(), kFrameSize20ms24kHz);
+ RTC_DCHECK_EQ(x.size(), y.size());
+ RTC_DCHECK_EQ(x[1], 0.f) << "The Nyquist coefficient must be zeroed.";
+ RTC_DCHECK_EQ(y[1], 0.f) << "The Nyquist coefficient must be zeroed.";
+ constexpr auto kOpusScaleNumBins24kHz20ms = GetOpusScaleNumBins24kHz20ms();
+ int k = 0; // Next Fourier coefficient index.
+ cross_corr[0] = 0.f;
+ for (int i = 0; i < kOpusBands24kHz - 1; ++i) {
+ cross_corr[i + 1] = 0.f;
+ for (int j = 0; j < kOpusScaleNumBins24kHz20ms[i]; ++j) { // Band size.
+ const float v = x[2 * k] * y[2 * k] + x[2 * k + 1] * y[2 * k + 1];
+ const float tmp = weights_[k] * v;
+ cross_corr[i] += v - tmp;
+ cross_corr[i + 1] += tmp;
+ k++;
+ }
+ }
+ cross_corr[0] *= 2.f; // The first band only gets half contribution.
+ RTC_DCHECK_EQ(k, kFrameSize20ms24kHz / 2); // Nyquist coefficient never used.
+}
+
+void ComputeSmoothedLogMagnitudeSpectrum(
+ rtc::ArrayView<const float> bands_energy,
+ rtc::ArrayView<float, kNumBands> log_bands_energy) {
+ RTC_DCHECK_LE(bands_energy.size(), kNumBands);
+ constexpr float kOneByHundred = 1e-2f;
+ constexpr float kLogOneByHundred = -2.f;
+ // Init.
+ float log_max = kLogOneByHundred;
+ float follow = kLogOneByHundred;
+ const auto smooth = [&log_max, &follow](float x) {
+ x = std::max(log_max - 7.f, std::max(follow - 1.5f, x));
+ log_max = std::max(log_max, x);
+ follow = std::max(follow - 1.5f, x);
+ return x;
+ };
+ // Smoothing over the bands for which the band energy is defined.
+ for (int i = 0; rtc::SafeLt(i, bands_energy.size()); ++i) {
+ log_bands_energy[i] = smooth(std::log10(kOneByHundred + bands_energy[i]));
+ }
+ // Smoothing over the remaining bands (zero energy).
+ for (int i = bands_energy.size(); i < kNumBands; ++i) {
+ log_bands_energy[i] = smooth(kLogOneByHundred);
+ }
+}
+
+std::array<float, kNumBands * kNumBands> ComputeDctTable() {
+ std::array<float, kNumBands * kNumBands> dct_table;
+ const double k = std::sqrt(0.5);
+ for (int i = 0; i < kNumBands; ++i) {
+ for (int j = 0; j < kNumBands; ++j)
+ dct_table[i * kNumBands + j] = std::cos((i + 0.5) * j * kPi / kNumBands);
+ dct_table[i * kNumBands] *= k;
+ }
+ return dct_table;
+}
+
+void ComputeDct(rtc::ArrayView<const float> in,
+ rtc::ArrayView<const float, kNumBands * kNumBands> dct_table,
+ rtc::ArrayView<float> out) {
+ // DCT scaling factor - i.e., sqrt(2 / kNumBands).
+ constexpr float kDctScalingFactor = 0.301511345f;
+ constexpr float kDctScalingFactorError =
+ kDctScalingFactor * kDctScalingFactor -
+ 2.f / static_cast<float>(kNumBands);
+ static_assert(
+ (kDctScalingFactorError >= 0.f && kDctScalingFactorError < 1e-1f) ||
+ (kDctScalingFactorError < 0.f && kDctScalingFactorError > -1e-1f),
+ "kNumBands changed and kDctScalingFactor has not been updated.");
+ RTC_DCHECK_NE(in.data(), out.data()) << "In-place DCT is not supported.";
+ RTC_DCHECK_LE(in.size(), kNumBands);
+ RTC_DCHECK_LE(1, out.size());
+ RTC_DCHECK_LE(out.size(), in.size());
+ for (int i = 0; rtc::SafeLt(i, out.size()); ++i) {
+ out[i] = 0.f;
+ for (int j = 0; rtc::SafeLt(j, in.size()); ++j) {
+ out[i] += in[j] * dct_table[j * kNumBands + i];
+ }
+ // TODO(bugs.webrtc.org/10480): Scaling factor in the DCT table.
+ out[i] *= kDctScalingFactor;
+ }
+}
+
+} // namespace rnn_vad
+} // namespace webrtc