/*
 *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

// Internal declarations for the RNN VAD spectral features: Opus-scale band
// constants, the band-wise spectral correlator and the log-spectrum / DCT
// helpers. Most of the free functions are declared here only so that unit
// tests can reach them.
//
// NOTE(review): in this copy of the file the template argument lists of
// `std::array`, `std::vector` and `rtc::ArrayView`, as well as the targets of
// the first three #include directives, are missing. They look like they were
// stripped by a text-extraction step; restore them from the upstream source
// before building. The declarations below are otherwise left untouched.

#ifndef MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_SPECTRAL_FEATURES_INTERNAL_H_
#define MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_SPECTRAL_FEATURES_INTERNAL_H_

// NOTE(review): header names missing. The declarations below use `std::array`
// and `std::vector`, so presumably <array> and <vector> are among them -
// TODO confirm.
#include
#include
#include

#include "api/array_view.h"
#include "modules/audio_processing/agc2/rnn_vad/common.h"

namespace webrtc {
namespace rnn_vad {

// At a sample rate of 24 kHz, the last 3 Opus bands are beyond the Nyquist
// frequency. However, band #19 gets the contributions from band #18 because
// of the symmetric triangular filter with peak response at 12 kHz.
constexpr int kOpusBands24kHz = 20;
// `kNumBands` is not defined in this file; presumably it is provided by
// common.h - TODO confirm.
static_assert(kOpusBands24kHz < kNumBands,
              "The number of bands at 24 kHz must be less than those defined "
              "in the Opus scale at 48 kHz.");

// Number of FFT frequency bins covered by each band in the Opus scale at a
// sample rate of 24 kHz for 20 ms frames.
// The table has 19 entries, i.e., `kOpusBands24kHz - 1`, and the entries sum
// to 240 bins. Presumably there is one entry per interval between two
// adjacent band boundaries - TODO confirm.
// Declared here for unit testing.
constexpr std::array GetOpusScaleNumBins24kHz20ms() {
  return {4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 16, 16, 16, 24, 24, 32, 48};
}

// TODO(bugs.webrtc.org/10480): Move to a separate file.
// Class to compute band-wise spectral features in the Opus perceptual scale
// for 20 ms frames sampled at 24 kHz. The analysis methods apply triangular
// filters with peak response at each band boundary.
// Copy construction and copy assignment are disabled.
class SpectralCorrelator {
 public:
  // Ctor.
  // NOTE(review): `weights_` is const, so it has to be set up during
  // construction; the definition is not visible in this header.
  SpectralCorrelator();
  SpectralCorrelator(const SpectralCorrelator&) = delete;
  SpectralCorrelator& operator=(const SpectralCorrelator&) = delete;
  ~SpectralCorrelator();

  // Computes the band-wise spectral auto-correlations.
  // `x` must:
  //  - have size equal to `kFrameSize20ms24kHz`;
  //  - be encoded as vectors of interleaved real-complex FFT coefficients
  //    where x[1] = 0 (the Nyquist frequency coefficient is omitted).
  // `auto_corr` is presumably the destination of the per-band results; its
  // required size is not visible here - TODO confirm.
  void ComputeAutoCorrelation(
      rtc::ArrayView x,
      rtc::ArrayView auto_corr) const;

  // Computes the band-wise spectral cross-correlations.
  // `x` and `y` must:
  //  - have size equal to `kFrameSize20ms24kHz`;
  //  - be encoded as vectors of interleaved real-complex FFT coefficients where
  //    x[1] = y[1] = 0 (the Nyquist frequency coefficient is omitted).
  // `cross_corr` is presumably the destination of the per-band results; its
  // required size is not visible here - TODO confirm.
  void ComputeCrossCorrelation(
      rtc::ArrayView x,
      rtc::ArrayView y,
      rtc::ArrayView cross_corr) const;

 private:
  const std::vector weights_;  // Weights for each Fourier coefficient.
};

// TODO(bugs.webrtc.org/10480): Move to anonymous namespace in
// spectral_features.cc.
// Given a vector of Opus-bands energy coefficients, computes the log magnitude
// spectrum applying smoothing both over time and over frequency.
// `bands_energy` is the input and `log_bands_energy` presumably receives the
// result - TODO confirm against the definition.
// Declared here for unit testing.
void ComputeSmoothedLogMagnitudeSpectrum(
    rtc::ArrayView bands_energy,
    rtc::ArrayView log_bands_energy);

// TODO(bugs.webrtc.org/10480): Move to anonymous namespace in
// spectral_features.cc.
// Creates a DCT table for arrays having size equal to `kNumBands`.
// Declared here for unit testing.
std::array ComputeDctTable();

// TODO(bugs.webrtc.org/10480): Move to anonymous namespace in
// spectral_features.cc.
// Computes DCT for `in` given a pre-computed DCT table (`dct_table`,
// presumably the one returned by `ComputeDctTable()` - TODO confirm).
// In-place computation is not allowed and `out` can be smaller than `in` in
// order to only compute the first DCT coefficients.
// Declared here for unit testing.
void ComputeDct(rtc::ArrayView in,
                rtc::ArrayView dct_table,
                rtc::ArrayView out);

}  // namespace rnn_vad
}  // namespace webrtc

#endif  // MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_SPECTRAL_FEATURES_INTERNAL_H_