summaryrefslogtreecommitdiffstats
path: root/third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-28 14:29:10 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-28 14:29:10 +0000
commit2aa4a82499d4becd2284cdb482213d541b8804dd (patch)
treeb80bf8bf13c3766139fbacc530efd0dd9d54394c /third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility
parentInitial commit. (diff)
downloadfirefox-upstream.tar.xz
firefox-upstream.zip
Adding upstream version 86.0.1.upstream/86.0.1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility')
-rw-r--r--third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc392
-rw-r--r--third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h137
-rw-r--r--third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer_unittest.cc536
-rw-r--r--third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/intelligibility_utils.cc94
-rw-r--r--third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h86
-rw-r--r--third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/intelligibility_utils_unittest.cc79
-rw-r--r--third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/test/intelligibility_proc.cc96
7 files changed, 1420 insertions, 0 deletions
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc
new file mode 100644
index 0000000000..0e696d9fff
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc
@@ -0,0 +1,392 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/intelligibility/intelligibility_enhancer.h"
+
+#include <math.h>
+#include <stdlib.h>
+#include <algorithm>
+#include <limits>
+#include <numeric>
+
+#include "common_audio/include/audio_util.h"
+#include "common_audio/window_generator.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/numerics/safe_minmax.h"
+
+namespace webrtc {
+
+namespace {
+
+const size_t kErbResolution = 2;
+const int kWindowSizeMs = 16;
+const int kChunkSizeMs = 10; // Size provided by APM.
+const float kClipFreqKhz = 0.2f;
+const float kKbdAlpha = 1.5f;
+const float kLambdaBot = -1.f; // Extreme values in bisection
+const float kLambdaTop = -1e-5f; // search for lambda.
+const float kVoiceProbabilityThreshold = 0.5f;
+// Number of chunks after voice activity which is still considered speech.
+const size_t kSpeechOffsetDelay = 10;
+const float kDecayRate = 0.995f; // Power estimation decay rate.
+const float kMaxRelativeGainChange = 0.005f;
+const float kRho = 0.0004f; // Default production and interpretation SNR.
+const float kPowerNormalizationFactor = 1.f / (1 << 30);
+const float kMaxActiveSNR = 128.f; // 21dB
+const float kMinInactiveSNR = 32.f; // 15dB
+const size_t kGainUpdatePeriod = 10u;
+
+// Returns the dot product of the first |length| elements of |a| and |b|.
+float DotProduct(const float* a, const float* b, size_t length) {
+ float ret = 0.f;  // Stays 0.f when |length| is 0.
+ for (size_t i = 0; i < length; ++i) {
+ ret += a[i] * b[i];
+ }
+ return ret;
+}
+
+// Computes the power across ERB bands from the power spectral density |pow|.
+// Stores one normalized value per row of |filter_bank| in |result|.
+void MapToErbBands(const float* pow,
+ const std::vector<std::vector<float>>& filter_bank,
+ float* result) {
+ for (size_t i = 0; i < filter_bank.size(); ++i) {  // One output per band.
+ RTC_DCHECK_GT(filter_bank[i].size(), 0);
+ result[i] = kPowerNormalizationFactor *
+ DotProduct(filter_bank[i].data(), pow, filter_bank[i].size());  // Reads filter_bank[i].size() values of |pow|.
+ }
+}
+
+} // namespace
+
+IntelligibilityEnhancer::IntelligibilityEnhancer(int sample_rate_hz,
+ size_t num_render_channels,
+ size_t num_bands,  // Every band after the first gets a delay buffer.
+ size_t num_noise_bins)  // Length of each capture noise estimate.
+ : freqs_(RealFourier::ComplexLength(
+ RealFourier::FftOrder(sample_rate_hz * kWindowSizeMs / 1000))),
+ num_noise_bins_(num_noise_bins),
+ chunk_length_(static_cast<size_t>(sample_rate_hz * kChunkSizeMs / 1000)),
+ bank_size_(GetBankSize(sample_rate_hz, kErbResolution)),
+ sample_rate_hz_(sample_rate_hz),
+ num_render_channels_(num_render_channels),
+ clear_power_estimator_(freqs_, kDecayRate),
+ noise_power_estimator_(num_noise_bins, kDecayRate),
+ filtered_clear_pow_(bank_size_, 0.f),
+ filtered_noise_pow_(num_noise_bins, 0.f),  // NOTE(review): MapToErbBands() writes bank_size_ entries here; assumes num_noise_bins >= bank_size_.
+ center_freqs_(bank_size_),
+ capture_filter_bank_(CreateErbBank(num_noise_bins)),  // Uses bank_size_, sample_rate_hz_ and center_freqs_ set above.
+ render_filter_bank_(CreateErbBank(freqs_)),
+ gains_eq_(bank_size_),
+ gain_applier_(freqs_, kMaxRelativeGainChange),
+ audio_s16_(chunk_length_),
+ chunks_since_voice_(kSpeechOffsetDelay),  // Starts in the "no recent speech" state.
+ is_speech_(false),
+ snr_(kMaxActiveSNR),
+ is_active_(false),
+ num_chunks_(0u),
+ num_active_chunks_(0u),
+ noise_estimation_buffer_(num_noise_bins),
+ noise_estimation_queue_(kMaxNumNoiseEstimatesToBuffer,
+ std::vector<float>(num_noise_bins),
+ RenderQueueItemVerifier<float>(num_noise_bins)) {
+ RTC_DCHECK_LE(kRho, 1.f);
+
+ const size_t erb_index = static_cast<size_t>(
+ ceilf(11.17f * logf((kClipFreqKhz + 0.312f) / (kClipFreqKhz + 14.6575f)) +
+ 43.f));
+ start_freq_ = std::max(static_cast<size_t>(1), erb_index * kErbResolution);  // Bands below this index keep unity gain.
+
+ size_t window_size = static_cast<size_t>(1) << RealFourier::FftOrder(freqs_);
+ std::vector<float> kbd_window(window_size);
+ WindowGenerator::KaiserBesselDerived(kKbdAlpha, window_size,
+ kbd_window.data());
+ render_mangler_.reset(new LappedTransform(
+ num_render_channels_, num_render_channels_, chunk_length_,
+ kbd_window.data(), window_size, window_size / 2, this));  // |this| is the block-processing callback.
+
+ const size_t initial_delay = render_mangler_->initial_delay();
+ for (size_t i = 0u; i + 1 < num_bands; ++i) {  // |i + 1 < num_bands| cannot wrap around when num_bands is 0.
+ high_bands_buffers_.push_back(std::unique_ptr<intelligibility::DelayBuffer>(
+ new intelligibility::DelayBuffer(initial_delay, num_render_channels_)));
+ }
+}
+
+IntelligibilityEnhancer::~IntelligibilityEnhancer() {
+ // Logs the percentage of processed blocks during which the effect was active.
+ // Don't rely on this log: the destructor isn't called when the app/tab is killed.
+ if (num_chunks_ > 0) {  // Guards the division below.
+ RTC_LOG(LS_INFO) << "Intelligibility Enhancer was active for "
+ << 100.f * static_cast<float>(num_active_chunks_) /
+ num_chunks_
+ << "% of the call.";
+ } else {
+ RTC_LOG(LS_INFO) << "Intelligibility Enhancer processed no chunk.";
+ }
+}
+
+void IntelligibilityEnhancer::SetCaptureNoiseEstimate(
+ std::vector<float> noise, float gain) {  // |noise| is taken by value, so scaling it does not touch the caller's data.
+ RTC_DCHECK_EQ(noise.size(), num_noise_bins_);
+ for (auto& bin : noise) {
+ bin *= gain;
+ }
+ // The return value is deliberately ignored (the empty |if| only consumes it):
+ // a failed insert just drops this estimate, and no single estimate is critical.
+ if (noise_estimation_queue_.Insert(&noise)) {
+ };
+}
+
+void IntelligibilityEnhancer::ProcessRenderAudio(AudioBuffer* audio) {
+ RTC_DCHECK_EQ(num_render_channels_, audio->num_channels());
+ while (noise_estimation_queue_.Remove(&noise_estimation_buffer_)) {  // Drain every queued noise estimate.
+ noise_power_estimator_.Step(noise_estimation_buffer_.data());
+ }
+ float* const* low_band = audio->split_channels_f(kBand0To8kHz);
+ is_speech_ = IsSpeech(low_band[0]);  // Speech detection looks at the first channel only.
+ render_mangler_->ProcessChunk(low_band, low_band);  // In place: the same buffer is input and output.
+ DelayHighBands(audio);
+}
+
+void IntelligibilityEnhancer::ProcessAudioBlock(
+ const std::complex<float>* const* in_block,
+ size_t in_channels,
+ size_t frames,
+ size_t /* out_channels */,
+ std::complex<float>* const* out_block) {
+ RTC_DCHECK_EQ(freqs_, frames);
+ if (is_speech_) {
+ clear_power_estimator_.Step(in_block[0]);  // Speech power is tracked from the first channel only.
+ }
+ SnrBasedEffectActivation();
+ ++num_chunks_;
+ if (is_active_) {
+ ++num_active_chunks_;
+ if (num_chunks_ % kGainUpdatePeriod == 0) {  // Gains are re-solved only every kGainUpdatePeriod blocks.
+ MapToErbBands(clear_power_estimator_.power().data(), render_filter_bank_,
+ filtered_clear_pow_.data());
+ MapToErbBands(noise_power_estimator_.power().data(), capture_filter_bank_,
+ filtered_noise_pow_.data());
+ SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.data());
+ const float power_target = std::accumulate(
+ filtered_clear_pow_.data(),
+ filtered_clear_pow_.data() + bank_size_,
+ 0.f);  // Target: total speech power across the ERB bands.
+ const float power_top =
+ DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_);
+ SolveForGainsGivenLambda(kLambdaBot, start_freq_, gains_eq_.data());
+ const float power_bot =
+ DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_);
+ if (power_target >= power_bot && power_target <= power_top) {  // Search only when the target lies between both extremes.
+ SolveForLambda(power_target);
+ UpdateErbGains();
+ } // Else experiencing power underflow, so do nothing.
+ }
+ }
+ for (size_t i = 0; i < in_channels; ++i) {  // The same gain applier is used for every channel.
+ gain_applier_.Apply(in_block[i], out_block[i]);
+ }
+}
+
+void IntelligibilityEnhancer::SnrBasedEffectActivation() {
+ const float* clear_psd = clear_power_estimator_.power().data();
+ const float* noise_psd = noise_power_estimator_.power().data();
+ const float clear_power =
+ std::accumulate(clear_psd, clear_psd + freqs_, 0.f);
+ const float noise_power =
+ std::accumulate(noise_psd, noise_psd + freqs_, 0.f);  // NOTE(review): sums |freqs_| bins, but this estimator was built with |num_noise_bins| bins - confirm the sizes agree.
+ snr_ = kDecayRate * snr_ + (1.f - kDecayRate) * clear_power /
+ (noise_power + std::numeric_limits<float>::epsilon());  // Smoothed power ratio; epsilon avoids dividing by zero.
+ if (is_active_) {
+ if (snr_ > kMaxActiveSNR) {  // Hysteresis: switch off only above the higher threshold.
+ RTC_LOG(LS_INFO) << "Intelligibility Enhancer was deactivated at chunk "
+ << num_chunks_;
+ is_active_ = false;
+ // Set the target gains to unity.
+ float* gains = gain_applier_.target();
+ for (size_t i = 0; i < freqs_; ++i) {
+ gains[i] = 1.f;
+ }
+ }
+ } else {
+ if (snr_ < kMinInactiveSNR) {  // Hysteresis: switch on only below the lower threshold.
+ RTC_LOG(LS_INFO) << "Intelligibility Enhancer was activated at chunk "
+ << num_chunks_;
+ is_active_ = true;
+ }
+ }
+}
+
+void IntelligibilityEnhancer::SolveForLambda(float power_target) {
+ const float kConvergeThresh = 0.001f; // TODO(ekmeyerson): Find best values
+ const int kMaxIters = 100; // for these, based on experiments.
+
+ const float reciprocal_power_target =
+ 1.f / (power_target + std::numeric_limits<float>::epsilon());  // Epsilon avoids dividing by zero.
+ float lambda_bot = kLambdaBot;
+ float lambda_top = kLambdaTop;
+ float power_ratio = 2.f; // Ratio of achieved power to target power.
+ int iters = 0;
+ while (std::fabs(power_ratio - 1.f) > kConvergeThresh && iters <= kMaxIters) {  // Stops on convergence or after kMaxIters + 1 passes.
+ const float lambda = (lambda_bot + lambda_top) / 2.f;  // Midpoint of the current bracket.
+ SolveForGainsGivenLambda(lambda, start_freq_, gains_eq_.data());  // Leaves the latest gains in |gains_eq_|.
+ const float power =
+ DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_);
+ if (power < power_target) {
+ lambda_bot = lambda;  // Too little power: raise the lower bound.
+ } else {
+ lambda_top = lambda;  // Enough power: lower the upper bound.
+ }
+ power_ratio = std::fabs(power * reciprocal_power_target);
+ ++iters;
+ }
+}
+
+void IntelligibilityEnhancer::UpdateErbGains() {
+ // (freq gain) = filterbank' * (ERB gain)
+ float* gains = gain_applier_.target();  // Per-frequency-bin target gains (|freqs_| entries are written).
+ for (size_t i = 0; i < freqs_; ++i) {
+ gains[i] = 0.f;
+ for (size_t j = 0; j < bank_size_; ++j) {  // Weighted sum of the per-band gains in |gains_eq_|.
+ gains[i] += render_filter_bank_[j][i] * gains_eq_[j];
+ }
+ }
+}
+
+size_t IntelligibilityEnhancer::GetBankSize(int sample_rate,
+ size_t erb_resolution) {
+ float freq_limit = sample_rate / 2000.f;  // Half of |sample_rate|, scaled by 1/1000 (kHz if the rate is in Hz).
+ size_t erb_scale = static_cast<size_t>(ceilf(
+ 11.17f * logf((freq_limit + 0.312f) / (freq_limit + 14.6575f)) + 43.f));  // Same formula the constructor applies to kClipFreqKhz.
+ return erb_scale * erb_resolution;  // |erb_resolution| filters per unit of the scale.
+}
+
+std::vector<std::vector<float>> IntelligibilityEnhancer::CreateErbBank(
+ size_t num_freqs) {  // Returns bank_size_ rows of |num_freqs| weights; also overwrites center_freqs_.
+ std::vector<std::vector<float>> filter_bank(bank_size_);
+ size_t lf = 1, rf = 4;  // How many bands each filter reaches to the left / right.
+
+ for (size_t i = 0; i < bank_size_; ++i) {
+ float abs_temp = fabsf((i + 1.f) / static_cast<float>(kErbResolution));
+ center_freqs_[i] = 676170.4f / (47.06538f - expf(0.08950404f * abs_temp));
+ center_freqs_[i] -= 14678.49f;
+ }
+ float last_center_freq = center_freqs_[bank_size_ - 1];
+ for (size_t i = 0; i < bank_size_; ++i) {  // Rescale so the last center frequency equals sample_rate_hz_ / 2.
+ center_freqs_[i] *= 0.5f * sample_rate_hz_ / last_center_freq;
+ }
+
+ for (size_t i = 0; i < bank_size_; ++i) {
+ filter_bank[i].resize(num_freqs);  // Value-initialized to 0.f.
+ }
+
+ for (size_t i = 1; i <= bank_size_; ++i) {  // |i| is 1-based; row i - 1 is filled.
+ size_t lll = static_cast<size_t>(
+ round(center_freqs_[rtc::SafeMax<size_t>(1, i - lf) - 1] * num_freqs /
+ (0.5f * sample_rate_hz_)));
+ size_t ll = static_cast<size_t>(
+ round(center_freqs_[rtc::SafeMax<size_t>(1, i) - 1] * num_freqs /
+ (0.5f * sample_rate_hz_)));
+ lll = rtc::SafeClamp<size_t>(lll, 1, num_freqs) - 1;  // To a 0-based bin index within [0, num_freqs - 1].
+ ll = rtc::SafeClamp<size_t>(ll, 1, num_freqs) - 1;
+
+ size_t rrr = static_cast<size_t>(
+ round(center_freqs_[rtc::SafeMin<size_t>(bank_size_, i + rf) - 1] *
+ num_freqs / (0.5f * sample_rate_hz_)));
+ size_t rr = static_cast<size_t>(
+ round(center_freqs_[rtc::SafeMin<size_t>(bank_size_, i + 1) - 1] *
+ num_freqs / (0.5f * sample_rate_hz_)));
+ rrr = rtc::SafeClamp<size_t>(rrr, 1, num_freqs) - 1;
+ rr = rtc::SafeClamp<size_t>(rr, 1, num_freqs) - 1;
+
+ float step = ll == lll ? 0.f : 1.f / (ll - lll);
+ float element = 0.f;
+ for (size_t j = lll; j <= ll; ++j) {  // Rising slope from 0 over [lll, ll].
+ filter_bank[i - 1][j] = element;
+ element += step;
+ }
+ step = rr == rrr ? 0.f : 1.f / (rrr - rr);
+ element = 1.f;
+ for (size_t j = rr; j <= rrr; ++j) {  // Falling slope from 1 over [rr, rrr].
+ filter_bank[i - 1][j] = element;
+ element -= step;
+ }
+ for (size_t j = ll; j <= rr; ++j) {  // Flat top; overwrites both slope end points with 1.
+ filter_bank[i - 1][j] = 1.f;
+ }
+ }
+
+ for (size_t i = 0; i < num_freqs; ++i) {  // Normalize so the weights of each bin sum to 1 across bands.
+ float sum = 0.f;
+ for (size_t j = 0; j < bank_size_; ++j) {
+ sum += filter_bank[j][i];
+ }
+ for (size_t j = 0; j < bank_size_; ++j) {
+ filter_bank[j][i] /= sum;  // NOTE(review): no guard for sum == 0; presumably every bin is covered by some band - confirm.
+ }
+ }
+ return filter_bank;
+}
+
+void IntelligibilityEnhancer::SolveForGainsGivenLambda(float lambda,
+ size_t start_freq,
+ float* sols) {  // Writes bank_size_ gains when start_freq <= bank_size_.
+ const float kMinPower = 1e-5f;
+
+ const float* pow_x0 = filtered_clear_pow_.data();  // Speech power per ERB band.
+ const float* pow_n0 = filtered_noise_pow_.data();  // Noise power per ERB band.
+
+ for (size_t n = 0; n < start_freq; ++n) {  // Bands below |start_freq| keep unity gain.
+ sols[n] = 1.f;
+ }
+
+ // Analytic solution for optimal gains. See paper for derivation.
+ for (size_t n = start_freq; n < bank_size_; ++n) {
+ if (pow_x0[n] < kMinPower || pow_n0[n] < kMinPower) {  // Too little power in either signal: unity gain.
+ sols[n] = 1.f;
+ } else {
+ const float gamma0 = 0.5f * kRho * pow_x0[n] * pow_n0[n] +
+ lambda * pow_x0[n] * pow_n0[n] * pow_n0[n];
+ const float beta0 =
+ lambda * pow_x0[n] * (2.f - kRho) * pow_x0[n] * pow_n0[n];
+ const float alpha0 =
+ lambda * pow_x0[n] * (1.f - kRho) * pow_x0[n] * pow_x0[n];
+ RTC_DCHECK_LT(alpha0, 0.f);  // Requires |lambda| < 0; also keeps the division below away from zero.
+ // The quadratic equation should always have real roots, but to guard
+ // against numerical errors we limit it to a minimum of zero.
+ sols[n] = std::max(
+ 0.f, (-beta0 - std::sqrt(std::max(
+ 0.f, beta0 * beta0 - 4.f * alpha0 * gamma0))) /
+ (2.f * alpha0));
+ }
+ }
+}
+
+bool IntelligibilityEnhancer::IsSpeech(const float* audio) {
+ FloatToS16(audio, chunk_length_, audio_s16_.data());  // The detector is fed the 16-bit copy of the chunk.
+ vad_.ProcessChunk(audio_s16_.data(), chunk_length_, sample_rate_hz_);
+ if (vad_.last_voice_probability() > kVoiceProbabilityThreshold) {
+ chunks_since_voice_ = 0;
+ } else if (chunks_since_voice_ < kSpeechOffsetDelay) {  // Counter saturates at kSpeechOffsetDelay.
+ ++chunks_since_voice_;
+ }
+ return chunks_since_voice_ < kSpeechOffsetDelay;  // Stays true until kSpeechOffsetDelay chunks in a row miss the threshold.
+}
+
+void IntelligibilityEnhancer::DelayHighBands(AudioBuffer* audio) {
+ RTC_DCHECK_EQ(audio->num_bands(), high_bands_buffers_.size() + 1);  // One delay buffer per band after the first.
+ for (size_t i = 0u; i < high_bands_buffers_.size(); ++i) {
+ Band band = static_cast<Band>(i + 1);  // Skips band 0, which ProcessRenderAudio() handles separately.
+ high_bands_buffers_[i]->Delay(audio->split_channels_f(band), chunk_length_);
+ }
+}
+
+} // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h b/third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h
new file mode 100644
index 0000000000..3e0e269c58
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_
+#define MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_
+
+#include <complex>
+#include <memory>
+#include <vector>
+
+#include "common_audio/channel_buffer.h"
+#include "common_audio/lapped_transform.h"
+#include "modules/audio_processing/audio_buffer.h"
+#include "modules/audio_processing/intelligibility/intelligibility_utils.h"
+#include "modules/audio_processing/render_queue_item_verifier.h"
+#include "modules/audio_processing/vad/voice_activity_detector.h"
+#include "rtc_base/swap_queue.h"
+
+namespace webrtc {
+
+// Speech intelligibility enhancement module. Reads render and capture
+// audio streams and modifies the render stream with a set of gains per
+// frequency bin to enhance speech against the noise background.
+// Details of the model and algorithm can be found in the original paper:
+// http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=6882788
+class IntelligibilityEnhancer : public LappedTransform::Callback {
+ public:
+ IntelligibilityEnhancer(int sample_rate_hz,
+ size_t num_render_channels,
+ size_t num_bands,
+ size_t num_noise_bins);
+
+ ~IntelligibilityEnhancer() override;
+
+ // Scales |noise| by |gain| and queues it as a capture noise estimate.
+ void SetCaptureNoiseEstimate(std::vector<float> noise, float gain);
+
+ // Reads chunk of speech in time domain and updates with modified signal.
+ void ProcessRenderAudio(AudioBuffer* audio);
+ bool active() const;  // NOTE(review): no definition in intelligibility_enhancer.cc as shown - confirm it exists elsewhere.
+
+ protected:
+ // All in frequency domain, receives input |in_block|, applies
+ // intelligibility enhancement, and writes result to |out_block|.
+ void ProcessAudioBlock(const std::complex<float>* const* in_block,
+ size_t in_channels,
+ size_t frames,
+ size_t out_channels,
+ std::complex<float>* const* out_block) override;
+
+ private:
+ FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestRenderUpdate);
+ FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestErbCreation);
+ FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestSolveForGains);
+ FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest,
+ TestNoiseGainHasExpectedResult);
+ FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest,
+ TestAllBandsHaveSameDelay);
+
+ // Updates the SNR estimation and enables or disables this component using a
+ // hysteresis.
+ void SnrBasedEffectActivation();
+
+ // Bisection search for optimal |lambda|.
+ void SolveForLambda(float power_target);
+
+ // Transforms ERB gains to freq gains.
+ void UpdateErbGains();
+
+ // Returns number of ERB filters.
+ static size_t GetBankSize(int sample_rate, size_t erb_resolution);
+
+ // Initializes ERB filterbank.
+ std::vector<std::vector<float>> CreateErbBank(size_t num_freqs);
+
+ // Analytically solves quadratic for optimal gains given |lambda|.
+ // Negative gains are set to 0. Stores the results in |sols|.
+ void SolveForGainsGivenLambda(float lambda, size_t start_freq, float* sols);
+
+ // Returns true if the audio is speech.
+ bool IsSpeech(const float* audio);
+
+ // Delays the high bands to compensate for the processing delay in the low
+ // band.
+ void DelayHighBands(AudioBuffer* audio);
+
+ static const size_t kMaxNumNoiseEstimatesToBuffer = 5;
+
+ const size_t freqs_; // Num frequencies in frequency domain.
+ const size_t num_noise_bins_;
+ const size_t chunk_length_; // Chunk size in samples.
+ const size_t bank_size_; // Num ERB filters.
+ const int sample_rate_hz_;
+ const size_t num_render_channels_;
+
+ intelligibility::PowerEstimator<std::complex<float>> clear_power_estimator_;
+ intelligibility::PowerEstimator<float> noise_power_estimator_;
+ std::vector<float> filtered_clear_pow_;
+ std::vector<float> filtered_noise_pow_;
+ std::vector<float> center_freqs_;
+ std::vector<std::vector<float>> capture_filter_bank_;
+ std::vector<std::vector<float>> render_filter_bank_;
+ size_t start_freq_;  // First ERB band whose gain is solved for; lower bands stay at 1.
+
+ std::vector<float> gains_eq_; // Pre-filter modified gains.
+ intelligibility::GainApplier gain_applier_;
+
+ std::unique_ptr<LappedTransform> render_mangler_;
+
+ VoiceActivityDetector vad_;
+ std::vector<int16_t> audio_s16_;
+ size_t chunks_since_voice_;
+ bool is_speech_;
+ float snr_;
+ bool is_active_;
+
+ unsigned long int num_chunks_;
+ unsigned long int num_active_chunks_;
+
+ std::vector<float> noise_estimation_buffer_;
+ SwapQueue<std::vector<float>, RenderQueueItemVerifier<float>>
+ noise_estimation_queue_;
+
+ std::vector<std::unique_ptr<intelligibility::DelayBuffer>>
+ high_bands_buffers_;
+};
+
+} // namespace webrtc
+
+#endif // MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer_unittest.cc
new file mode 100644
index 0000000000..98a8dae469
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer_unittest.cc
@@ -0,0 +1,536 @@
+/*
+ * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <math.h>
+#include <stdlib.h>
+
+#include <algorithm>
+#include <memory>
+#include <vector>
+
+#include "api/array_view.h"
+#include "common_audio/signal_processing/include/signal_processing_library.h"
+#include "modules/audio_processing/audio_buffer.h"
+#include "modules/audio_processing/intelligibility/intelligibility_enhancer.h"
+#include "modules/audio_processing/noise_suppression_impl.h"
+#include "modules/audio_processing/test/audio_buffer_tools.h"
+#include "modules/audio_processing/test/bitexactness_tools.h"
+#include "rtc_base/arraysize.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+
+namespace {
+
+// Target output for ERB create test. Generated with matlab.
+const float kTestCenterFreqs[] = {
+ 14.5213f, 29.735f, 45.6781f, 62.3884f, 79.9058f, 98.2691f, 117.521f,
+ 137.708f, 158.879f, 181.084f, 204.378f, 228.816f, 254.459f, 281.371f,
+ 309.618f, 339.273f, 370.411f, 403.115f, 437.469f, 473.564f, 511.497f,
+ 551.371f, 593.293f, 637.386f, 683.77f, 732.581f, 783.96f, 838.06f,
+ 895.046f, 955.09f, 1018.38f, 1085.13f, 1155.54f, 1229.85f, 1308.32f,
+ 1391.22f, 1478.83f, 1571.5f, 1669.55f, 1773.37f, 1883.37f, 2000.f};
+const float kTestFilterBank[][33] = {
+ {0.2f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+ {0.2f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+ {0.2f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+ {0.2f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+ {0.2f, 0.25f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+ {0.f, 0.25f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+ {0.f, 0.25f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+ {0.f, 0.25f, 0.25f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+ {0.f, 0.f, 0.25f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+ {0.f, 0.f, 0.25f, 0.142857f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+ {0.f, 0.f, 0.25f, 0.285714f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+ {0.f, 0.f, 0.f, 0.285714f, 0.142857f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+ {0.f, 0.f, 0.f, 0.285714f, 0.285714f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+ {0.f, 0.f, 0.f, 0.f, 0.285714f, 0.142857f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+ {0.f, 0.f, 0.f, 0.f, 0.285714f, 0.285714f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+ {0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f, 0.142857f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+ {0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f, 0.285714f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+ {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f, 0.142857f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+ {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f, 0.285714f, 0.157895f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+ {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f, 0.210526f, 0.117647f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+ {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f, 0.315789f, 0.176471f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+ {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.315789f, 0.352941f, 0.142857f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+ {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.352941f, 0.285714f,
+ 0.157895f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+ {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f,
+ 0.210526f, 0.111111f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+ {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.285714f, 0.315789f, 0.222222f, 0.111111f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+ {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.315789f, 0.333333f, 0.222222f, 0.111111f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+ {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.333333f, 0.333333f, 0.222222f, 0.111111f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+ {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.333333f, 0.333333f, 0.222222f, 0.111111f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+ {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.333333f, 0.333333f, 0.222222f, 0.111111f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+ {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.333333f, 0.333333f, 0.222222f,
+ 0.108108f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+ {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.333333f, 0.333333f,
+ 0.243243f, 0.153846f, 0.0833333f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+ {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.333333f,
+ 0.324324f, 0.230769f, 0.166667f, 0.0909091f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+ {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.324324f, 0.307692f, 0.25f, 0.181818f, 0.0833333f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+ {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.307692f, 0.333333f,
+ 0.363636f, 0.25f, 0.151515f, 0.0793651f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f},
+ {0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.166667f, 0.363636f, 0.333333f, 0.242424f,
+ 0.190476f, 0.133333f, 0.0689655f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f},
+ {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.333333f, 0.30303f, 0.253968f, 0.2f, 0.137931f,
+ 0.0714286f, 0.f, 0.f, 0.f, 0.f, 0.f},
+ {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.30303f, 0.31746f, 0.333333f, 0.275862f, 0.214286f,
+ 0.125f, 0.0655738f, 0.f, 0.f, 0.f},
+ {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.15873f, 0.333333f, 0.344828f, 0.357143f,
+ 0.25f, 0.196721f, 0.137931f, 0.0816327f, 0.f},
+ {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.172414f, 0.357143f,
+ 0.3125f, 0.245902f, 0.172414f, 0.102041f, 0.f},
+ {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.3125f, 0.327869f, 0.344828f, 0.204082f, 0.f},
+ {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.163934f, 0.344828f, 0.408163f, 0.5f},
+ {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.204082f, 0.5f}};
+static_assert(arraysize(kTestCenterFreqs) == arraysize(kTestFilterBank),
+ "Test filterbank badly initialized.");
+
+// Target output for gain solving test. Generated with matlab.
+const size_t kTestStartFreq = 12; // Lowest integral frequency for ERBs.
+const float kTestZeroVar = 1.f;
+const float kTestNonZeroVarLambdaTop[] = {
+ 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f};
+static_assert(arraysize(kTestCenterFreqs) ==
+ arraysize(kTestNonZeroVarLambdaTop),
+ "Power test data badly initialized.");
+const float kMaxTestError = 0.005f;
+
+// Enhancer initialization parameters.
+const int kSamples = 10000;  // Total samples CheckUpdate() pushes through the enhancer.
+const int kSampleRate = 4000;
+const int kNumChannels = 1;
+const int kFragmentSize = kSampleRate / 100;  // Samples per fragment: 1/100 s at kSampleRate.
+const size_t kNumNoiseBins = 129;
+const size_t kNumBands = 1;
+
+// Number of frames to process in the bitexactness and noise gain tests.
+const size_t kNumFramesToProcess = 1000;
+
+// Returns |sample_rate_hz| capped at AudioProcessing::kSampleRate16kHz.
+int IntelligibilityEnhancerSampleRate(int sample_rate_hz) {
+  if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) {
+    return AudioProcessing::kSampleRate16kHz;
+  }
+  return sample_rate_hz;
+}
+
+// Process one frame of data and produce the output: enhancer on render, then noise suppression on capture, then the noise estimate is handed to the enhancer.
+void ProcessOneFrame(int sample_rate_hz,
+ AudioBuffer* render_audio_buffer,
+ AudioBuffer* capture_audio_buffer,
+ NoiseSuppressionImpl* noise_suppressor,
+ IntelligibilityEnhancer* intelligibility_enhancer) {
+ if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) {  // Above this rate both buffers are split into bands first.
+ render_audio_buffer->SplitIntoFrequencyBands();
+ capture_audio_buffer->SplitIntoFrequencyBands();
+ }
+
+ intelligibility_enhancer->ProcessRenderAudio(render_audio_buffer);  // Runs before this frame's noise estimate is set below.
+
+ noise_suppressor->AnalyzeCaptureAudio(capture_audio_buffer);
+ noise_suppressor->ProcessCaptureAudio(capture_audio_buffer);
+
+ intelligibility_enhancer->SetCaptureNoiseEstimate(
+ noise_suppressor->NoiseEstimate(), 0);  // NOTE(review): second argument looks like the noise gain (cf. kGain in TestNoiseGainHasExpectedResult) - confirm.
+
+ if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) {
+ render_audio_buffer->MergeFrequencyBands();  // Only the render buffer is merged back.
+ }
+}
+
+// Processes kNumFramesToProcess frames read from the APM render and capture
+// test vectors for |sample_rate_hz|, then checks the start of the last render
+// output frame against |output_reference| and reports any mismatch through
+// gtest expectations.
+void RunBitexactnessTest(int sample_rate_hz,
+                         size_t num_channels,
+                         rtc::ArrayView<const float> output_reference) {
+  const StreamConfig render_config(sample_rate_hz, num_channels, false);
+  AudioBuffer render_buffer(
+      render_config.num_frames(), render_config.num_channels(),
+      render_config.num_frames(), render_config.num_channels(),
+      render_config.num_frames());
+  test::InputAudioFile render_file(
+      test::GetApmRenderTestVectorFileName(sample_rate_hz));
+  std::vector<float> render_input(render_buffer.num_frames() *
+                                  render_buffer.num_channels());
+
+  const StreamConfig capture_config(sample_rate_hz, num_channels, false);
+  AudioBuffer capture_buffer(
+      capture_config.num_frames(), capture_config.num_channels(),
+      capture_config.num_frames(), capture_config.num_channels(),
+      capture_config.num_frames());
+  test::InputAudioFile capture_file(
+      test::GetApmCaptureTestVectorFileName(sample_rate_hz));
+  // Sized from the capture buffer itself rather than borrowing the render
+  // buffer's frame count.
+  std::vector<float> capture_input(capture_buffer.num_frames() *
+                                   capture_buffer.num_channels());
+
+  rtc::CriticalSection crit_capture;
+  NoiseSuppressionImpl noise_suppressor(&crit_capture);
+  noise_suppressor.Initialize(capture_config.num_channels(), sample_rate_hz);
+  noise_suppressor.Enable(true);
+
+  IntelligibilityEnhancer intelligibility_enhancer(
+      IntelligibilityEnhancerSampleRate(sample_rate_hz),
+      render_config.num_channels(), kNumBands,
+      NoiseSuppressionImpl::num_noise_bins());
+
+  for (size_t frame_no = 0u; frame_no < kNumFramesToProcess; ++frame_no) {
+    ReadFloatSamplesFromStereoFile(render_buffer.num_frames(),
+                                   render_buffer.num_channels(), &render_file,
+                                   render_input);
+    ReadFloatSamplesFromStereoFile(capture_buffer.num_frames(),
+                                   capture_buffer.num_channels(), &capture_file,
+                                   capture_input);
+
+    test::CopyVectorToAudioBuffer(render_config, render_input, &render_buffer);
+    test::CopyVectorToAudioBuffer(capture_config, capture_input,
+                                  &capture_buffer);
+
+    ProcessOneFrame(sample_rate_hz, &render_buffer, &capture_buffer,
+                    &noise_suppressor, &intelligibility_enhancer);
+  }
+
+  // Extract and verify the test results.
+  std::vector<float> render_output;
+  test::ExtractVectorFromAudioBuffer(render_config, &render_buffer,
+                                     &render_output);
+
+  const float kElementErrorBound = 1.f / static_cast<float>(1 << 15);
+
+  // Compare the output with the reference. Only the first values of the output
+  // from last frame processed are compared in order not having to specify all
+  // preceding frames as testvectors. As the algorithm being tested has a
+  // memory, testing only the last frame implicitly also tests the preceding
+  // frames.
+  EXPECT_TRUE(test::VerifyDeinterleavedArray(
+      render_buffer.num_frames(), render_config.num_channels(),
+      output_reference, render_output, kElementErrorBound));
+}
+
+// Maps the next std::rand() value linearly onto the range [-1, 1].
+float float_rand() {
+  const float scaled_to_two = std::rand() * 2.f / RAND_MAX;
+  return scaled_to_two - 1;
+}
+
+} // namespace
+
+// Fixture that owns an enhancer configured with the test constants plus the
+// speech, noise and reference sample vectors the tests fill in.
+class IntelligibilityEnhancerTest : public ::testing::Test {
+ protected:
+  IntelligibilityEnhancerTest()
+      : clear_buffer_(kFragmentSize,
+                      kNumChannels,
+                      kFragmentSize,
+                      kNumChannels,
+                      kFragmentSize),
+        stream_config_(kSampleRate, kNumChannels),
+        clear_data_(kSamples),
+        noise_data_(kNumNoiseBins),
+        orig_data_(kSamples),
+        // Defined starting value so CheckUpdate() never reads an
+        // indeterminate delay when a test forgets to set it.
+        initial_delay_(0u) {
+    // Fixed seed keeps the generated test signals reproducible.
+    std::srand(1);
+    enh_.reset(new IntelligibilityEnhancer(kSampleRate, kNumChannels, kNumBands,
+                                           kNumNoiseBins));
+  }
+
+  // Runs |clear_data_| through a freshly created enhancer, one fragment at a
+  // time, overwriting it with the processed output. Returns true if any output
+  // sample differs from |orig_data_| (shifted by |initial_delay_|) by more
+  // than kMaxTestError, false otherwise.
+  bool CheckUpdate() {
+    enh_.reset(new IntelligibilityEnhancer(kSampleRate, kNumChannels, kNumBands,
+                                           kNumNoiseBins));
+    float* clear_cursor = clear_data_.data();
+    for (int i = 0; i < kSamples; i += kFragmentSize) {
+      enh_->SetCaptureNoiseEstimate(noise_data_, 1);
+      clear_buffer_.CopyFrom(&clear_cursor, stream_config_);
+      enh_->ProcessRenderAudio(&clear_buffer_);
+      clear_buffer_.CopyTo(stream_config_, &clear_cursor);
+      clear_cursor += kFragmentSize;
+    }
+    // Unsigned index: |initial_delay_| is a size_t and must not be narrowed.
+    const size_t num_samples = static_cast<size_t>(kSamples);
+    for (size_t i = initial_delay_; i < num_samples; ++i) {
+      if (std::fabs(clear_data_[i] - orig_data_[i - initial_delay_]) >
+          kMaxTestError) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  std::unique_ptr<IntelligibilityEnhancer> enh_;
+  // Render clean speech buffer.
+  AudioBuffer clear_buffer_;
+  StreamConfig stream_config_;
+  // Speech samples; replaced by the enhancer output in CheckUpdate().
+  std::vector<float> clear_data_;
+  // Noise estimate handed to the enhancer before every fragment.
+  std::vector<float> noise_data_;
+  // Untouched copy of the speech samples used as the comparison reference.
+  std::vector<float> orig_data_;
+  // Sample offset between |orig_data_| and the processed |clear_data_|.
+  size_t initial_delay_;
+};
+
+// For each class of generated data, tests that render stream is updated when
+// it should be.
+TEST_F(IntelligibilityEnhancerTest, TestRenderUpdate) {
+ initial_delay_ = enh_->render_mangler_->initial_delay();  // CheckUpdate() offsets its comparison by this amount.
+ std::fill(noise_data_.begin(), noise_data_.end(), 0.f);
+ std::fill(orig_data_.begin(), orig_data_.end(), 0.f);
+ std::fill(clear_data_.begin(), clear_data_.end(), 0.f);
+ EXPECT_FALSE(CheckUpdate());  // All-zero speech and noise: no sample may differ beyond kMaxTestError.
+ std::generate(clear_data_.begin(), clear_data_.end(), float_rand);
+ orig_data_ = clear_data_;
+ EXPECT_FALSE(CheckUpdate());  // Random speech, still zero noise: output expected to match the input.
+ std::generate(clear_data_.begin(), clear_data_.end(), float_rand);
+ orig_data_ = clear_data_;
+ std::generate(noise_data_.begin(), noise_data_.end(), float_rand);
+ FloatToFloatS16(noise_data_.data(), noise_data_.size(), noise_data_.data());  // In-place conversion of the random noise values.
+ EXPECT_TRUE(CheckUpdate());  // Random speech with non-zero noise: output expected to change.
+}
+
+// Tests ERB bank creation, comparing against matlab output.
+TEST_F(IntelligibilityEnhancerTest, TestErbCreation) {
+ ASSERT_EQ(arraysize(kTestCenterFreqs), enh_->bank_size_);  // Fatal on mismatch, so the loops below stay within the reference arrays.
+ for (size_t i = 0; i < enh_->bank_size_; ++i) {
+ EXPECT_NEAR(kTestCenterFreqs[i], enh_->center_freqs_[i], kMaxTestError);
+ ASSERT_EQ(arraysize(kTestFilterBank[0]), enh_->freqs_);  // Same check on every pass; guards the inner loop's column index.
+ for (size_t j = 0; j < enh_->freqs_; ++j) {
+ EXPECT_NEAR(kTestFilterBank[i][j], enh_->render_filter_bank_[i][j],
+ kMaxTestError);
+ }
+ }
+}
+
+// Tests analytic solution for optimal gains, comparing
+// against matlab output.
+TEST_F(IntelligibilityEnhancerTest, TestSolveForGains) {
+ ASSERT_EQ(kTestStartFreq, enh_->start_freq_);
+ std::vector<float> sols(enh_->bank_size_);  // Receives one solved gain per filter bank band.
+ float lambda = -0.001f;
+ for (size_t i = 0; i < enh_->bank_size_; i++) {  // Case 1: zero speech and noise power in every band.
+ enh_->filtered_clear_pow_[i] = 0.f;
+ enh_->filtered_noise_pow_[i] = 0.f;
+ }
+ enh_->SolveForGainsGivenLambda(lambda, enh_->start_freq_, sols.data());
+ for (size_t i = 0; i < enh_->bank_size_; i++) {
+ EXPECT_NEAR(kTestZeroVar, sols[i], kMaxTestError);  // Every gain expected to equal kTestZeroVar.
+ }
+ for (size_t i = 0; i < enh_->bank_size_; i++) {  // Case 2: speech power rises with i while noise power falls.
+ enh_->filtered_clear_pow_[i] = static_cast<float>(i + 1);
+ enh_->filtered_noise_pow_[i] = static_cast<float>(enh_->bank_size_ - i);
+ }
+ enh_->SolveForGainsGivenLambda(lambda, enh_->start_freq_, sols.data());
+ for (size_t i = 0; i < enh_->bank_size_; i++) {
+ EXPECT_NEAR(kTestNonZeroVarLambdaTop[i], sols[i], kMaxTestError);
+ }
+ lambda = -1.f;  // Case 3: same powers, different lambda, same expected reference.
+ enh_->SolveForGainsGivenLambda(lambda, enh_->start_freq_, sols.data());
+ for (size_t i = 0; i < enh_->bank_size_; i++) {
+ EXPECT_NEAR(kTestNonZeroVarLambdaTop[i], sols[i], kMaxTestError);
+ }
+}
+
+TEST_F(IntelligibilityEnhancerTest, TestNoiseGainHasExpectedResult) {  // Checks that the estimated noise power converges to (kGain * noise)^2.
+ const float kGain = 2.f;
+ const float kTolerance = 0.007f;  // Maximum accepted relative error per bin.
+ std::vector<float> noise(kNumNoiseBins);
+ std::vector<float> noise_psd(kNumNoiseBins);
+ std::generate(noise.begin(), noise.end(), float_rand);
+ for (size_t i = 0; i < kNumNoiseBins; ++i) {
+ noise_psd[i] = kGain * kGain * noise[i] * noise[i];  // Expected power: squared noise scaled by squared gain.
+ }
+ float* clear_cursor = clear_data_.data();  // Never advanced, so the same first fragment is fed every iteration.
+ for (size_t i = 0; i < kNumFramesToProcess; ++i) {
+ enh_->SetCaptureNoiseEstimate(noise, kGain);
+ clear_buffer_.CopyFrom(&clear_cursor, stream_config_);
+ enh_->ProcessRenderAudio(&clear_buffer_);
+ }
+ const std::vector<float>& estimated_psd =
+ enh_->noise_power_estimator_.power();
+ for (size_t i = 0; i < kNumNoiseBins; ++i) {
+ EXPECT_LT(std::abs(estimated_psd[i] - noise_psd[i]) / noise_psd[i],  // Relative error; assumes noise_psd[i] is non-zero.
+ kTolerance);
+ }
+}
+
+TEST_F(IntelligibilityEnhancerTest, TestAllBandsHaveSameDelay) {  // Checks each band's output equals its input shifted by the initial delay.
+ const int kTestSampleRate = AudioProcessing::kSampleRate32kHz;
+ const int kTestSplitRate = AudioProcessing::kSampleRate16kHz;
+ const size_t kTestNumBands =
+ rtc::CheckedDivExact(kTestSampleRate, kTestSplitRate);
+ const size_t kTestFragmentSize = rtc::CheckedDivExact(kTestSampleRate, 100);
+ const size_t kTestSplitFragmentSize =
+ rtc::CheckedDivExact(kTestSplitRate, 100);
+ enh_.reset(new IntelligibilityEnhancer(kTestSplitRate, kNumChannels,
+ kTestNumBands, kNumNoiseBins));
+ size_t initial_delay = enh_->render_mangler_->initial_delay();
+ std::vector<float> rand_gen_buf(kTestFragmentSize);
+ AudioBuffer original_buffer(kTestFragmentSize, kNumChannels,
+ kTestFragmentSize, kNumChannels,
+ kTestFragmentSize);
+ AudioBuffer audio_buffer(kTestFragmentSize, kNumChannels, kTestFragmentSize,
+ kNumChannels, kTestFragmentSize);
+ for (size_t i = 0u; i < kTestNumBands; ++i) {  // NOTE(review): |i| is unused in the body, so every pass makes the same calls - confirm intent.
+ std::generate(rand_gen_buf.begin(), rand_gen_buf.end(), float_rand);
+ original_buffer.split_data_f()->SetDataForTesting(rand_gen_buf.data(),
+ rand_gen_buf.size());
+ audio_buffer.split_data_f()->SetDataForTesting(rand_gen_buf.data(),
+ rand_gen_buf.size());
+ }
+ enh_->ProcessRenderAudio(&audio_buffer);  // Only |audio_buffer| is processed; |original_buffer| stays as the reference.
+ for (size_t i = 0u; i < kTestNumBands; ++i) {
+ const float* original_ptr = original_buffer.split_bands_const_f(0)[i];
+ const float* audio_ptr = audio_buffer.split_bands_const_f(0)[i];
+ for (size_t j = initial_delay; j < kTestSplitFragmentSize; ++j) {  // Samples before |initial_delay| are not compared.
+ EXPECT_LT(std::fabs(original_ptr[j - initial_delay] - audio_ptr[j]),
+ kMaxTestError);
+ }
+ }
+}
+
+TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Mono8kHz) {  // DISABLED_ prefix: gtest skips this test unless disabled tests are requested.
+ const float kOutputReference[] = {-0.001892f, -0.003296f, -0.001953f};  // Reference for the start of the last processed render frame.
+
+ RunBitexactnessTest(AudioProcessing::kSampleRate8kHz, 1, kOutputReference);
+}
+
+TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Mono16kHz) {
+ const float kOutputReference[] = {-0.000977f, -0.003296f, -0.002441f};
+
+ RunBitexactnessTest(AudioProcessing::kSampleRate16kHz, 1, kOutputReference);
+}
+
+TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Mono32kHz) {
+ const float kOutputReference[] = {0.003021f, -0.011780f, -0.008209f};
+
+ RunBitexactnessTest(AudioProcessing::kSampleRate32kHz, 1, kOutputReference);
+}
+
+TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Mono48kHz) {
+ const float kOutputReference[] = {-0.027696f, -0.026253f, -0.018001f};
+
+ RunBitexactnessTest(AudioProcessing::kSampleRate48kHz, 1, kOutputReference);
+}
+
+TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Stereo8kHz) {  // Stereo variants pass 2 as the channel count.
+ const float kOutputReference[] = {0.021454f, 0.035919f, 0.026428f,  // NOTE(review): presumably three values per channel - confirm against VerifyDeinterleavedArray.
+ -0.000641f, 0.000366f, 0.000641f};
+
+ RunBitexactnessTest(AudioProcessing::kSampleRate8kHz, 2, kOutputReference);
+}
+
+TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Stereo16kHz) {
+ const float kOutputReference[] = {0.021362f, 0.035736f, 0.023895f,
+ -0.001404f, -0.001465f, 0.000549f};
+
+ RunBitexactnessTest(AudioProcessing::kSampleRate16kHz, 2, kOutputReference);
+}
+
+TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Stereo32kHz) {
+ const float kOutputReference[] = {0.030641f, 0.027406f, 0.028321f,
+ -0.001343f, -0.004578f, 0.000977f};
+
+ RunBitexactnessTest(AudioProcessing::kSampleRate32kHz, 2, kOutputReference);
+}
+
+TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Stereo48kHz) {
+ const float kOutputReference[] = {-0.009276f, -0.001601f, -0.008255f,
+ -0.012975f, -0.015940f, -0.017820f};
+
+ RunBitexactnessTest(AudioProcessing::kSampleRate48kHz, 2, kOutputReference);
+}
+
+} // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/intelligibility_utils.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/intelligibility_utils.cc
new file mode 100644
index 0000000000..b6917f4407
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/intelligibility_utils.cc
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/intelligibility/intelligibility_utils.h"
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <algorithm>
+#include <limits>
+
+#include "rtc_base/numerics/safe_minmax.h"
+
+namespace webrtc {
+
+namespace intelligibility {
+
+namespace {
+
+const float kMinFactor = 0.01f;
+const float kMaxFactor = 100.f;
+
+// Moves |current| towards |target|. The step is expressed as the ratio
+// target / current, clamped to [1 - limit, 1 + limit]; the stepped value is
+// then kept within [kMinFactor, kMaxFactor].
+float UpdateFactor(float target, float current, float limit) {
+  // Epsilon keeps the division defined when |current| is zero.
+  const float denominator = current + std::numeric_limits<float>::epsilon();
+  const float ratio = target / denominator;
+  const float bounded_ratio = rtc::SafeClamp(ratio, 1 - limit, 1 + limit);
+  const float stepped = current * bounded_ratio;
+  return rtc::SafeClamp(stepped, kMinFactor, kMaxFactor);
+}
+
+} // namespace
+
+template<typename T>
+PowerEstimator<T>::PowerEstimator(size_t num_freqs, float decay)
+ : power_(num_freqs, 0.f), decay_(decay) {}  // Every one of the |num_freqs| power entries starts at zero.
+
+// Folds one new array into the running estimate, element by element:
+//   power <- decay * power + (1 - decay) * |data|^2.
+// |data| must provide at least power_.size() elements.
+template<typename T>
+void PowerEstimator<T>::Step(const T* data) {
+  const float update_weight = 1.f - decay_;
+  for (size_t bin = 0; bin < power_.size(); ++bin) {
+    const float magnitude = std::abs(data[bin]);
+    power_[bin] = decay_ * power_[bin] + update_weight * magnitude * magnitude;
+  }
+}
+
+template class PowerEstimator<float>;
+template class PowerEstimator<std::complex<float>>;
+
+GainApplier::GainApplier(size_t freqs, float relative_change_limit)
+ : num_freqs_(freqs),
+ relative_change_limit_(relative_change_limit),
+ target_(freqs, 1.f),  // Target and current gains both start at 1 for every frequency.
+ current_(freqs, 1.f) {}
+
+GainApplier::~GainApplier() {}
+
+// For every frequency, first steps the current gain towards its target (the
+// step is bounded by relative_change_limit_), then writes the input value
+// scaled by the square root of the gain's magnitude to |out_block|.
+void GainApplier::Apply(const std::complex<float>* in_block,
+                        std::complex<float>* out_block) {
+  for (size_t bin = 0; bin < num_freqs_; ++bin) {
+    float& gain = current_[bin];
+    gain = UpdateFactor(target_[bin], gain, relative_change_limit_);
+    const float scale = sqrtf(fabsf(gain));
+    out_block[bin] = scale * in_block[bin];
+  }
+}
+
+DelayBuffer::DelayBuffer(size_t delay, size_t num_channels)
+ : buffer_(num_channels, std::vector<float>(delay, 0.f)), read_index_(0u) {}  // One zero-filled vector of |delay| samples per channel.
+
+DelayBuffer::~DelayBuffer() {}
+
+// Delays |data| by the number of samples each channel vector was constructed
+// with. |data| holds one pointer per channel, each to |length| samples; every
+// incoming sample is swapped with the oldest stored sample of its channel.
+void DelayBuffer::Delay(float* const* data, size_t length) {
+  size_t sample_index = read_index_;
+  for (size_t i = 0u; i < buffer_.size(); ++i) {
+    std::vector<float>& channel = buffer_[i];
+    if (channel.empty()) {
+      // Zero delay: nothing is stored, so the samples pass through untouched.
+      continue;
+    }
+    // Every channel starts from the same position so they stay aligned.
+    sample_index = read_index_;
+    for (size_t j = 0u; j < length; ++j) {
+      const float incoming = data[i][j];
+      data[i][j] = channel[sample_index];
+      channel[sample_index] = incoming;
+      // Wrap at the per-channel delay length, not at the channel count.
+      if (++sample_index == channel.size()) {
+        sample_index = 0u;
+      }
+    }
+  }
+  read_index_ = sample_index;
+}
+
+} // namespace intelligibility
+
+} // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h b/third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h
new file mode 100644
index 0000000000..4dc17d50b5
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_UTILS_H_
+#define MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_UTILS_H_
+
+#include <complex>
+#include <vector>
+
+namespace webrtc {
+
+namespace intelligibility {
+
+// Internal helper for computing the power of a stream of arrays.
+// The result is an array of power per position: the i-th power is the power of
+// the stream of data on the i-th positions in the input arrays.
+template <typename T>
+class PowerEstimator {
+ public:
+  // Construct an instance for the given input array length (|freqs|), with the
+  // appropriate parameters. |decay| is the forgetting factor.
+  PowerEstimator(size_t freqs, float decay);
+
+  // Add a new data point to the series. |data| must hold at least as many
+  // elements as the |freqs| given at construction.
+  void Step(const T* data);
+
+  // The current power array. Read-only, so it is callable on const instances.
+  const std::vector<float>& power() const { return power_; }
+
+ private:
+  // The current power array.
+  std::vector<float> power_;
+
+  // Forgetting factor applied to the previous estimate on every Step().
+  const float decay_;
+};
+
+// Helper class for smoothing gain changes. On each application step, the
+// currently used gains are changed towards a set of settable target gains,
+// constrained by a limit on the relative changes.
+class GainApplier {
+ public:
+ GainApplier(size_t freqs, float relative_change_limit);
+
+ ~GainApplier();
+
+ // Step the current set of gains towards the target set, then write |in_block|
+ // to |out_block| with each entry scaled by the square root of its gain.
+ void Apply(const std::complex<float>* in_block,
+ std::complex<float>* out_block);
+
+ // Return the current target gain set. Modify this array to set the targets.
+ float* target() { return target_.data(); }
+
+ private:
+ const size_t num_freqs_;  // Number of entries processed by Apply().
+ const float relative_change_limit_;  // Bound on the relative gain change per Apply() call.
+ std::vector<float> target_;  // Gains being approached; writable through target().
+ std::vector<float> current_;  // Gains actually applied; updated on every Apply() call.
+};
+
+// Helper class to delay a signal by an integer number of samples.
+class DelayBuffer {
+ public:
+ DelayBuffer(size_t delay, size_t num_channels);  // Allocates |delay| zeroed samples for each of |num_channels| channels.
+
+ ~DelayBuffer();
+
+ void Delay(float* const* data, size_t length);  // Swaps |length| samples per channel of |data| with the stored samples, in place.
+
+ private:
+ std::vector<std::vector<float>> buffer_;  // Stored samples, one inner vector per channel.
+ size_t read_index_;  // Position in each channel vector where the next Delay() call starts.
+};
+
+} // namespace intelligibility
+
+} // namespace webrtc
+
+#endif // MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_UTILS_H_
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/intelligibility_utils_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/intelligibility_utils_unittest.cc
new file mode 100644
index 0000000000..fea394c338
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/intelligibility_utils_unittest.cc
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <cmath>
+#include <complex>
+#include <vector>
+
+#include "modules/audio_processing/intelligibility/intelligibility_utils.h"
+#include "rtc_base/arraysize.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+
+namespace intelligibility {
+
+// Builds |samples| rows of |freqs| complex values. The entry in row i, column
+// j has real and imaginary parts both equal to 0.99 / ((i + 1) * (j + 1)).
+std::vector<std::vector<std::complex<float>>> GenerateTestData(size_t freqs,
+                                                               size_t samples) {
+  std::vector<std::vector<std::complex<float>>> rows(samples);
+  size_t row_number = 1;
+  for (auto& row : rows) {
+    for (size_t column = 0; column < freqs; ++column) {
+      const float value = 0.99f / (row_number * (column + 1));
+      row.emplace_back(value, value);
+    }
+    ++row_number;
+  }
+  return rows;
+}
+
+// Tests PowerEstimator, for all power step types.
+TEST(IntelligibilityUtilsTest, TestPowerEstimator) {
+ const size_t kFreqs = 10;
+ const size_t kSamples = 100;
+ const float kDecay = 0.5f;
+ const std::vector<std::vector<std::complex<float>>> test_data(
+ GenerateTestData(kFreqs, kSamples));
+ PowerEstimator<std::complex<float>> power_estimator(kFreqs, kDecay);
+ EXPECT_EQ(0, power_estimator.power()[0]);  // Power is zero before any Step() call.
+
+ // Makes sure Step is doing something.
+ power_estimator.Step(test_data[0].data());
+ for (size_t i = 1; i < kSamples; ++i) {
+ power_estimator.Step(test_data[i].data());
+ for (size_t j = 0; j < kFreqs; ++j) {
+ EXPECT_GE(power_estimator.power()[j], 0.f);  // Only range-checks the estimate; exact values are not verified.
+ EXPECT_LE(power_estimator.power()[j], 1.f);
+ }
+ }
+}
+
+// Tests gain applier.
+TEST(IntelligibilityUtilsTest, TestGainApplier) {
+ const size_t kFreqs = 10;
+ const size_t kSamples = 100;
+ const float kChangeLimit = 0.1f;
+ GainApplier gain_applier(kFreqs, kChangeLimit);  // Targets are left at their constructed values.
+ const std::vector<std::vector<std::complex<float>>> in_data(
+ GenerateTestData(kFreqs, kSamples));
+ std::vector<std::vector<std::complex<float>>> out_data(
+ GenerateTestData(kFreqs, kSamples));  // Gives the output the right dimensions; Apply() overwrites the values.
+ for (size_t i = 0; i < kSamples; ++i) {
+ gain_applier.Apply(in_data[i].data(), out_data[i].data());
+ for (size_t j = 0; j < kFreqs; ++j) {
+ EXPECT_GT(out_data[i][j].real(), 0.f);  // Only checks each output component lies strictly between 0 and 1.
+ EXPECT_LT(out_data[i][j].real(), 1.f);
+ EXPECT_GT(out_data[i][j].imag(), 0.f);
+ EXPECT_LT(out_data[i][j].imag(), 1.f);
+ }
+ }
+}
+
+} // namespace intelligibility
+
+} // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/test/intelligibility_proc.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/test/intelligibility_proc.cc
new file mode 100644
index 0000000000..b90449caa3
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/test/intelligibility_proc.cc
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "common_audio/channel_buffer.h"
+#include "common_audio/include/audio_util.h"
+#include "common_audio/wav_file.h"
+#include "modules/audio_processing/audio_buffer.h"
+#include "modules/audio_processing/intelligibility/intelligibility_enhancer.h"
+#include "modules/audio_processing/noise_suppression_impl.h"
+#include "rtc_base/criticalsection.h"
+#include "rtc_base/flags.h"
+
+using std::complex;
+
+namespace webrtc {
+namespace {
+
+DEFINE_string(clear_file, "speech.wav", "Input file with clear speech.");
+DEFINE_string(noise_file, "noise.wav", "Input file with noise data.");
+DEFINE_string(out_file, "proc_enhanced.wav", "Enhanced output file.");
+DEFINE_bool(help, false, "Print this message.");
+
+// Enhances the speech in --clear_file against the noise in --noise_file and
+// writes the result to --out_file. Both inputs are consumed in chunks of
+// 1/100 s until either one cannot supply a full chunk. Returns 1 when
+// SetFlagsFromCommandLine reports a non-zero result, 0 otherwise.
+int int_main(int argc, char* argv[]) {
+  if (rtc::FlagList::SetFlagsFromCommandLine(&argc, argv, true)) {
+    return 1;
+  }
+  if (FLAG_help) {
+    rtc::FlagList::Print(nullptr, false);
+    return 0;
+  }
+  if (argc != 1) {
+    // NOTE(review): the inputs are opened with WavReader below, so this
+    // message about raw PCM looks stale - confirm before rewording.
+    printf("\n\nInput files must be little-endian 16-bit signed raw PCM.\n");
+    return 0;
+  }
+
+  WavReader in_file(FLAG_clear_file);
+  WavReader noise_file(FLAG_noise_file);
+  WavWriter out_file(FLAG_out_file, in_file.sample_rate(),
+                     in_file.num_channels());
+  rtc::CriticalSection crit;
+  NoiseSuppressionImpl ns(&crit);
+  IntelligibilityEnhancer enh(in_file.sample_rate(), in_file.num_channels(), 1u,
+                              NoiseSuppressionImpl::num_noise_bins());
+  ns.Initialize(noise_file.num_channels(), noise_file.sample_rate());
+  ns.Enable(true);
+  // Each stream is chunked using its own sample rate.
+  const size_t in_samples = in_file.sample_rate() / 100;
+  const size_t noise_samples = noise_file.sample_rate() / 100;
+  std::vector<float> in(in_samples * in_file.num_channels());
+  std::vector<float> noise(noise_samples * noise_file.num_channels());
+  ChannelBuffer<float> in_buf(in_samples, in_file.num_channels());
+  ChannelBuffer<float> noise_buf(noise_samples, noise_file.num_channels());
+  AudioBuffer capture_audio(noise_samples, noise_file.num_channels(),
+                            noise_samples, noise_file.num_channels(),
+                            noise_samples);
+  AudioBuffer render_audio(in_samples, in_file.num_channels(), in_samples,
+                           in_file.num_channels(), in_samples);
+  StreamConfig noise_config(noise_file.sample_rate(),
+                            noise_file.num_channels());
+  StreamConfig in_config(in_file.sample_rate(), in_file.num_channels());
+  while (in_file.ReadSamples(in.size(), in.data()) == in.size() &&
+         noise_file.ReadSamples(noise.size(), noise.data()) == noise.size()) {
+    FloatS16ToFloat(noise.data(), noise.size(), noise.data());
+    FloatS16ToFloat(in.data(), in.size(), in.data());
+    Deinterleave(in.data(), in_buf.num_frames(), in_buf.num_channels(),
+                 in_buf.channels());
+    Deinterleave(noise.data(), noise_buf.num_frames(), noise_buf.num_channels(),
+                 noise_buf.channels());
+    capture_audio.CopyFrom(noise_buf.channels(), noise_config);
+    render_audio.CopyFrom(in_buf.channels(), in_config);
+    // The noise estimate is refreshed first so the enhancer sees this chunk's
+    // estimate when it processes the speech.
+    ns.AnalyzeCaptureAudio(&capture_audio);
+    ns.ProcessCaptureAudio(&capture_audio);
+    enh.SetCaptureNoiseEstimate(ns.NoiseEstimate(), 1);
+    enh.ProcessRenderAudio(&render_audio);
+    render_audio.CopyTo(in_config, in_buf.channels());
+    Interleave(in_buf.channels(), in_buf.num_frames(), in_buf.num_channels(),
+               in.data());
+    FloatToFloatS16(in.data(), in.size(), in.data());
+    out_file.WriteSamples(in.data(), in.size());
+  }
+
+  return 0;
+}
+
+} // namespace
+} // namespace webrtc
+
+int main(int argc, char* argv[]) {  // Program entry point; all work happens in webrtc::int_main.
+ return webrtc::int_main(argc, argv);
+}