Adding upstream version 86.0.1. (refs: upstream/86.0.1, upstream)

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-28 14:29:10 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-28 14:29:10 +0000
commit: 2aa4a82499d4becd2284cdb482213d541b8804dd (patch)
tree: b80bf8bf13c3766139fbacc530efd0dd9d54394c /third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility
parent: Initial commit. (diff)
download: firefox-upstream.tar.xz
firefox-upstream.zip
7 files changed, 1420 insertions, 0 deletions
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc
new file mode 100644
index 0000000000..0e696d9fff
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc
@@ -0,0 +1,392 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/intelligibility/intelligibility_enhancer.h"
+
+#include <math.h>
+#include <stdlib.h>
+#include <algorithm>
+#include <limits>
+#include <numeric>
+
+#include "common_audio/include/audio_util.h"
+#include "common_audio/window_generator.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/numerics/safe_minmax.h"
+
+namespace webrtc {
+
+namespace {
+
+// Multiplier applied to an ERB-scale index to obtain a filter index; used when
+// sizing the filter bank and when locating the first band whose gain is
+// solved for.
+const size_t kErbResolution = 2;
+// Analysis window length in milliseconds; determines the FFT size.
+const int kWindowSizeMs = 16;
+const int kChunkSizeMs = 10;  // Size provided by APM.
+// Frequency (in kHz) from which the first adjustable ERB band is derived.
+// Bands below it keep unity gain.
+const float kClipFreqKhz = 0.2f;
+// Alpha parameter passed to the Kaiser-Bessel-derived window generator.
+const float kKbdAlpha = 1.5f;
+const float kLambdaBot = -1.f;    // Extreme values in bisection
+const float kLambdaTop = -1e-5f;  // search for lambda.
+// Voice probability above which a chunk is treated as containing voice.
+const float kVoiceProbabilityThreshold = 0.5f;
+// Number of chunks after voice activity which is still considered speech.
+const size_t kSpeechOffsetDelay = 10;
+// Power estimation decay rate. Also used to smooth the SNR estimate.
+const float kDecayRate = 0.995f;
+// Passed to the gain applier as its relative-change limit.
+const float kMaxRelativeGainChange = 0.005f;
+const float kRho = 0.0004f;  // Default production and interpretation SNR.
+// Scale factor (2^-30) applied to every ERB band power.
+const float kPowerNormalizationFactor = 1.f / (1 << 30);
+// While active, the enhancer is switched off once the SNR exceeds this value.
+const float kMaxActiveSNR = 128.f;  // 21dB
+// While inactive, the enhancer is switched on once the SNR drops below this
+// value.
+const float kMinInactiveSNR = 32.f;  // 15dB
+// The gains are recomputed once every |kGainUpdatePeriod| processed blocks.
+const size_t kGainUpdatePeriod = 10u;
+
+// Computes the inner product of the first |length| entries of |a| and |b|.
+// Evaluates to 0 when |length| is 0.
+float DotProduct(const float* a, const float* b, size_t length) {
+  return std::inner_product(a, a + length, b, 0.f);
+}
+
+// Projects the power spectral density |pow| onto every filter of
+// |filter_bank|, scales each projection by kPowerNormalizationFactor and
+// writes one value per filter into |result|. |result| must have room for
+// filter_bank.size() entries and |pow| must be at least as long as each filter.
+void MapToErbBands(const float* pow,
+                   const std::vector<std::vector<float>>& filter_bank,
+                   float* result) {
+  size_t band = 0;
+  for (const std::vector<float>& filter : filter_bank) {
+    RTC_DCHECK_GT(filter.size(), 0);
+    const float band_power = DotProduct(filter.data(), pow, filter.size());
+    result[band] = kPowerNormalizationFactor * band_power;
+    ++band;
+  }
+}
+
+}  // namespace
+
+// Sets up the enhancer.
+//   sample_rate_hz:      rate of the audio handed to ProcessRenderAudio();
+//                        determines the FFT size, chunk length and ERB bank
+//                        size.
+//   num_render_channels: number of render channels processed per chunk.
+//   num_bands:           number of split bands in the render audio. One delay
+//                        buffer is created for every band above the lowest.
+//   num_noise_bins:      length of the noise estimates passed to
+//                        SetCaptureNoiseEstimate().
+IntelligibilityEnhancer::IntelligibilityEnhancer(int sample_rate_hz,
+                                                 size_t num_render_channels,
+                                                 size_t num_bands,
+                                                 size_t num_noise_bins)
+    : freqs_(RealFourier::ComplexLength(
+          RealFourier::FftOrder(sample_rate_hz * kWindowSizeMs / 1000))),
+      num_noise_bins_(num_noise_bins),
+      chunk_length_(static_cast<size_t>(sample_rate_hz * kChunkSizeMs / 1000)),
+      bank_size_(GetBankSize(sample_rate_hz, kErbResolution)),
+      sample_rate_hz_(sample_rate_hz),
+      num_render_channels_(num_render_channels),
+      clear_power_estimator_(freqs_, kDecayRate),
+      noise_power_estimator_(num_noise_bins, kDecayRate),
+      filtered_clear_pow_(bank_size_, 0.f),
+      // MapToErbBands() writes |bank_size_| values into this buffer and
+      // SolveForGainsGivenLambda() reads as many, so it must never be shorter
+      // than |bank_size_|. It is also never made shorter than the previous
+      // |num_noise_bins| length.
+      filtered_noise_pow_(std::max(bank_size_, num_noise_bins), 0.f),
+      center_freqs_(bank_size_),
+      capture_filter_bank_(CreateErbBank(num_noise_bins)),
+      render_filter_bank_(CreateErbBank(freqs_)),
+      gains_eq_(bank_size_),
+      gain_applier_(freqs_, kMaxRelativeGainChange),
+      audio_s16_(chunk_length_),
+      chunks_since_voice_(kSpeechOffsetDelay),
+      is_speech_(false),
+      snr_(kMaxActiveSNR),
+      is_active_(false),
+      num_chunks_(0u),
+      num_active_chunks_(0u),
+      noise_estimation_buffer_(num_noise_bins),
+      noise_estimation_queue_(kMaxNumNoiseEstimatesToBuffer,
+                              std::vector<float>(num_noise_bins),
+                              RenderQueueItemVerifier<float>(num_noise_bins)) {
+  RTC_DCHECK_LE(kRho, 1.f);
+
+  // First ERB band whose gain is solved for; lower bands keep unity gain.
+  const size_t erb_index = static_cast<size_t>(
+      ceilf(11.17f * logf((kClipFreqKhz + 0.312f) / (kClipFreqKhz + 14.6575f)) +
+            43.f));
+  start_freq_ = std::max(static_cast<size_t>(1), erb_index * kErbResolution);
+
+  size_t window_size = static_cast<size_t>(1) << RealFourier::FftOrder(freqs_);
+  std::vector<float> kbd_window(window_size);
+  WindowGenerator::KaiserBesselDerived(kKbdAlpha, window_size,
+                                       kbd_window.data());
+  render_mangler_.reset(new LappedTransform(
+      num_render_channels_, num_render_channels_, chunk_length_,
+      kbd_window.data(), window_size, window_size / 2, this));
+
+  // One delay buffer per band above the lowest one. Written as
+  // |i + 1 < num_bands| so that num_bands == 0 cannot wrap around and spin
+  // the loop.
+  const size_t initial_delay = render_mangler_->initial_delay();
+  for (size_t i = 0u; i + 1 < num_bands; ++i) {
+    high_bands_buffers_.push_back(std::unique_ptr<intelligibility::DelayBuffer>(
+        new intelligibility::DelayBuffer(initial_delay, num_render_channels_)));
+  }
+}
+
+// Logs the share of processed chunks during which the enhancer was active.
+IntelligibilityEnhancer::~IntelligibilityEnhancer() {
+  // This log is best effort only: the destructor isn't called when the
+  // app/tab is killed.
+  if (num_chunks_ == 0) {
+    RTC_LOG(LS_INFO) << "Intelligibility Enhancer processed no chunk.";
+    return;
+  }
+  const float active_percent =
+      100.f * static_cast<float>(num_active_chunks_) / num_chunks_;
+  RTC_LOG(LS_INFO) << "Intelligibility Enhancer was active for "
+                   << active_percent << "% of the call.";
+}
+
+// Scales every bin of |noise| by |gain| and queues the result for the render
+// side. |noise| must contain |num_noise_bins_| entries.
+void IntelligibilityEnhancer::SetCaptureNoiseEstimate(
+    std::vector<float> noise, float gain) {
+  RTC_DCHECK_EQ(noise.size(), num_noise_bins_);
+  std::transform(noise.begin(), noise.end(), noise.begin(),
+                 [gain](float bin) { return bin * gain; });
+  // The result of Insert() is deliberately dropped: a full queue is
+  // acceptable because it is not critical to get each noise estimate.
+  if (noise_estimation_queue_.Insert(&noise)) {
+  }
+}
+
+// Processes one render chunk in place: consumes the queued noise estimates,
+// runs speech detection on the first channel of the lowest band, enhances the
+// lowest band and delays the remaining bands.
+void IntelligibilityEnhancer::ProcessRenderAudio(AudioBuffer* audio) {
+  RTC_DCHECK_EQ(num_render_channels_, audio->num_channels());
+
+  // Feed every pending capture-side noise estimate to the noise power
+  // estimator.
+  while (noise_estimation_queue_.Remove(&noise_estimation_buffer_)) {
+    noise_power_estimator_.Step(noise_estimation_buffer_.data());
+  }
+
+  float* const* lowest_band = audio->split_channels_f(kBand0To8kHz);
+  is_speech_ = IsSpeech(lowest_band[0]);
+  // Input and output are the same buffers, so the band is modified in place.
+  render_mangler_->ProcessChunk(lowest_band, lowest_band);
+
+  DelayHighBands(audio);
+}
+
+// LappedTransform callback. Updates the clear-speech power estimate from the
+// first input channel, refreshes the activation state, periodically recomputes
+// the target gains and finally applies the gains to every input channel.
+// |frames| must equal |freqs_|.
+void IntelligibilityEnhancer::ProcessAudioBlock(
+    const std::complex<float>* const* in_block,
+    size_t in_channels,
+    size_t frames,
+    size_t /* out_channels */,
+    std::complex<float>* const* out_block) {
+  RTC_DCHECK_EQ(freqs_, frames);
+  if (is_speech_) {
+    clear_power_estimator_.Step(in_block[0]);
+  }
+  SnrBasedEffectActivation();
+  ++num_chunks_;
+  if (is_active_) {
+    ++num_active_chunks_;
+  }
+
+  // The gains are only recomputed while active, once every
+  // |kGainUpdatePeriod| blocks.
+  const bool update_gains =
+      is_active_ && num_chunks_ % kGainUpdatePeriod == 0;
+  if (update_gains) {
+    float* const clear_erb_pow = filtered_clear_pow_.data();
+    MapToErbBands(clear_power_estimator_.power().data(), render_filter_bank_,
+                  clear_erb_pow);
+    MapToErbBands(noise_power_estimator_.power().data(), capture_filter_bank_,
+                  filtered_noise_pow_.data());
+
+    // Output power obtained at both ends of the lambda search interval.
+    SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.data());
+    const float power_top =
+        DotProduct(gains_eq_.data(), clear_erb_pow, bank_size_);
+    SolveForGainsGivenLambda(kLambdaBot, start_freq_, gains_eq_.data());
+    const float power_bot =
+        DotProduct(gains_eq_.data(), clear_erb_pow, bank_size_);
+
+    // The target is the unmodified total power across the ERB bands.
+    const float power_target =
+        std::accumulate(clear_erb_pow, clear_erb_pow + bank_size_, 0.f);
+
+    const bool target_in_range =
+        power_bot <= power_target && power_target <= power_top;
+    if (target_in_range) {
+      SolveForLambda(power_target);
+      UpdateErbGains();
+    }  // Otherwise experiencing power underflow, so do nothing.
+  }
+
+  for (size_t channel = 0; channel < in_channels; ++channel) {
+    gain_applier_.Apply(in_block[channel], out_block[channel]);
+  }
+}
+
+// Updates the exponentially smoothed SNR estimate |snr_| from the current
+// clear-speech and noise power estimates, then switches the enhancer on or
+// off with hysteresis: it is deactivated above kMaxActiveSNR and activated
+// below kMinInactiveSNR. On deactivation the target gains are reset to unity.
+void IntelligibilityEnhancer::SnrBasedEffectActivation() {
+  const float* clear_psd = clear_power_estimator_.power().data();
+  const float* noise_psd = noise_power_estimator_.power().data();
+  const float clear_power =
+      std::accumulate(clear_psd, clear_psd + freqs_, 0.f);
+  // The noise estimator holds |num_noise_bins_| values, which need not equal
+  // |freqs_|. Cap the range so the sum never reads past the end of the noise
+  // spectrum.
+  // NOTE(review): when |num_noise_bins_| > |freqs_| only the first |freqs_|
+  // noise bins are summed, as before - confirm that this is intended.
+  const size_t num_noise_terms = std::min(freqs_, num_noise_bins_);
+  const float noise_power =
+      std::accumulate(noise_psd, noise_psd + num_noise_terms, 0.f);
+  // Epsilon keeps the division finite when there is no noise power.
+  snr_ = kDecayRate * snr_ + (1.f - kDecayRate) * clear_power /
+      (noise_power + std::numeric_limits<float>::epsilon());
+  if (is_active_) {
+    if (snr_ > kMaxActiveSNR) {
+      RTC_LOG(LS_INFO) << "Intelligibility Enhancer was deactivated at chunk "
+                       << num_chunks_;
+      is_active_ = false;
+      // Set the target gains to unity.
+      float* gains = gain_applier_.target();
+      for (size_t i = 0; i < freqs_; ++i) {
+        gains[i] = 1.f;
+      }
+    }
+  } else {
+    if (snr_ < kMinInactiveSNR) {
+      RTC_LOG(LS_INFO) << "Intelligibility Enhancer was activated at chunk "
+                       << num_chunks_;
+      is_active_ = true;
+    }
+  }
+}
+
+// Bisects lambda between kLambdaBot and kLambdaTop until the power obtained
+// with the resulting gains is within kConvergeThresh (relative) of
+// |power_target|, or the iteration budget runs out. The gains for the last
+// lambda tried are left in |gains_eq_|.
+void IntelligibilityEnhancer::SolveForLambda(float power_target) {
+  const float kConvergeThresh = 0.001f;  // TODO(ekmeyerson): Find best values
+  const int kMaxIters = 100;             // for these, based on experiments.
+
+  // Epsilon guards against a zero target.
+  const float reciprocal_power_target =
+      1.f / (power_target + std::numeric_limits<float>::epsilon());
+  float lower = kLambdaBot;
+  float upper = kLambdaTop;
+  float power_ratio = 2.f;  // Ratio of achieved power to target power.
+  for (int iters = 0;
+       iters <= kMaxIters && std::fabs(power_ratio - 1.f) > kConvergeThresh;
+       ++iters) {
+    const float lambda = (lower + upper) / 2.f;
+    SolveForGainsGivenLambda(lambda, start_freq_, gains_eq_.data());
+    const float power =
+        DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_);
+    // Keep the half of the interval that still brackets the target.
+    (power < power_target ? lower : upper) = lambda;
+    power_ratio = std::fabs(power * reciprocal_power_target);
+  }
+}
+
+// Expands the per-ERB-band gains in |gains_eq_| into per-frequency-bin target
+// gains of the gain applier.
+void IntelligibilityEnhancer::UpdateErbGains() {
+  // (freq gain) = filterbank' * (ERB gain)
+  float* const freq_gains = gain_applier_.target();
+  for (size_t bin = 0; bin < freqs_; ++bin) {
+    float weighted_sum = 0.f;
+    for (size_t band = 0; band < bank_size_; ++band) {
+      weighted_sum += render_filter_bank_[band][bin] * gains_eq_[band];
+    }
+    freq_gains[bin] = weighted_sum;
+  }
+}
+
+// Returns the number of ERB filters for |sample_rate|: the ERB-scale value of
+// half the sample rate (expressed in kHz), rounded up, times
+// |erb_resolution|.
+size_t IntelligibilityEnhancer::GetBankSize(int sample_rate,
+                                            size_t erb_resolution) {
+  const float half_rate_khz = sample_rate / 2000.f;
+  const float erb_value =
+      11.17f * logf((half_rate_khz + 0.312f) / (half_rate_khz + 14.6575f)) +
+      43.f;
+  return static_cast<size_t>(ceilf(erb_value)) * erb_resolution;
+}
+
+// Builds a bank of |bank_size_| filters, each with |num_freqs| weights, and
+// returns it. As a side effect |center_freqs_| is recomputed on every call.
+// Each filter rises linearly, stays flat at 1 and falls linearly; afterwards
+// every frequency bin is normalized so its weights across all filters sum to 1.
+std::vector<std::vector<float>> IntelligibilityEnhancer::CreateErbBank(
+    size_t num_freqs) {
+  std::vector<std::vector<float>> filter_bank(bank_size_);
+  // Number of bands the rising (lf) and falling (rf) slopes reach to the left
+  // and right of the current band.
+  size_t lf = 1, rf = 4;
+
+  // Center frequency of every band, derived from its (1-based) ERB position.
+  for (size_t i = 0; i < bank_size_; ++i) {
+    float abs_temp = fabsf((i + 1.f) / static_cast<float>(kErbResolution));
+    center_freqs_[i] = 676170.4f / (47.06538f - expf(0.08950404f * abs_temp));
+    center_freqs_[i] -= 14678.49f;
+  }
+  // Rescale so that the last center frequency equals half the sample rate.
+  // NOTE(review): indexes |bank_size_ - 1|, so this presumes bank_size_ > 0.
+  float last_center_freq = center_freqs_[bank_size_ - 1];
+  for (size_t i = 0; i < bank_size_; ++i) {
+    center_freqs_[i] *= 0.5f * sample_rate_hz_ / last_center_freq;
+  }
+
+  for (size_t i = 0; i < bank_size_; ++i) {
+    filter_bank[i].resize(num_freqs);
+  }
+
+  // |i| is the 1-based band index; filter_bank[i - 1] is the filter built.
+  for (size_t i = 1; i <= bank_size_; ++i) {
+    // Bin positions of the center frequencies of band i - lf (lll) and band i
+    // (ll), with the band index limited to at least 1.
+    size_t lll = static_cast<size_t>(
+        round(center_freqs_[rtc::SafeMax<size_t>(1, i - lf) - 1] * num_freqs /
+              (0.5f * sample_rate_hz_)));
+    size_t ll = static_cast<size_t>(
+        round(center_freqs_[rtc::SafeMax<size_t>(1, i) - 1] * num_freqs /
+              (0.5f * sample_rate_hz_)));
+    // Clamp to [1, num_freqs] and convert to 0-based bin indices.
+    lll = rtc::SafeClamp<size_t>(lll, 1, num_freqs) - 1;
+    ll = rtc::SafeClamp<size_t>(ll, 1, num_freqs) - 1;
+
+    // Bin positions of the center frequencies of band i + rf (rrr) and band
+    // i + 1 (rr), with the band index limited to at most |bank_size_|.
+    size_t rrr = static_cast<size_t>(
+        round(center_freqs_[rtc::SafeMin<size_t>(bank_size_, i + rf) - 1] *
+              num_freqs / (0.5f * sample_rate_hz_)));
+    size_t rr = static_cast<size_t>(
+        round(center_freqs_[rtc::SafeMin<size_t>(bank_size_, i + 1) - 1] *
+              num_freqs / (0.5f * sample_rate_hz_)));
+    rrr = rtc::SafeClamp<size_t>(rrr, 1, num_freqs) - 1;
+    rr = rtc::SafeClamp<size_t>(rr, 1, num_freqs) - 1;
+
+    // Rising slope over bins [lll, ll], starting at 0.
+    float step = ll == lll ? 0.f : 1.f / (ll - lll);
+    float element = 0.f;
+    for (size_t j = lll; j <= ll; ++j) {
+      filter_bank[i - 1][j] = element;
+      element += step;
+    }
+    // Falling slope over bins [rr, rrr], starting at 1.
+    step = rr == rrr ? 0.f : 1.f / (rrr - rr);
+    element = 1.f;
+    for (size_t j = rr; j <= rrr; ++j) {
+      filter_bank[i - 1][j] = element;
+      element -= step;
+    }
+    // Flat top over bins [ll, rr]; overwrites the slope end points with 1.
+    for (size_t j = ll; j <= rr; ++j) {
+      filter_bank[i - 1][j] = 1.f;
+    }
+  }
+
+  // Normalize every bin so that its weights across all filters sum to 1.
+  // NOTE(review): a bin covered by no filter would give sum == 0 and a
+  // division by zero here - confirm that every bin is always covered.
+  for (size_t i = 0; i < num_freqs; ++i) {
+    float sum = 0.f;
+    for (size_t j = 0; j < bank_size_; ++j) {
+      sum += filter_bank[j][i];
+    }
+    for (size_t j = 0; j < bank_size_; ++j) {
+      filter_bank[j][i] /= sum;
+    }
+  }
+  return filter_bank;
+}
+
+// Writes one gain per ERB band into |sols| for the given |lambda|. Bands
+// below |start_freq|, and bands whose clear or noise power is below
+// kMinPower, get unity gain. All other bands get the root of the gain
+// quadratic, floored at zero. |sols| must have room for |bank_size_| entries.
+void IntelligibilityEnhancer::SolveForGainsGivenLambda(float lambda,
+                                                       size_t start_freq,
+                                                       float* sols) {
+  const float kMinPower = 1e-5f;
+
+  const float* pow_x0 = filtered_clear_pow_.data();
+  const float* pow_n0 = filtered_noise_pow_.data();
+
+  std::fill(sols, sols + start_freq, 1.f);
+
+  // Analytic solution for optimal gains. See paper for derivation.
+  for (size_t n = start_freq; n < bank_size_; ++n) {
+    const float clear = pow_x0[n];
+    const float noise = pow_n0[n];
+    if (clear < kMinPower || noise < kMinPower) {
+      sols[n] = 1.f;
+      continue;
+    }
+    const float gamma0 =
+        0.5f * kRho * clear * noise + lambda * clear * noise * noise;
+    const float beta0 = lambda * clear * (2.f - kRho) * clear * noise;
+    const float alpha0 = lambda * clear * (1.f - kRho) * clear * clear;
+    RTC_DCHECK_LT(alpha0, 0.f);
+    // The quadratic equation should always have real roots, but to guard
+    // against numerical errors the discriminant and the resulting gain are
+    // both limited to a minimum of zero.
+    const float discriminant =
+        std::max(0.f, beta0 * beta0 - 4.f * alpha0 * gamma0);
+    const float root = (-beta0 - std::sqrt(discriminant)) / (2.f * alpha0);
+    sols[n] = std::max(0.f, root);
+  }
+}
+
+// Runs the voice activity detector on one chunk of |audio| and returns true
+// if voice was detected in this chunk or in one of the preceding
+// kSpeechOffsetDelay - 1 chunks.
+bool IntelligibilityEnhancer::IsSpeech(const float* audio) {
+  FloatToS16(audio, chunk_length_, audio_s16_.data());
+  vad_.ProcessChunk(audio_s16_.data(), chunk_length_, sample_rate_hz_);
+
+  const bool voice_detected =
+      vad_.last_voice_probability() > kVoiceProbabilityThreshold;
+  if (voice_detected) {
+    chunks_since_voice_ = 0;
+  } else if (chunks_since_voice_ < kSpeechOffsetDelay) {
+    // The counter saturates at kSpeechOffsetDelay.
+    ++chunks_since_voice_;
+  }
+  return chunks_since_voice_ < kSpeechOffsetDelay;
+}
+
+// Passes every band above the lowest one through its own delay buffer.
+// |audio| must have exactly one band more than there are delay buffers.
+void IntelligibilityEnhancer::DelayHighBands(AudioBuffer* audio) {
+  RTC_DCHECK_EQ(audio->num_bands(), high_bands_buffers_.size() + 1);
+  size_t band_index = 1;  // Band 0 is the lowest band and is not delayed here.
+  for (const auto& delay_buffer : high_bands_buffers_) {
+    const Band band = static_cast<Band>(band_index);
+    delay_buffer->Delay(audio->split_channels_f(band), chunk_length_);
+    ++band_index;
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h b/third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h
new file mode 100644
index 0000000000..3e0e269c58
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h
@@ -0,0 +1,137 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_
+#define MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_
+
+#include <complex>
+#include <memory>
+#include <vector>
+
+#include "common_audio/channel_buffer.h"
+#include "common_audio/lapped_transform.h"
+#include "modules/audio_processing/audio_buffer.h"
+#include "modules/audio_processing/intelligibility/intelligibility_utils.h"
+#include "modules/audio_processing/render_queue_item_verifier.h"
+#include "modules/audio_processing/vad/voice_activity_detector.h"
+#include "rtc_base/swap_queue.h"
+
+namespace webrtc {
+
+// Speech intelligibility enhancement module. Reads render and capture
+// audio streams and modifies the render stream with a set of gains per
+// frequency bin to enhance speech against the noise background.
+// Details of the model and algorithm can be found in the original paper:
+// http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=6882788
+class IntelligibilityEnhancer : public LappedTransform::Callback {
+ public:
+  // |sample_rate_hz| is the rate of the audio passed to ProcessRenderAudio(),
+  // |num_render_channels| its channel count and |num_bands| its number of
+  // split bands. |num_noise_bins| is the length of the noise estimates passed
+  // to SetCaptureNoiseEstimate().
+  IntelligibilityEnhancer(int sample_rate_hz,
+                          size_t num_render_channels,
+                          size_t num_bands,
+                          size_t num_noise_bins);
+
+  ~IntelligibilityEnhancer() override;
+
+  // Sets the capture noise magnitude spectrum estimate. Every bin of |noise|
+  // is multiplied by |gain| before it is queued for the render side.
+  void SetCaptureNoiseEstimate(std::vector<float> noise, float gain);
+
+  // Reads chunk of speech in time domain and updates with modified signal.
+  void ProcessRenderAudio(AudioBuffer* audio);
+
+  // Returns true while the SNR-based activation logic has the enhancement
+  // switched on. Defined inline because no out-of-line definition exists.
+  bool active() const { return is_active_; }
+
+ protected:
+  // All in frequency domain, receives input |in_block|, applies
+  // intelligibility enhancement, and writes result to |out_block|.
+  void ProcessAudioBlock(const std::complex<float>* const* in_block,
+                         size_t in_channels,
+                         size_t frames,
+                         size_t out_channels,
+                         std::complex<float>* const* out_block) override;
+
+ private:
+  FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestRenderUpdate);
+  FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestErbCreation);
+  FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestSolveForGains);
+  FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest,
+                           TestNoiseGainHasExpectedResult);
+  FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest,
+                           TestAllBandsHaveSameDelay);
+
+  // Updates the SNR estimation and enables or disables this component using a
+  // hysteresis.
+  void SnrBasedEffectActivation();
+
+  // Bisection search for optimal |lambda|.
+  void SolveForLambda(float power_target);
+
+  // Transforms the ERB gains in |gains_eq_| into per-frequency-bin target
+  // gains of |gain_applier_|.
+  void UpdateErbGains();
+
+  // Returns number of ERB filters.
+  static size_t GetBankSize(int sample_rate, size_t erb_resolution);
+
+  // Creates an ERB filterbank with |num_freqs| weights per filter. Also
+  // recomputes |center_freqs_|.
+  std::vector<std::vector<float>> CreateErbBank(size_t num_freqs);
+
+  // Analytically solves quadratic for optimal gains given |lambda|.
+  // Negative gains are set to 0. Stores the results in |sols|.
+  void SolveForGainsGivenLambda(float lambda, size_t start_freq, float* sols);
+
+  // Returns true if the audio is speech.
+  bool IsSpeech(const float* audio);
+
+  // Delays the high bands to compensate for the processing delay in the low
+  // band.
+  void DelayHighBands(AudioBuffer* audio);
+
+  // Capacity of |noise_estimation_queue_|.
+  static const size_t kMaxNumNoiseEstimatesToBuffer = 5;
+
+  const size_t freqs_;         // Num frequencies in frequency domain.
+  const size_t num_noise_bins_;  // Length of each capture noise estimate.
+  const size_t chunk_length_;  // Chunk size in samples.
+  const size_t bank_size_;     // Num ERB filters.
+  const int sample_rate_hz_;
+  const size_t num_render_channels_;
+
+  intelligibility::PowerEstimator<std::complex<float>> clear_power_estimator_;
+  intelligibility::PowerEstimator<float> noise_power_estimator_;
+  std::vector<float> filtered_clear_pow_;  // Clear power per ERB band.
+  std::vector<float> filtered_noise_pow_;  // Noise power per ERB band.
+  std::vector<float> center_freqs_;        // Written by CreateErbBank().
+  std::vector<std::vector<float>> capture_filter_bank_;
+  std::vector<std::vector<float>> render_filter_bank_;
+  size_t start_freq_;  // First ERB band whose gain is solved for.
+
+  std::vector<float> gains_eq_;  // Pre-filter modified gains.
+  intelligibility::GainApplier gain_applier_;
+
+  std::unique_ptr<LappedTransform> render_mangler_;
+
+  VoiceActivityDetector vad_;
+  std::vector<int16_t> audio_s16_;  // Scratch buffer for the VAD input.
+  size_t chunks_since_voice_;
+  bool is_speech_;
+  float snr_;       // Smoothed SNR estimate.
+  bool is_active_;  // Whether the enhancement is currently switched on.
+
+  unsigned long int num_chunks_;         // Blocks processed so far.
+  unsigned long int num_active_chunks_;  // Blocks processed while active.
+
+  std::vector<float> noise_estimation_buffer_;
+  SwapQueue<std::vector<float>, RenderQueueItemVerifier<float>>
+      noise_estimation_queue_;
+
+  std::vector<std::unique_ptr<intelligibility::DelayBuffer>>
+      high_bands_buffers_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer_unittest.cc
new file mode 100644
index 0000000000..98a8dae469
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer_unittest.cc
@@ -0,0 +1,536 @@
+/*
+ *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <math.h>
+#include <stdlib.h>
+
+#include <algorithm>
+#include <memory>
+#include <vector>
+
+#include "api/array_view.h"
+#include "common_audio/signal_processing/include/signal_processing_library.h"
+#include "modules/audio_processing/audio_buffer.h"
+#include "modules/audio_processing/intelligibility/intelligibility_enhancer.h"
+#include "modules/audio_processing/noise_suppression_impl.h"
+#include "modules/audio_processing/test/audio_buffer_tools.h"
+#include "modules/audio_processing/test/bitexactness_tools.h"
+#include "rtc_base/arraysize.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+
+namespace {
+
+// Target output for the ERB creation test: the 42 expected ERB center
+// frequencies, the last of which is 2000. Generated with MATLAB.
+// NOTE(review): values are presumably in Hz - confirm against the test setup.
+const float kTestCenterFreqs[] = {
+    14.5213f, 29.735f,  45.6781f, 62.3884f, 79.9058f, 98.2691f, 117.521f,
+    137.708f, 158.879f, 181.084f, 204.378f, 228.816f, 254.459f, 281.371f,
+    309.618f, 339.273f, 370.411f, 403.115f, 437.469f, 473.564f, 511.497f,
+    551.371f, 593.293f, 637.386f, 683.77f,  732.581f, 783.96f,  838.06f,
+    895.046f, 955.09f,  1018.38f, 1085.13f, 1155.54f, 1229.85f, 1308.32f,
+    1391.22f, 1478.83f, 1571.5f,  1669.55f, 1773.37f, 1883.37f, 2000.f};
+const float kTestFilterBank[][33] = {
+    {0.2f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f,  0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f,  0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+    {0.2f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f,  0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f,  0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+    {0.2f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f,  0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f,  0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+    {0.2f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f,  0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f,  0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+    {0.2f, 0.25f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f,  0.f,   0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f,  0.f,   0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+    {0.f, 0.25f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f,   0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f,   0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+    {0.f, 0.25f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f,   0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f,   0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+    {0.f, 0.25f, 0.25f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f,   0.f,   0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f,   0.f,   0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+    {0.f, 0.f, 0.25f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f, 0.f,   0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f, 0.f,   0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+    {0.f, 0.f, 0.25f, 0.142857f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f, 0.f,   0.f,       0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f, 0.f,   0.f,       0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+    {0.f, 0.f, 0.25f, 0.285714f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f, 0.f,   0.f,       0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f, 0.f,   0.f,       0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+    {0.f, 0.f, 0.f, 0.285714f, 0.142857f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f, 0.f, 0.f,       0.f,       0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f, 0.f, 0.f,       0.f,       0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+    {0.f, 0.f, 0.f, 0.285714f, 0.285714f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f, 0.f, 0.f,       0.f,       0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f, 0.f, 0.f,       0.f,       0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+    {0.f, 0.f, 0.f, 0.f, 0.285714f, 0.142857f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f, 0.f, 0.f, 0.f, 0.f},
+    {0.f, 0.f, 0.f, 0.f, 0.285714f, 0.285714f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f, 0.f, 0.f, 0.f, 0.f},
+    {0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f, 0.142857f, 0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f, 0.f, 0.f, 0.f},
+    {0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f, 0.285714f, 0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f, 0.f, 0.f, 0.f},
+    {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f, 0.142857f, 0.f, 0.f, 0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f, 0.f, 0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f, 0.f, 0.f},
+    {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f, 0.285714f, 0.157895f, 0.f, 0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f,       0.f, 0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f,       0.f, 0.f},
+    {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f, 0.210526f, 0.117647f, 0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f,       0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f,       0.f},
+    {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f, 0.315789f, 0.176471f, 0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f,       0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f,       0.f},
+    {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.315789f, 0.352941f, 0.142857f,
+     0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f},
+    {0.f,       0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.352941f, 0.285714f,
+     0.157895f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,
+     0.f,       0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f},
+    {0.f,       0.f,       0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f,
+     0.210526f, 0.111111f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f,       0.f,       0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+    {0.f, 0.f,       0.f,       0.f,       0.f,       0.f, 0.f, 0.f, 0.f,
+     0.f, 0.285714f, 0.315789f, 0.222222f, 0.111111f, 0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f,       0.f,       0.f,       0.f,       0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f,       0.f,       0.f,       0.f,       0.f},
+    {0.f, 0.f, 0.f,       0.f,       0.f,       0.f,       0.f, 0.f, 0.f,
+     0.f, 0.f, 0.315789f, 0.333333f, 0.222222f, 0.111111f, 0.f, 0.f, 0.f,
+     0.f, 0.f, 0.f,       0.f,       0.f,       0.f,       0.f, 0.f, 0.f,
+     0.f, 0.f, 0.f,       0.f,       0.f,       0.f},
+    {0.f, 0.f, 0.f, 0.f,       0.f,       0.f,       0.f,       0.f, 0.f,
+     0.f, 0.f, 0.f, 0.333333f, 0.333333f, 0.222222f, 0.111111f, 0.f, 0.f,
+     0.f, 0.f, 0.f, 0.f,       0.f,       0.f,       0.f,       0.f, 0.f,
+     0.f, 0.f, 0.f, 0.f,       0.f,       0.f},
+    {0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f,       0.f,       0.f,
+     0.f, 0.f, 0.f, 0.f, 0.333333f, 0.333333f, 0.222222f, 0.111111f, 0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f,       0.f,       0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f,       0.f},
+    {0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f,       0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f, 0.333333f, 0.333333f, 0.222222f, 0.111111f,
+     0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f,       0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+    {0.f,       0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f,
+     0.f,       0.f, 0.f, 0.f, 0.f, 0.f, 0.333333f, 0.333333f, 0.222222f,
+     0.108108f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f,
+     0.f,       0.f, 0.f, 0.f, 0.f, 0.f},
+    {0.f,       0.f,       0.f,        0.f, 0.f, 0.f, 0.f, 0.f,       0.f,
+     0.f,       0.f,       0.f,        0.f, 0.f, 0.f, 0.f, 0.333333f, 0.333333f,
+     0.243243f, 0.153846f, 0.0833333f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,
+     0.f,       0.f,       0.f,        0.f, 0.f, 0.f},
+    {0.f,       0.f,       0.f,       0.f,        0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f,       0.f,       0.f,       0.f,        0.f, 0.f, 0.f, 0.f, 0.333333f,
+     0.324324f, 0.230769f, 0.166667f, 0.0909091f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f,       0.f,       0.f,       0.f,        0.f, 0.f},
+    {0.f,       0.f,       0.f,   0.f,       0.f,        0.f, 0.f, 0.f, 0.f,
+     0.f,       0.f,       0.f,   0.f,       0.f,        0.f, 0.f, 0.f, 0.f,
+     0.324324f, 0.307692f, 0.25f, 0.181818f, 0.0833333f, 0.f, 0.f, 0.f, 0.f,
+     0.f,       0.f,       0.f,   0.f,       0.f,        0.f},
+    {0.f,       0.f,   0.f,       0.f,        0.f, 0.f,       0.f,
+     0.f,       0.f,   0.f,       0.f,        0.f, 0.f,       0.f,
+     0.f,       0.f,   0.f,       0.f,        0.f, 0.307692f, 0.333333f,
+     0.363636f, 0.25f, 0.151515f, 0.0793651f, 0.f, 0.f,       0.f,
+     0.f,       0.f,   0.f,       0.f,        0.f},
+    {0.f,       0.f,       0.f,        0.f,       0.f,       0.f,
+     0.f,       0.f,       0.f,        0.f,       0.f,       0.f,
+     0.f,       0.f,       0.f,        0.f,       0.f,       0.f,
+     0.f,       0.f,       0.166667f,  0.363636f, 0.333333f, 0.242424f,
+     0.190476f, 0.133333f, 0.0689655f, 0.f,       0.f,       0.f,
+     0.f,       0.f,       0.f},
+    {0.f,        0.f, 0.f, 0.f, 0.f,       0.f,      0.f,       0.f,  0.f,
+     0.f,        0.f, 0.f, 0.f, 0.f,       0.f,      0.f,       0.f,  0.f,
+     0.f,        0.f, 0.f, 0.f, 0.333333f, 0.30303f, 0.253968f, 0.2f, 0.137931f,
+     0.0714286f, 0.f, 0.f, 0.f, 0.f,       0.f},
+    {0.f,    0.f,        0.f,      0.f,      0.f,       0.f,       0.f,
+     0.f,    0.f,        0.f,      0.f,      0.f,       0.f,       0.f,
+     0.f,    0.f,        0.f,      0.f,      0.f,       0.f,       0.f,
+     0.f,    0.f,        0.30303f, 0.31746f, 0.333333f, 0.275862f, 0.214286f,
+     0.125f, 0.0655738f, 0.f,      0.f,      0.f},
+    {0.f,   0.f,       0.f,       0.f,        0.f,       0.f,       0.f,
+     0.f,   0.f,       0.f,       0.f,        0.f,       0.f,       0.f,
+     0.f,   0.f,       0.f,       0.f,        0.f,       0.f,       0.f,
+     0.f,   0.f,       0.f,       0.15873f,   0.333333f, 0.344828f, 0.357143f,
+     0.25f, 0.196721f, 0.137931f, 0.0816327f, 0.f},
+    {0.f,     0.f,       0.f,       0.f,       0.f, 0.f,       0.f,
+     0.f,     0.f,       0.f,       0.f,       0.f, 0.f,       0.f,
+     0.f,     0.f,       0.f,       0.f,       0.f, 0.f,       0.f,
+     0.f,     0.f,       0.f,       0.f,       0.f, 0.172414f, 0.357143f,
+     0.3125f, 0.245902f, 0.172414f, 0.102041f, 0.f},
+    {0.f, 0.f,     0.f,       0.f,       0.f,       0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f,     0.f,       0.f,       0.f,       0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f,     0.f,       0.f,       0.f,       0.f, 0.f, 0.f, 0.f,
+     0.f, 0.3125f, 0.327869f, 0.344828f, 0.204082f, 0.f},
+    {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f,       0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f,       0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.163934f, 0.344828f, 0.408163f, 0.5f},
+    {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.204082f, 0.5f}};
+static_assert(arraysize(kTestCenterFreqs) == arraysize(kTestFilterBank),
+              "Test filterbank badly initialized.");
+
+// Reference data for the gain solving test. Generated with MATLAB.
+const size_t kTestStartFreq = 12;  // Lowest integral frequency for ERBs.
+const float kTestZeroVar = 1.f;  // Expected gain when all powers are zero.
+const float kTestNonZeroVarLambdaTop[] = {
+    1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 0.f, 0.f,
+    0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+    0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f};
+static_assert(arraysize(kTestCenterFreqs) ==
+                  arraysize(kTestNonZeroVarLambdaTop),
+              "Power test data badly initialized.");
+const float kMaxTestError = 0.005f;  // Absolute tolerance for comparisons.
+
+// Enhancer initialization parameters shared by the fixture-based tests.
+const int kSamples = 10000;  // Length of the fixture's signal vectors.
+const int kSampleRate = 4000;
+const int kNumChannels = 1;
+const int kFragmentSize = kSampleRate / 100;  // Samples per 1/100 s chunk.
+const size_t kNumNoiseBins = 129;
+const size_t kNumBands = 1;
+
+// Number of frames to process in the bitexactness and noise gain tests.
+const size_t kNumFramesToProcess = 1000;
+
+// Returns the rate the enhancer is constructed with: |sample_rate_hz| capped
+// at AudioProcessing::kSampleRate16kHz.
+int IntelligibilityEnhancerSampleRate(int sample_rate_hz) {
+  if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) {
+    return AudioProcessing::kSampleRate16kHz;
+  }
+  return sample_rate_hz;
+}
+
+// Pushes one render frame through the enhancer and one capture frame through
+// the noise suppressor, then hands the suppressor's noise estimate to the
+// enhancer. For rates above AudioProcessing::kSampleRate16kHz both buffers
+// are split into frequency bands first; only the render buffer is merged
+// back afterwards.
+void ProcessOneFrame(int sample_rate_hz,
+                     AudioBuffer* render_audio_buffer,
+                     AudioBuffer* capture_audio_buffer,
+                     NoiseSuppressionImpl* noise_suppressor,
+                     IntelligibilityEnhancer* intelligibility_enhancer) {
+  const bool use_band_split =
+      sample_rate_hz > AudioProcessing::kSampleRate16kHz;
+
+  if (use_band_split) {
+    render_audio_buffer->SplitIntoFrequencyBands();
+    capture_audio_buffer->SplitIntoFrequencyBands();
+  }
+
+  // Render side.
+  intelligibility_enhancer->ProcessRenderAudio(render_audio_buffer);
+
+  // Capture side.
+  noise_suppressor->AnalyzeCaptureAudio(capture_audio_buffer);
+  noise_suppressor->ProcessCaptureAudio(capture_audio_buffer);
+
+  // Feed the fresh noise estimate back to the enhancer (gain argument 0).
+  intelligibility_enhancer->SetCaptureNoiseEstimate(
+      noise_suppressor->NoiseEstimate(), 0);
+
+  if (use_band_split) {
+    render_audio_buffer->MergeFrequencyBands();
+  }
+}
+
+// Processes a specified amount of frames, verifies the results and reports
+// any errors.
+//
+// Reads kNumFramesToProcess frames from the APM render and capture test
+// vectors for |sample_rate_hz|, runs each pair through ProcessOneFrame() and
+// compares the render output of the last frame with |output_reference|.
+void RunBitexactnessTest(int sample_rate_hz,
+                         size_t num_channels,
+                         rtc::ArrayView<const float> output_reference) {
+  const StreamConfig render_config(sample_rate_hz, num_channels, false);
+  AudioBuffer render_buffer(
+      render_config.num_frames(), render_config.num_channels(),
+      render_config.num_frames(), render_config.num_channels(),
+      render_config.num_frames());
+  test::InputAudioFile render_file(
+      test::GetApmRenderTestVectorFileName(sample_rate_hz));
+  std::vector<float> render_input(render_buffer.num_frames() *
+                                  render_buffer.num_channels());
+
+  const StreamConfig capture_config(sample_rate_hz, num_channels, false);
+  AudioBuffer capture_buffer(
+      capture_config.num_frames(), capture_config.num_channels(),
+      capture_config.num_frames(), capture_config.num_channels(),
+      capture_config.num_frames());
+  test::InputAudioFile capture_file(
+      test::GetApmCaptureTestVectorFileName(sample_rate_hz));
+  // Sized from the capture buffer itself rather than from the render buffer,
+  // so the vector stays correct even if the two configurations ever diverge.
+  std::vector<float> capture_input(capture_buffer.num_frames() *
+                                   capture_buffer.num_channels());
+
+  rtc::CriticalSection crit_capture;
+  NoiseSuppressionImpl noise_suppressor(&crit_capture);
+  noise_suppressor.Initialize(capture_config.num_channels(), sample_rate_hz);
+  noise_suppressor.Enable(true);
+
+  IntelligibilityEnhancer intelligibility_enhancer(
+      IntelligibilityEnhancerSampleRate(sample_rate_hz),
+      render_config.num_channels(), kNumBands,
+      NoiseSuppressionImpl::num_noise_bins());
+
+  for (size_t frame_no = 0u; frame_no < kNumFramesToProcess; ++frame_no) {
+    ReadFloatSamplesFromStereoFile(render_buffer.num_frames(),
+                                   render_buffer.num_channels(), &render_file,
+                                   render_input);
+    ReadFloatSamplesFromStereoFile(capture_buffer.num_frames(),
+                                   capture_buffer.num_channels(), &capture_file,
+                                   capture_input);
+
+    test::CopyVectorToAudioBuffer(render_config, render_input, &render_buffer);
+    test::CopyVectorToAudioBuffer(capture_config, capture_input,
+                                  &capture_buffer);
+
+    ProcessOneFrame(sample_rate_hz, &render_buffer, &capture_buffer,
+                    &noise_suppressor, &intelligibility_enhancer);
+  }
+
+  // Extract and verify the test results.
+  std::vector<float> render_output;
+  test::ExtractVectorFromAudioBuffer(render_config, &render_buffer,
+                                     &render_output);
+
+  // Allowed per-element deviation: 2^-15.
+  const float kElementErrorBound = 1.f / static_cast<float>(1 << 15);
+
+  // Compare the output with the reference. Only the first values of the output
+  // from the last frame processed are compared, so that the preceding frames
+  // do not all have to be specified as test vectors. As the algorithm being
+  // tested has a memory, testing only the last frame implicitly also tests
+  // the preceding frames.
+  EXPECT_TRUE(test::VerifyDeinterleavedArray(
+      render_buffer.num_frames(), render_config.num_channels(),
+      output_reference, render_output, kElementErrorBound));
+}
+
+// Maps the next std::rand() value linearly onto the interval [-1, 1].
+float float_rand() {
+  const float doubled = std::rand() * 2.f;
+  const float scaled = doubled / RAND_MAX;
+  return scaled - 1;
+}
+
+}  // namespace
+
+// Fixture owning an IntelligibilityEnhancer configured with the file-level
+// test parameters, together with the signal and noise vectors the tests
+// manipulate.
+class IntelligibilityEnhancerTest : public ::testing::Test {
+ protected:
+  IntelligibilityEnhancerTest()
+      : clear_buffer_(kFragmentSize,
+                      kNumChannels,
+                      kFragmentSize,
+                      kNumChannels,
+                      kFragmentSize),
+        stream_config_(kSampleRate, kNumChannels),
+        clear_data_(kSamples),
+        noise_data_(kNumNoiseBins),
+        orig_data_(kSamples),
+        // Zero until a test assigns the real delay; CheckUpdate() reads it, so
+        // it must never be left indeterminate.
+        initial_delay_(0u) {
+    // Fixed seed so that float_rand() yields the same sequence in every run.
+    std::srand(1);
+    enh_.reset(new IntelligibilityEnhancer(kSampleRate, kNumChannels, kNumBands,
+                                           kNumNoiseBins));
+  }
+
+  // Recreates the enhancer, runs |clear_data_| through it in place one
+  // fragment at a time (supplying |noise_data_| as the noise estimate before
+  // each fragment), and returns true if any processed sample differs from the
+  // corresponding |orig_data_| sample, shifted by |initial_delay_|, by more
+  // than kMaxTestError.
+  bool CheckUpdate() {
+    enh_.reset(new IntelligibilityEnhancer(kSampleRate, kNumChannels, kNumBands,
+                                           kNumNoiseBins));
+    float* clear_cursor = clear_data_.data();
+    for (int i = 0; i < kSamples; i += kFragmentSize) {
+      enh_->SetCaptureNoiseEstimate(noise_data_, 1);
+      clear_buffer_.CopyFrom(&clear_cursor, stream_config_);
+      enh_->ProcessRenderAudio(&clear_buffer_);
+      clear_buffer_.CopyTo(stream_config_, &clear_cursor);
+      clear_cursor += kFragmentSize;
+    }
+    // Index with size_t to match |initial_delay_| and the vector index type.
+    const size_t num_samples = static_cast<size_t>(kSamples);
+    for (size_t i = initial_delay_; i < num_samples; ++i) {
+      if (std::fabs(clear_data_[i] - orig_data_[i - initial_delay_]) >
+          kMaxTestError) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  std::unique_ptr<IntelligibilityEnhancer> enh_;
+  // Render clean speech buffer.
+  AudioBuffer clear_buffer_;
+  StreamConfig stream_config_;
+  // Signal processed in place by CheckUpdate().
+  std::vector<float> clear_data_;
+  // Noise estimate handed to the enhancer by CheckUpdate().
+  std::vector<float> noise_data_;
+  // Unprocessed copy of the signal that CheckUpdate() compares against.
+  std::vector<float> orig_data_;
+  // Offset, in samples, applied between |orig_data_| and |clear_data_|.
+  size_t initial_delay_;
+};
+
+// For each class of generated data, tests that the render stream is changed
+// by the enhancer only when it should be (see CheckUpdate()).
+TEST_F(IntelligibilityEnhancerTest, TestRenderUpdate) {
+  initial_delay_ = enh_->render_mangler_->initial_delay();
+  std::fill(noise_data_.begin(), noise_data_.end(), 0.f);
+  std::fill(orig_data_.begin(), orig_data_.end(), 0.f);
+  std::fill(clear_data_.begin(), clear_data_.end(), 0.f);
+  EXPECT_FALSE(CheckUpdate());  // Zero signal, zero noise: no change.
+  std::generate(clear_data_.begin(), clear_data_.end(), float_rand);
+  orig_data_ = clear_data_;
+  EXPECT_FALSE(CheckUpdate());  // Random signal, zero noise: no change.
+  std::generate(clear_data_.begin(), clear_data_.end(), float_rand);
+  orig_data_ = clear_data_;
+  std::generate(noise_data_.begin(), noise_data_.end(), float_rand);
+  FloatToFloatS16(noise_data_.data(), noise_data_.size(), noise_data_.data());
+  EXPECT_TRUE(CheckUpdate());  // Random signal, random noise: changed.
+}
+
+// Checks the enhancer's ERB bank (center frequencies and per-frequency filter
+// weights) against the matlab reference tables.
+TEST_F(IntelligibilityEnhancerTest, TestErbCreation) {
+  ASSERT_EQ(arraysize(kTestCenterFreqs), enh_->bank_size_);
+  for (size_t band = 0; band < enh_->bank_size_; ++band) {
+    EXPECT_NEAR(kTestCenterFreqs[band], enh_->center_freqs_[band],
+                kMaxTestError);
+    ASSERT_EQ(arraysize(kTestFilterBank[0]), enh_->freqs_);
+    const auto& filter_row = enh_->render_filter_bank_[band];
+    for (size_t bin = 0; bin < enh_->freqs_; ++bin) {
+      EXPECT_NEAR(kTestFilterBank[band][bin], filter_row[bin], kMaxTestError);
+    }
+  }
+}
+
+// Checks SolveForGainsGivenLambda() against matlab reference output, first
+// with all band powers at zero and then with non-zero powers for two values
+// of lambda.
+TEST_F(IntelligibilityEnhancerTest, TestSolveForGains) {
+  ASSERT_EQ(kTestStartFreq, enh_->start_freq_);
+  const size_t num_banks = enh_->bank_size_;
+  std::vector<float> gains(num_banks);
+  float lambda = -0.001f;
+
+  // Zero clear and noise power: every gain is expected to be kTestZeroVar.
+  for (size_t k = 0; k < num_banks; ++k) {
+    enh_->filtered_clear_pow_[k] = 0.f;
+    enh_->filtered_noise_pow_[k] = 0.f;
+  }
+  enh_->SolveForGainsGivenLambda(lambda, enh_->start_freq_, gains.data());
+  for (size_t k = 0; k < num_banks; ++k) {
+    EXPECT_NEAR(kTestZeroVar, gains[k], kMaxTestError);
+  }
+
+  // Clear power rising and noise power falling with the band index.
+  for (size_t k = 0; k < num_banks; ++k) {
+    enh_->filtered_clear_pow_[k] = static_cast<float>(k + 1);
+    enh_->filtered_noise_pow_[k] = static_cast<float>(num_banks - k);
+  }
+  enh_->SolveForGainsGivenLambda(lambda, enh_->start_freq_, gains.data());
+  for (size_t k = 0; k < num_banks; ++k) {
+    EXPECT_NEAR(kTestNonZeroVarLambdaTop[k], gains[k], kMaxTestError);
+  }
+
+  // The same reference is expected for lambda = -1.
+  lambda = -1.f;
+  enh_->SolveForGainsGivenLambda(lambda, enh_->start_freq_, gains.data());
+  for (size_t k = 0; k < num_banks; ++k) {
+    EXPECT_NEAR(kTestNonZeroVarLambdaTop[k], gains[k], kMaxTestError);
+  }
+}
+
+// Supplies the same random noise estimate with gain kGain before each of
+// kNumFramesToProcess render frames and expects the enhancer's noise power
+// estimate to match (kGain * noise)^2 within a relative tolerance.
+TEST_F(IntelligibilityEnhancerTest, TestNoiseGainHasExpectedResult) {
+  const float kGain = 2.f;
+  const float kTolerance = 0.007f;
+  std::vector<float> noise(kNumNoiseBins);
+  std::vector<float> expected_psd(kNumNoiseBins);
+  std::generate(noise.begin(), noise.end(), float_rand);
+  for (size_t bin = 0; bin < kNumNoiseBins; ++bin) {
+    expected_psd[bin] = kGain * kGain * noise[bin] * noise[bin];
+  }
+
+  // The cursor is never advanced, so each frame copies the same fragment.
+  float* clear_cursor = clear_data_.data();
+  for (size_t frame = 0; frame < kNumFramesToProcess; ++frame) {
+    enh_->SetCaptureNoiseEstimate(noise, kGain);
+    clear_buffer_.CopyFrom(&clear_cursor, stream_config_);
+    enh_->ProcessRenderAudio(&clear_buffer_);
+  }
+
+  const std::vector<float>& estimated_psd =
+      enh_->noise_power_estimator_.power();
+  for (size_t bin = 0; bin < kNumNoiseBins; ++bin) {
+    const float relative_error =
+        std::abs(estimated_psd[bin] - expected_psd[bin]) / expected_psd[bin];
+    EXPECT_LT(relative_error, kTolerance);
+  }
+}
+
+// Processes one frame of split-band audio and expects every band of the
+// output to equal the input delayed by render_mangler_'s initial delay.
+// NOTE(review): the comparison loop only runs when that delay is smaller than
+// kTestSplitFragmentSize - confirm that it is, otherwise nothing is checked.
+TEST_F(IntelligibilityEnhancerTest, TestAllBandsHaveSameDelay) {
+  const int kTestSampleRate = AudioProcessing::kSampleRate32kHz;
+  const int kTestSplitRate = AudioProcessing::kSampleRate16kHz;
+  const size_t kTestNumBands =
+      rtc::CheckedDivExact(kTestSampleRate, kTestSplitRate);
+  const size_t kTestFragmentSize = rtc::CheckedDivExact(kTestSampleRate, 100);
+  const size_t kTestSplitFragmentSize =
+      rtc::CheckedDivExact(kTestSplitRate, 100);
+
+  enh_.reset(new IntelligibilityEnhancer(kTestSplitRate, kNumChannels,
+                                         kTestNumBands, kNumNoiseBins));
+  const size_t delay = enh_->render_mangler_->initial_delay();
+
+  std::vector<float> random_samples(kTestFragmentSize);
+  AudioBuffer reference_buffer(kTestFragmentSize, kNumChannels,
+                               kTestFragmentSize, kNumChannels,
+                               kTestFragmentSize);
+  AudioBuffer processed_buffer(kTestFragmentSize, kNumChannels,
+                               kTestFragmentSize, kNumChannels,
+                               kTestFragmentSize);
+  // Both buffers receive identical random content on every pass.
+  for (size_t band = 0u; band < kTestNumBands; ++band) {
+    std::generate(random_samples.begin(), random_samples.end(), float_rand);
+    reference_buffer.split_data_f()->SetDataForTesting(random_samples.data(),
+                                                       random_samples.size());
+    processed_buffer.split_data_f()->SetDataForTesting(random_samples.data(),
+                                                       random_samples.size());
+  }
+
+  enh_->ProcessRenderAudio(&processed_buffer);
+
+  for (size_t band = 0u; band < kTestNumBands; ++band) {
+    const float* expected = reference_buffer.split_bands_const_f(0)[band];
+    const float* actual = processed_buffer.split_bands_const_f(0)[band];
+    for (size_t n = delay; n < kTestSplitFragmentSize; ++n) {
+      EXPECT_LT(std::fabs(expected[n - delay] - actual[n]), kMaxTestError);
+    }
+  }
+}
+
+TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Mono8kHz) {
+  const float kOutputReference[] = {-0.001892f, -0.003296f, -0.001953f};
+  // Expected leading render samples of the last frame (1 channel, 8 kHz).
+  RunBitexactnessTest(AudioProcessing::kSampleRate8kHz, 1, kOutputReference);
+}
+
+TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Mono16kHz) {
+  const float kOutputReference[] = {-0.000977f, -0.003296f, -0.002441f};
+  // Expected leading render samples of the last frame (1 channel, 16 kHz).
+  RunBitexactnessTest(AudioProcessing::kSampleRate16kHz, 1, kOutputReference);
+}
+
+TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Mono32kHz) {
+  const float kOutputReference[] = {0.003021f, -0.011780f, -0.008209f};
+  // Expected leading render samples of the last frame (1 channel, 32 kHz).
+  RunBitexactnessTest(AudioProcessing::kSampleRate32kHz, 1, kOutputReference);
+}
+
+TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Mono48kHz) {
+  const float kOutputReference[] = {-0.027696f, -0.026253f, -0.018001f};
+  // Expected leading render samples of the last frame (1 channel, 48 kHz).
+  RunBitexactnessTest(AudioProcessing::kSampleRate48kHz, 1, kOutputReference);
+}
+
+TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Stereo8kHz) {
+  const float kOutputReference[] = {0.021454f,  0.035919f, 0.026428f,
+                                    -0.000641f, 0.000366f, 0.000641f};
+  // Expected leading render samples of the last frame (2 channels, 8 kHz).
+  RunBitexactnessTest(AudioProcessing::kSampleRate8kHz, 2, kOutputReference);
+}
+
+TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Stereo16kHz) {
+  const float kOutputReference[] = {0.021362f,  0.035736f,  0.023895f,
+                                    -0.001404f, -0.001465f, 0.000549f};
+  // Expected leading render samples of the last frame (2 channels, 16 kHz).
+  RunBitexactnessTest(AudioProcessing::kSampleRate16kHz, 2, kOutputReference);
+}
+
+TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Stereo32kHz) {
+  const float kOutputReference[] = {0.030641f,  0.027406f,  0.028321f,
+                                    -0.001343f, -0.004578f, 0.000977f};
+  // Expected leading render samples of the last frame (2 channels, 32 kHz).
+  RunBitexactnessTest(AudioProcessing::kSampleRate32kHz, 2, kOutputReference);
+}
+
+TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Stereo48kHz) {
+  const float kOutputReference[] = {-0.009276f, -0.001601f, -0.008255f,
+                                    -0.012975f, -0.015940f, -0.017820f};
+  // Expected leading render samples of the last frame (2 channels, 48 kHz).
+  RunBitexactnessTest(AudioProcessing::kSampleRate48kHz, 2, kOutputReference);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/intelligibility_utils.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/intelligibility_utils.cc
new file mode 100644
index 0000000000..b6917f4407
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/intelligibility_utils.cc
@@ -0,0 +1,94 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/intelligibility/intelligibility_utils.h"
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <algorithm>
+#include <limits>
+
+#include "rtc_base/numerics/safe_minmax.h"
+
+namespace webrtc {
+
+namespace intelligibility {
+
+namespace {
+
+const float kMinFactor = 0.01f;
+const float kMaxFactor = 100.f;
+
+// Moves |current| towards |target| by a multiplicative step whose ratio is
+// confined to [1 - limit, 1 + limit], then keeps the result within
+// [kMinFactor, kMaxFactor].
+float UpdateFactor(float target, float current, float limit) {
+  // The epsilon keeps the division defined when |current| is zero.
+  const float denominator = current + std::numeric_limits<float>::epsilon();
+  const float desired_ratio = target / denominator;
+  const float allowed_ratio =
+      rtc::SafeClamp(desired_ratio, 1 - limit, 1 + limit);
+  const float stepped = current * allowed_ratio;
+  return rtc::SafeClamp(stepped, kMinFactor, kMaxFactor);
+}
+
+}  // namespace
+
+// Starts with a zeroed power estimate for each of the |num_freqs| positions.
+template<typename T>
+PowerEstimator<T>::PowerEstimator(size_t num_freqs, float decay)
+    : power_(num_freqs),
+      decay_(decay) {}
+
+// Folds one new array into the running estimate, position by position:
+//   power = decay * power + (1 - decay) * |data|^2.
+// |data| is read at every index below power_.size().
+template<typename T>
+void PowerEstimator<T>::Step(const T* data) {
+  const float new_weight = 1.f - decay_;
+  for (size_t k = 0; k < power_.size(); ++k) {
+    const auto magnitude = std::abs(data[k]);
+    power_[k] = decay_ * power_[k] + new_weight * magnitude * magnitude;
+  }
+}
+
+template class PowerEstimator<float>;
+template class PowerEstimator<std::complex<float>>;
+
+GainApplier::GainApplier(size_t freqs, float relative_change_limit)
+    : num_freqs_(freqs),
+      relative_change_limit_(relative_change_limit),
+      target_(freqs, 1.f),  // Target gains start at unity.
+      current_(freqs, 1.f) {}  // Current gains start at unity as well.
+
+GainApplier::~GainApplier() = default;
+
+// For every frequency, first steps the current gain towards its target (rate
+// limited by UpdateFactor), then writes the input bin scaled by the square
+// root of the updated gain's magnitude to |out_block|.
+void GainApplier::Apply(const std::complex<float>* in_block,
+                        std::complex<float>* out_block) {
+  for (size_t bin = 0; bin < num_freqs_; ++bin) {
+    float& gain = current_[bin];
+    gain = UpdateFactor(target_[bin], gain, relative_change_limit_);
+    const float amplitude = sqrtf(fabsf(gain));
+    out_block[bin] = amplitude * in_block[bin];
+  }
+}
+
+// Allocates one zero-filled buffer of |delay| samples for each channel.
+DelayBuffer::DelayBuffer(size_t delay, size_t num_channels)
+    : buffer_(num_channels, std::vector<float>(delay)),
+      read_index_(0u) {}
+
+DelayBuffer::~DelayBuffer() = default;
+
+// Delays every channel of |data| in place. Each of the |length| samples of a
+// channel is exchanged with the oldest sample stored in that channel's
+// circular delay line, so the output is the input delayed by the delay-line
+// length. All channels start from the same read position, which is advanced
+// once the last channel has been processed.
+void DelayBuffer::Delay(float* const* data, size_t length) {
+  // With no channels or a zero-sample delay line there is nothing to
+  // exchange; |data| is passed through untouched.
+  if (buffer_.empty() || buffer_.front().empty()) {
+    return;
+  }
+  size_t sample_index = read_index_;
+  for (size_t i = 0u; i < buffer_.size(); ++i) {
+    std::vector<float>& delay_line = buffer_[i];
+    sample_index = read_index_;
+    for (size_t j = 0u; j < length; ++j) {
+      const float incoming = data[i][j];
+      data[i][j] = delay_line[sample_index];
+      delay_line[sample_index] = incoming;
+      // Wrap at the delay-line length. The previous code compared against
+      // buffer_.size(), which is the number of channels, so the index either
+      // wrapped too early or ran past the end of the delay line.
+      if (++sample_index == delay_line.size()) {
+        sample_index = 0u;
+      }
+    }
+  }
+  read_index_ = sample_index;
+}
+
+}  // namespace intelligibility
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h b/third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h
new file mode 100644
index 0000000000..4dc17d50b5
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h
@@ -0,0 +1,86 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_UTILS_H_
+#define MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_UTILS_H_
+
+#include <complex>
+#include <vector>
+
+namespace webrtc {
+
+namespace intelligibility {
+
+// Internal helper for computing the power of a stream of arrays.
+// The result is an array of power per position: the i-th power is the power of
+// the stream of data on the i-th positions in the input arrays.
+template <typename T>
+class PowerEstimator {
+ public:
+  // Construct an instance for the given input array length (|freqs|), with the
+  // appropriate parameters. |decay| is the forgetting factor.
+  PowerEstimator(size_t freqs, float decay);
+
+  // Add a new data point to the series. |data| must provide at least as many
+  // elements as the power array holds.
+  void Step(const T* data);
+
+  // The current power array. Declared const so the estimate can also be read
+  // through a const reference to the estimator.
+  const std::vector<float>& power() const { return power_; }
+
+ private:
+  // The current power array.
+  std::vector<float> power_;
+
+  // Weight given to the previous estimate on each Step().
+  const float decay_;
+};
+
+// Helper class for smoothing gain changes. On each application step, the
+// currently used gains are changed towards a set of settable target gains,
+// constrained by a limit on the relative changes.
+class GainApplier {
+ public:
+  GainApplier(size_t freqs, float relative_change_limit);
+
+  ~GainApplier();
+
+  // Step the current gains towards the targets, then write |in_block| scaled
+  // by the square root of each updated gain's magnitude to |out_block|.
+  void Apply(const std::complex<float>* in_block,
+             std::complex<float>* out_block);
+
+  // Return the current target gain set. Modify this array to set the targets.
+  float* target() { return target_.data(); }
+
+ private:
+  const size_t num_freqs_;
+  const float relative_change_limit_;
+  std::vector<float> target_;
+  std::vector<float> current_;
+};
+
+// Helper class to delay a signal by an integer number of samples.
+class DelayBuffer {
+ public:
+  DelayBuffer(size_t delay, size_t num_channels);  // |delay| is in samples.
+
+  ~DelayBuffer();
+  // Delays |data| in place; |length| samples are processed for each channel.
+  void Delay(float* const* data, size_t length);
+
+ private:
+  std::vector<std::vector<float>> buffer_;  // One delay line per channel.
+  size_t read_index_;  // Position in the delay lines where Delay() resumes.
+};
+
+}  // namespace intelligibility
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_UTILS_H_
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/intelligibility_utils_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/intelligibility_utils_unittest.cc
new file mode 100644
index 0000000000..fea394c338
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/intelligibility_utils_unittest.cc
@@ -0,0 +1,79 @@
+/*
+ *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <cmath>
+#include <complex>
+#include <vector>
+
+#include "modules/audio_processing/intelligibility/intelligibility_utils.h"
+#include "rtc_base/arraysize.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+
+namespace intelligibility {
+
+// Builds |samples| arrays of |freqs| complex values each. Element (i, j) has
+// equal real and imaginary parts of 0.99 / ((i + 1) * (j + 1)).
+std::vector<std::vector<std::complex<float>>> GenerateTestData(size_t freqs,
+                                                               size_t samples) {
+  std::vector<std::vector<std::complex<float>>> frames(samples);
+  size_t frame_number = 0;
+  for (auto& frame : frames) {
+    ++frame_number;
+    for (size_t j = 0; j < freqs; ++j) {
+      const float value = 0.99f / (frame_number * (j + 1));
+      frame.emplace_back(value, value);
+    }
+  }
+  return frames;
+}
+
+// Tests PowerEstimator, for all power step types.
+TEST(IntelligibilityUtilsTest, TestPowerEstimator) {
+  const size_t kFreqs = 10;
+  const size_t kSamples = 100;
+  const float kDecay = 0.5f;
+  const auto test_data = GenerateTestData(kFreqs, kSamples);
+  PowerEstimator<std::complex<float>> power_estimator(kFreqs, kDecay);
+  // Nothing has been stepped yet, so the estimate starts at zero.
+  EXPECT_EQ(0, power_estimator.power()[0]);
+
+  // The first array is stepped without checks; after each later step every
+  // power value must lie within [0, 1].
+  power_estimator.Step(test_data[0].data());
+  for (size_t frame = 1; frame < kSamples; ++frame) {
+    power_estimator.Step(test_data[frame].data());
+    for (size_t bin = 0; bin < kFreqs; ++bin) {
+      const float power = power_estimator.power()[bin];
+      EXPECT_GE(power, 0.f);
+      EXPECT_LE(power, 1.f);
+    }
+  }
+}
+
+// Tests gain applier.
+TEST(IntelligibilityUtilsTest, TestGainApplier) {
+  const size_t kFreqs = 10;
+  const size_t kSamples = 100;
+  const float kChangeLimit = 0.1f;
+  GainApplier gain_applier(kFreqs, kChangeLimit);
+  const std::vector<std::vector<std::complex<float>>> in_data(
+      GenerateTestData(kFreqs, kSamples));
+  // Generated only to obtain correctly sized output arrays; Apply()
+  // overwrites the contents.
+  std::vector<std::vector<std::complex<float>>> out_data(
+      GenerateTestData(kFreqs, kSamples));
+  for (size_t frame = 0; frame < kSamples; ++frame) {
+    gain_applier.Apply(in_data[frame].data(), out_data[frame].data());
+    // Every output component must lie strictly between 0 and 1.
+    for (size_t bin = 0; bin < kFreqs; ++bin) {
+      const std::complex<float>& value = out_data[frame][bin];
+      EXPECT_GT(value.real(), 0.f);
+      EXPECT_LT(value.real(), 1.f);
+      EXPECT_GT(value.imag(), 0.f);
+      EXPECT_LT(value.imag(), 1.f);
+    }
+  }
+}
+
+}  // namespace intelligibility
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/test/intelligibility_proc.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/test/intelligibility_proc.cc
new file mode 100644
index 0000000000..b90449caa3
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/test/intelligibility_proc.cc
@@ -0,0 +1,96 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "common_audio/channel_buffer.h"
+#include "common_audio/include/audio_util.h"
+#include "common_audio/wav_file.h"
+#include "modules/audio_processing/audio_buffer.h"
+#include "modules/audio_processing/intelligibility/intelligibility_enhancer.h"
+#include "modules/audio_processing/noise_suppression_impl.h"
+#include "rtc_base/criticalsection.h"
+#include "rtc_base/flags.h"
+
+using std::complex;
+
+namespace webrtc {
+namespace {
+
+DEFINE_string(clear_file, "speech.wav", "Input file with clear speech.");
+DEFINE_string(noise_file, "noise.wav", "Input file with noise data.");
+DEFINE_string(out_file, "proc_enhanced.wav", "Enhanced output file.");
+DEFINE_bool(help, false, "Print this message.");
+
+// Enhances the speech read from --clear_file using a noise estimate derived
+// from --noise_file, and writes the result to --out_file. Returns 1 when
+// SetFlagsFromCommandLine() reports a non-zero result and 0 otherwise.
+int int_main(int argc, char* argv[]) {
+  if (rtc::FlagList::SetFlagsFromCommandLine(&argc, argv, true)) {
+    return 1;
+  }
+  if (FLAG_help) {
+    rtc::FlagList::Print(nullptr, false);
+    return 0;
+  }
+  // Anything left over after flag parsing is unexpected.
+  // NOTE(review): the message mentions raw PCM although the files are opened
+  // with WavReader below - confirm the intended wording.
+  if (argc != 1) {
+    printf("\n\nInput files must be little-endian 16-bit signed raw PCM.\n");
+    return 0;
+  }
+
+  WavReader in_file(FLAG_clear_file);
+  WavReader noise_file(FLAG_noise_file);
+  WavWriter out_file(FLAG_out_file, in_file.sample_rate(),
+                     in_file.num_channels());
+  rtc::CriticalSection crit;
+  NoiseSuppressionImpl ns(&crit);
+  IntelligibilityEnhancer enh(in_file.sample_rate(), in_file.num_channels(), 1u,
+                              NoiseSuppressionImpl::num_noise_bins());
+  ns.Initialize(noise_file.num_channels(), noise_file.sample_rate());
+  ns.Enable(true);
+  // Chunk sizes are 1/100 s of audio at each file's own sample rate. The
+  // speech chunk used to be derived from the noise file's rate, which
+  // mis-sized |render_audio| relative to |in_config| whenever the two files
+  // had different sample rates.
+  const size_t in_samples = in_file.sample_rate() / 100;
+  const size_t noise_samples = noise_file.sample_rate() / 100;
+  std::vector<float> in(in_samples * in_file.num_channels());
+  std::vector<float> noise(noise_samples * noise_file.num_channels());
+  ChannelBuffer<float> in_buf(in_samples, in_file.num_channels());
+  ChannelBuffer<float> noise_buf(noise_samples, noise_file.num_channels());
+  AudioBuffer capture_audio(noise_samples, noise_file.num_channels(),
+                            noise_samples, noise_file.num_channels(),
+                            noise_samples);
+  AudioBuffer render_audio(in_samples, in_file.num_channels(), in_samples,
+                           in_file.num_channels(), in_samples);
+  StreamConfig noise_config(noise_file.sample_rate(),
+                            noise_file.num_channels());
+  StreamConfig in_config(in_file.sample_rate(), in_file.num_channels());
+  // Process chunk by chunk until either file can no longer supply a full one.
+  while (in_file.ReadSamples(in.size(), in.data()) == in.size() &&
+         noise_file.ReadSamples(noise.size(), noise.data()) == noise.size()) {
+    FloatS16ToFloat(noise.data(), noise.size(), noise.data());
+    FloatS16ToFloat(in.data(), in.size(), in.data());
+    Deinterleave(in.data(), in_buf.num_frames(), in_buf.num_channels(),
+                 in_buf.channels());
+    Deinterleave(noise.data(), noise_buf.num_frames(), noise_buf.num_channels(),
+                 noise_buf.channels());
+    capture_audio.CopyFrom(noise_buf.channels(), noise_config);
+    render_audio.CopyFrom(in_buf.channels(), in_config);
+    // The noise suppressor runs first so that its estimate reflects the
+    // current noise chunk when the speech chunk is enhanced.
+    ns.AnalyzeCaptureAudio(&capture_audio);
+    ns.ProcessCaptureAudio(&capture_audio);
+    enh.SetCaptureNoiseEstimate(ns.NoiseEstimate(), 1);
+    enh.ProcessRenderAudio(&render_audio);
+    render_audio.CopyTo(in_config, in_buf.channels());
+    Interleave(in_buf.channels(), in_buf.num_frames(), in_buf.num_channels(),
+               in.data());
+    FloatToFloatS16(in.data(), in.size(), in.data());
+    out_file.WriteSamples(in.data(), in.size());
+  }
+
+  return 0;
+}
+
+}  // namespace
+}  // namespace webrtc
+
+// Program entry point; all the work happens in webrtc::int_main().
+int main(int argc, char* argv[]) {
+  const int exit_code = webrtc::int_main(argc, argv);
+  return exit_code;
+}
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-28 14:29:10 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-28 14:29:10 +0000
commit	2aa4a82499d4becd2284cdb482213d541b8804dd (patch)
tree	b80bf8bf13c3766139fbacc530efd0dd9d54394c /third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility
parent	Initial commit. (diff)
download	firefox-upstream.tar.xz firefox-upstream.zip