/*
 *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "modules/audio_processing/aec3/comfort_noise_generator.h"

// Defines WEBRTC_ARCH_X86_FAMILY, used below.
#include "rtc_base/system/arch.h"

#if defined(WEBRTC_ARCH_X86_FAMILY)
#include <emmintrin.h>
#endif
#include <algorithm>
#include <array>
#include <cmath>
#include <cstdint>
#include <functional>
#include <memory>
#include <numeric>
#include <vector>

#include "common_audio/signal_processing/include/signal_processing_library.h"
#include "modules/audio_processing/aec3/vector_math.h"
#include "rtc_base/checks.h"

namespace webrtc {

namespace {

// Computes the noise floor value that matches a WGN input of noise_floor_dbfs.
//
// The result is 64 * 10^((kdBfsNormalization + noise_floor_dbfs) / 10), i.e.
// the dBFS level is first re-referenced to a 16-bit full scale of 32768 and
// then converted from dB to a linear power value.
// NOTE(review): the leading factor 64 presumably accounts for the FFT length
// scaling of the power spectrum - confirm.
float GetNoiseFloorFactor(float noise_floor_dbfs) {
  // kdBfsNormalization = 20.f*log10(32768.f).
  constexpr float kdBfsNormalization = 90.30899869919436f;
  return 64.f * powf(10.f, (kdBfsNormalization + noise_floor_dbfs) * 0.1f);
}

// Table of sqrt(2) * sin(2*pi*i/32).
// Indexing with an offset of 8 entries (a quarter period) yields the matching
// sqrt(2) * cos(2*pi*i/32) value.
constexpr float kSqrt2Sin[32] = {
    +0.0000000f, +0.2758994f, +0.5411961f, +0.7856950f, +1.0000000f,
    +1.1758756f, +1.3065630f, +1.3870398f, +1.4142136f, +1.3870398f,
    +1.3065630f, +1.1758756f, +1.0000000f, +0.7856950f, +0.5411961f,
    +0.2758994f, +0.0000000f, -0.2758994f, -0.5411961f, -0.7856950f,
    -1.0000000f, -1.1758756f, -1.3065630f, -1.3870398f, -1.4142136f,
    -1.3870398f, -1.3065630f, -1.1758756f, -1.0000000f, -0.7856950f,
    -0.5411961f, -0.2758994f};

// Generates one frame of random-phase comfort noise in the frequency domain.
//
// `optimization` selects the VectorMath implementation used for the square
// root. `N2` is the noise estimate whose element-wise square root gives the
// per-bin magnitude. `seed` points to the state of the pseudo-random
// generator and is advanced in place, once per generated bin.
//
// `lower_band_noise` receives noise shaped by sqrt(N2[k]) for each bin.
// `upper_band_noise` receives noise with a flat magnitude equal to the mean of
// sqrt(N2) over the bins from kFftLengthBy2Plus1 / 2 up to the last one.
//
// Only the real parts of bins 0 and kFftLengthBy2 are written (set to zero);
// the imaginary parts of those two bins are left untouched by this function.
void GenerateComfortNoise(Aec3Optimization optimization,
                          const std::array<float, kFftLengthBy2Plus1>& N2,
                          uint32_t* seed,
                          FftData* lower_band_noise,
                          FftData* upper_band_noise) {
  FftData* N_low = lower_band_noise;
  FftData* N_high = upper_band_noise;

  // Compute square root spectrum.
  std::array<float, kFftLengthBy2Plus1> N;
  std::copy(N2.begin(), N2.end(), N.begin());
  aec3::VectorMath(optimization).Sqrt(N);

  // Compute the noise level for the upper bands.
  // The accumulated range [kFftLengthBy2Plus1 / 2, end) holds
  // kFftLengthBy2Plus1 / 2 + 1 bins, which is what kOneByNumBands normalizes
  // by.
  constexpr float kOneByNumBands = 1.f / (kFftLengthBy2Plus1 / 2 + 1);
  constexpr int kFftLengthBy2Plus1By2 = kFftLengthBy2Plus1 / 2;
  const float high_band_noise_level =
      std::accumulate(N.begin() + kFftLengthBy2Plus1By2, N.end(), 0.f) *
      kOneByNumBands;

  // The analysis and synthesis windowing cause loss of power when
  // cross-fading the noise where frames are completely uncorrelated
  // (generated with random phase), hence the factor sqrt(2).
  // This is not the case for the speech signal where the input is overlapping
  // (strong correlation).
  N_low->re[0] = N_low->re[kFftLengthBy2] = N_high->re[0] =
      N_high->re[kFftLengthBy2] = 0.f;
  for (size_t k = 1; k < kFftLengthBy2; k++) {
    constexpr int kIndexMask = 32 - 1;
    // Generate a random 31-bit integer.
    seed[0] = (seed[0] * 69069 + 1) & (0x80000000 - 1);
    // Convert to a 5-bit index (the top 5 of the 31 bits).
    const int i = static_cast<int>(seed[0] >> 26);

    // x = sqrt(2) * sin(a)
    const float x = kSqrt2Sin[i];
    // y = sqrt(2) * cos(a) = sqrt(2) * sin(a + pi/2)
    const float y = kSqrt2Sin[(i + 8) & kIndexMask];

    // Form low-frequency noise via spectral shaping.
    N_low->re[k] = N[k] * x;
    N_low->im[k] = N[k] * y;

    // Form the high-frequency noise via simple levelling.
    N_high->re[k] = high_band_noise_level * x;
    N_high->im[k] = high_band_noise_level * y;
  }
}

}  // namespace

// Sets up per-channel state for `num_capture_channels` channels.
//
// The noise floor is derived from config.comfort_noise.noise_floor_dbfs. The
// initial-phase estimate N2_initial_ and the smoothed spectrum Y2_smoothed_
// start at zero, while the long-term estimate N2_ starts at 1.0e6 so that it
// can be pulled down towards the observed spectrum.
// NOTE(review): N2_counter_ is not initialized here; presumably it has an
// in-class initializer in the header - confirm.
ComfortNoiseGenerator::ComfortNoiseGenerator(const EchoCanceller3Config& config,
                                             Aec3Optimization optimization,
                                             size_t num_capture_channels)
    : optimization_(optimization),
      seed_(42),
      num_capture_channels_(num_capture_channels),
      noise_floor_(GetNoiseFloorFactor(config.comfort_noise.noise_floor_dbfs)),
      N2_initial_(
          std::make_unique<std::vector<std::array<float, kFftLengthBy2Plus1>>>(
              num_capture_channels_)),
      Y2_smoothed_(num_capture_channels_),
      N2_(num_capture_channels_) {
  for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
    (*N2_initial_)[ch].fill(0.f);
    Y2_smoothed_[ch].fill(0.f);
    N2_[ch].fill(1.0e6f);
  }
}

ComfortNoiseGenerator::~ComfortNoiseGenerator() = default;

// Updates the noise estimates from `capture_spectrum` and writes one frame of
// comfort noise per channel into `lower_band_noise` and `upper_band_noise`.
//
// When `saturated_capture` is true all estimate updates are skipped, but
// comfort noise is still generated from the current estimate. All three views
// are indexed by channel for ch in [0, num_capture_channels_).
void ComfortNoiseGenerator::Compute(
    bool saturated_capture,
    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
        capture_spectrum,
    rtc::ArrayView<FftData> lower_band_noise,
    rtc::ArrayView<FftData> upper_band_noise) {
  const auto& Y2 = capture_spectrum;

  if (!saturated_capture) {
    // Smooth Y2.
    for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
      std::transform(Y2_smoothed_[ch].begin(), Y2_smoothed_[ch].end(),
                     Y2[ch].begin(), Y2_smoothed_[ch].begin(),
                     [](float a, float b) { return a + 0.1f * (b - a); });
    }

    if (N2_counter_ > 50) {
      // Update N2 from Y2_smoothed.
      // Bins where the smoothed spectrum is below the estimate pull the
      // estimate down quickly; in every case the result is scaled by 1.0002
      // so the estimate slowly creeps upwards between such updates.
      for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
        std::transform(N2_[ch].begin(), N2_[ch].end(), Y2_smoothed_[ch].begin(),
                       N2_[ch].begin(), [](float a, float b) {
                         return b < a ? (0.9f * b + 0.1f * a) * 1.0002f
                                      : a * 1.0002f;
                       });
      }
    }

    if (N2_initial_) {
      if (++N2_counter_ == 1000) {
        // The initial phase is over; from now on N2_ is used directly.
        N2_initial_.reset();
      } else {
        // Compute the N2_initial from N2.
        // The initial estimate rises slowly towards N2 but follows it
        // immediately whenever N2 is at or below it.
        for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
          std::transform(N2_[ch].begin(), N2_[ch].end(),
                         (*N2_initial_)[ch].begin(),
                         (*N2_initial_)[ch].begin(), [](float a, float b) {
                           return a > b ? b + 0.001f * (a - b) : a;
                         });
        }
      }
    }

    // Keep both estimates at or above the configured noise floor.
    for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
      for (auto& n : N2_[ch]) {
        n = std::max(n, noise_floor_);
      }
      if (N2_initial_) {
        for (auto& n : (*N2_initial_)[ch]) {
          n = std::max(n, noise_floor_);
        }
      }
    }
  }

  // Choose N2 estimate to use.
  const auto& N2 = N2_initial_ ? (*N2_initial_) : N2_;

  for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
    GenerateComfortNoise(optimization_, N2[ch], &seed_, &lower_band_noise[ch],
                         &upper_band_noise[ch]);
  }
}

}  // namespace webrtc