author     Daniel Baumann <daniel.baumann@progress-linux.org>   2024-04-07 09:22:09 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>   2024-04-07 09:22:09 +0000
commit     43a97878ce14b72f0981164f87f2e35e14151312 (patch)
tree       620249daf56c0258faa40cbdcf9cfba06de2a846 /third_party/libwebrtc/modules/audio_processing/agc2
parent     Initial commit. (diff)
Adding upstream version 110.0.1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/libwebrtc/modules/audio_processing/agc2')
115 files changed, 15074 insertions, 0 deletions
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/BUILD.gn b/third_party/libwebrtc/modules/audio_processing/agc2/BUILD.gn new file mode 100644 index 0000000000..ed992488ad --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/BUILD.gn @@ -0,0 +1,309 @@ +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +import("../../../webrtc.gni") + +group("agc2") { + deps = [ + ":adaptive_digital", + ":fixed_digital", + ] +} + +rtc_library("adaptive_digital") { + sources = [ + "adaptive_digital_gain_applier.cc", + "adaptive_digital_gain_applier.h", + "adaptive_digital_gain_controller.cc", + "adaptive_digital_gain_controller.h", + "adaptive_mode_level_estimator.cc", + "adaptive_mode_level_estimator.h", + "saturation_protector.cc", + "saturation_protector.h", + "saturation_protector_buffer.cc", + "saturation_protector_buffer.h", + ] + + visibility = [ + "..:gain_controller2", + "./*", + ] + + configs += [ "..:apm_debug_dump" ] + + deps = [ + ":common", + ":cpu_features", + ":gain_applier", + ":noise_level_estimator", + ":vad_wrapper", + "..:api", + "..:apm_logging", + "..:audio_frame_view", + "../../../api:array_view", + "../../../common_audio", + "../../../rtc_base:checks", + "../../../rtc_base:logging", + "../../../rtc_base:safe_compare", + "../../../rtc_base:safe_minmax", + "../../../system_wrappers:metrics", + ] + + absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ] +} + +rtc_library("biquad_filter") { + visibility = [ "./*" ] + sources = [ + "biquad_filter.cc", + "biquad_filter.h", + ] + deps = [ + "../../../api:array_view", + "../../../rtc_base:macromagic", + ] +} + +rtc_source_set("common") { + sources = [ "agc2_common.h" ] +} + +rtc_library("fixed_digital") { + sources = [ + "fixed_digital_level_estimator.cc", + "fixed_digital_level_estimator.h", + "interpolated_gain_curve.cc", + "interpolated_gain_curve.h", + "limiter.cc", + "limiter.h", + ] + + visibility = [ + "..:gain_controller2", + "../../audio_mixer:audio_mixer_impl", + "./*", + ] + + configs += [ "..:apm_debug_dump" ] + + deps = [ + ":common", + "..:apm_logging", + "..:audio_frame_view", + "../../../api:array_view", + "../../../common_audio", + "../../../rtc_base:checks", + "../../../rtc_base:gtest_prod", + "../../../rtc_base:safe_conversions", + "../../../rtc_base:safe_minmax", + "../../../rtc_base:stringutils", + "../../../system_wrappers:metrics", + ] + absl_deps = [ "//third_party/abseil-cpp/absl/strings" ] +} + +rtc_library("gain_applier") { + sources = [ + "gain_applier.cc", + "gain_applier.h", + ] + + visibility = [ + "..:gain_controller2", + "./*", + ] + + deps = [ + ":common", + "..:audio_frame_view", + "../../../api:array_view", + "../../../rtc_base:safe_minmax", + ] +} + +rtc_library("noise_level_estimator") { + sources = [ + "noise_level_estimator.cc", + "noise_level_estimator.h", + ] + visibility = [ "./*" ] + deps = [ + ":biquad_filter", + "..:apm_logging", + "..:audio_frame_view", + "../../../api:array_view", + "../../../rtc_base:checks", + "../../../system_wrappers", + ] + + configs += [ "..:apm_debug_dump" ] +} + +rtc_library("vad_wrapper") { + sources = [ + "vad_wrapper.cc", + "vad_wrapper.h", + ] + + visibility = [ + 
"..:gain_controller2", + "./*", + ] + + defines = [] + if (rtc_build_with_neon && current_cpu != "arm64") { + suppressed_configs += [ "//build/config/compiler:compiler_arm_fpu" ] + cflags = [ "-mfpu=neon" ] + } + + deps = [ + ":common", + ":cpu_features", + "..:audio_frame_view", + "../../../api:array_view", + "../../../common_audio", + "../../../rtc_base:checks", + "rnn_vad", + "rnn_vad:rnn_vad_common", + ] +} + +rtc_library("cpu_features") { + sources = [ + "cpu_features.cc", + "cpu_features.h", + ] + + visibility = [ + "..:gain_controller2", + "./*", + ] + + deps = [ + "../../../rtc_base:stringutils", + "../../../rtc_base/system:arch", + "../../../system_wrappers", + ] +} + +if (rtc_include_tests) { +rtc_library("adaptive_digital_unittests") { + testonly = true + configs += [ "..:apm_debug_dump" ] + + sources = [ + "adaptive_digital_gain_applier_unittest.cc", + "adaptive_mode_level_estimator_unittest.cc", + "gain_applier_unittest.cc", + "saturation_protector_buffer_unittest.cc", + "saturation_protector_unittest.cc", + ] + deps = [ + ":adaptive_digital", + ":common", + ":gain_applier", + ":test_utils", + "..:api", + "..:apm_logging", + "..:audio_frame_view", + "../../../api:array_view", + "../../../common_audio", + "../../../rtc_base:checks", + "../../../rtc_base:gunit_helpers", + "../../../test:test_support", + ] +} + +rtc_library("biquad_filter_unittests") { + testonly = true + sources = [ "biquad_filter_unittest.cc" ] + deps = [ + ":biquad_filter", + "../../../rtc_base:gunit_helpers", + ] +} + +rtc_library("fixed_digital_unittests") { + testonly = true + configs += [ "..:apm_debug_dump" ] + + sources = [ + "agc2_testing_common_unittest.cc", + "compute_interpolated_gain_curve.cc", + "compute_interpolated_gain_curve.h", + "fixed_digital_level_estimator_unittest.cc", + "interpolated_gain_curve_unittest.cc", + "limiter_db_gain_curve.cc", + "limiter_db_gain_curve.h", + "limiter_db_gain_curve_unittest.cc", + "limiter_unittest.cc", + ] + deps = [ + ":common", + ":fixed_digital", + ":test_utils", + "..:apm_logging", + "..:audio_frame_view", + "../../../api:array_view", + "../../../common_audio", + "../../../rtc_base:checks", + "../../../rtc_base:gunit_helpers", + "../../../system_wrappers:metrics", + ] +} + +rtc_library("noise_estimator_unittests") { + testonly = true + configs += [ "..:apm_debug_dump" ] + + sources = [ "noise_level_estimator_unittest.cc" ] + deps = [ + ":noise_level_estimator", + ":test_utils", + "..:apm_logging", + "..:audio_frame_view", + "../../../api:array_view", + "../../../api:function_view", + "../../../rtc_base:checks", + "../../../rtc_base:gunit_helpers", + ] +} + +rtc_library("vad_wrapper_unittests") { + testonly = true + sources = [ "vad_wrapper_unittest.cc" ] + deps = [ + ":common", + ":vad_wrapper", + "..:audio_frame_view", + "../../../rtc_base:checks", + "../../../rtc_base:gunit_helpers", + "../../../rtc_base:safe_compare", + "../../../test:test_support", + ] +} + +rtc_library("test_utils") { + testonly = true + visibility = [ + ":*", + "..:audio_processing_unittests", + ] + sources = [ + "agc2_testing_common.cc", + "agc2_testing_common.h", + "vector_float_frame.cc", + "vector_float_frame.h", + ] + deps = [ + "..:audio_frame_view", + "../../../rtc_base:checks", + "../../../rtc_base:random", + ] +} +} diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc b/third_party/libwebrtc/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc new file mode 100644 index 0000000000..a34f598874 --- /dev/null +++ 
b/third_party/libwebrtc/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc @@ -0,0 +1,267 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/adaptive_digital_gain_applier.h" + +#include <algorithm> + +#include "common_audio/include/audio_util.h" +#include "modules/audio_processing/agc2/agc2_common.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" +#include "rtc_base/numerics/safe_minmax.h" +#include "system_wrappers/include/metrics.h" + +namespace webrtc { +namespace { + +using AdaptiveDigitalConfig = + AudioProcessing::Config::GainController2::AdaptiveDigital; + +constexpr int kHeadroomHistogramMin = 0; +constexpr int kHeadroomHistogramMax = 50; +constexpr int kGainDbHistogramMax = 30; + +// Computes the gain for `input_level_dbfs` to reach `-config.headroom_db`. +// Clamps the gain in [0, `config.max_gain_db`]. `config.headroom_db` is a +// safety margin to allow transient peaks to exceed the target peak level +// without clipping. +float ComputeGainDb(float input_level_dbfs, + const AdaptiveDigitalConfig& config) { + // If the level is very low, apply the maximum gain. + if (input_level_dbfs < -(config.headroom_db + config.max_gain_db)) { + return config.max_gain_db; + } + // We expect to end up here most of the time: the level is below + // -headroom, but we can boost it to -headroom. + if (input_level_dbfs < -config.headroom_db) { + return -config.headroom_db - input_level_dbfs; + } + // The level is too high and we can't boost. + RTC_DCHECK_GE(input_level_dbfs, -config.headroom_db); + return 0.0f; +} + +// Returns `target_gain_db` if applying such a gain to `input_noise_level_dbfs` +// does not exceed `max_output_noise_level_dbfs`. Otherwise lowers and returns +// `target_gain_db` so that the output noise level equals +// `max_output_noise_level_dbfs`. +float LimitGainByNoise(float target_gain_db, + float input_noise_level_dbfs, + float max_output_noise_level_dbfs, + ApmDataDumper& apm_data_dumper) { + const float max_allowed_gain_db = + max_output_noise_level_dbfs - input_noise_level_dbfs; + apm_data_dumper.DumpRaw("agc2_adaptive_gain_applier_max_allowed_gain_db", + max_allowed_gain_db); + return std::min(target_gain_db, std::max(max_allowed_gain_db, 0.0f)); +} + +float LimitGainByLowConfidence(float target_gain_db, + float last_gain_db, + float limiter_audio_level_dbfs, + bool estimate_is_confident) { + if (estimate_is_confident || + limiter_audio_level_dbfs <= kLimiterThresholdForAgcGainDbfs) { + return target_gain_db; + } + const float limiter_level_dbfs_before_gain = + limiter_audio_level_dbfs - last_gain_db; + + // Compute a new gain so that `limiter_level_dbfs_before_gain` + + // `new_target_gain_db` is not great than `kLimiterThresholdForAgcGainDbfs`. + const float new_target_gain_db = std::max( + kLimiterThresholdForAgcGainDbfs - limiter_level_dbfs_before_gain, 0.0f); + return std::min(new_target_gain_db, target_gain_db); +} + +// Computes how the gain should change during this frame. +// Return the gain difference in db to 'last_gain_db'. 
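// Illustrative worked example for ComputeGainChangeThisFrameDb() below
// (hypothetical values, not from the patch): with target_gain_db = 9,
// last_gain_db = 5, gain_increase_allowed = true and both per-frame limits
// set to 3 dB, the raw difference 9 - 5 = 4 dB is clamped to [-3, 3], so the
// function returns +3 dB and the remaining 1 dB is applied on later frames.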
+float ComputeGainChangeThisFrameDb(float target_gain_db, + float last_gain_db, + bool gain_increase_allowed, + float max_gain_decrease_db, + float max_gain_increase_db) { + RTC_DCHECK_GT(max_gain_decrease_db, 0); + RTC_DCHECK_GT(max_gain_increase_db, 0); + float target_gain_difference_db = target_gain_db - last_gain_db; + if (!gain_increase_allowed) { + target_gain_difference_db = std::min(target_gain_difference_db, 0.0f); + } + return rtc::SafeClamp(target_gain_difference_db, -max_gain_decrease_db, + max_gain_increase_db); +} + +// Copies the (multichannel) audio samples from `src` into `dst`. +void CopyAudio(AudioFrameView<const float> src, + std::vector<std::vector<float>>& dst) { + RTC_DCHECK_GT(src.num_channels(), 0); + RTC_DCHECK_GT(src.samples_per_channel(), 0); + RTC_DCHECK_EQ(dst.size(), src.num_channels()); + for (int c = 0; c < src.num_channels(); ++c) { + rtc::ArrayView<const float> channel_view = src.channel(c); + RTC_DCHECK_EQ(channel_view.size(), src.samples_per_channel()); + RTC_DCHECK_EQ(dst[c].size(), src.samples_per_channel()); + std::copy(channel_view.begin(), channel_view.end(), dst[c].begin()); + } +} + +} // namespace + +AdaptiveDigitalGainApplier::AdaptiveDigitalGainApplier( + ApmDataDumper* apm_data_dumper, + const AudioProcessing::Config::GainController2::AdaptiveDigital& config, + int sample_rate_hz, + int num_channels) + : apm_data_dumper_(apm_data_dumper), + gain_applier_( + /*hard_clip_samples=*/false, + /*initial_gain_factor=*/DbToRatio(config.initial_gain_db)), + config_(config), + max_gain_change_db_per_10ms_(config_.max_gain_change_db_per_second * + kFrameDurationMs / 1000.0f), + calls_since_last_gain_log_(0), + frames_to_gain_increase_allowed_( + config_.adjacent_speech_frames_threshold), + last_gain_db_(config_.initial_gain_db) { + RTC_DCHECK_GT(max_gain_change_db_per_10ms_, 0.0f); + RTC_DCHECK_GE(frames_to_gain_increase_allowed_, 1); + RTC_DCHECK_GE(config_.max_output_noise_level_dbfs, -90.0f); + RTC_DCHECK_LE(config_.max_output_noise_level_dbfs, 0.0f); + Initialize(sample_rate_hz, num_channels); +} + +void AdaptiveDigitalGainApplier::Initialize(int sample_rate_hz, + int num_channels) { + if (!config_.dry_run) { + return; + } + RTC_DCHECK_GT(sample_rate_hz, 0); + RTC_DCHECK_GT(num_channels, 0); + int frame_size = rtc::CheckedDivExact(sample_rate_hz, 100); + bool sample_rate_changed = + dry_run_frame_.empty() || // Handle initialization. + dry_run_frame_[0].size() != static_cast<size_t>(frame_size); + bool num_channels_changed = + dry_run_channels_.size() != static_cast<size_t>(num_channels); + if (sample_rate_changed || num_channels_changed) { + // Resize the multichannel audio vector and update the channel pointers. + dry_run_frame_.resize(num_channels); + dry_run_channels_.resize(num_channels); + for (int c = 0; c < num_channels; ++c) { + dry_run_frame_[c].resize(frame_size); + dry_run_channels_[c] = dry_run_frame_[c].data(); + } + } +} + +void AdaptiveDigitalGainApplier::Process(const FrameInfo& info, + AudioFrameView<float> frame) { + RTC_DCHECK_GE(info.speech_level_dbfs, -150.0f); + RTC_DCHECK_GE(frame.num_channels(), 1); + RTC_DCHECK( + frame.samples_per_channel() == 80 || frame.samples_per_channel() == 160 || + frame.samples_per_channel() == 320 || frame.samples_per_channel() == 480) + << "`frame` does not look like a 10 ms frame for an APM supported sample " + "rate"; + + // Compute the input level used to select the desired gain. 
+ RTC_DCHECK_GT(info.headroom_db, 0.0f); + const float input_level_dbfs = info.speech_level_dbfs + info.headroom_db; + + const float target_gain_db = LimitGainByLowConfidence( + LimitGainByNoise(ComputeGainDb(input_level_dbfs, config_), + info.noise_rms_dbfs, config_.max_output_noise_level_dbfs, + *apm_data_dumper_), + last_gain_db_, info.limiter_envelope_dbfs, info.speech_level_reliable); + + // Forbid increasing the gain until enough adjacent speech frames are + // observed. + bool first_confident_speech_frame = false; + if (info.speech_probability < kVadConfidenceThreshold) { + frames_to_gain_increase_allowed_ = config_.adjacent_speech_frames_threshold; + } else if (frames_to_gain_increase_allowed_ > 0) { + frames_to_gain_increase_allowed_--; + first_confident_speech_frame = frames_to_gain_increase_allowed_ == 0; + } + apm_data_dumper_->DumpRaw( + "agc2_adaptive_gain_applier_frames_to_gain_increase_allowed", + frames_to_gain_increase_allowed_); + + const bool gain_increase_allowed = frames_to_gain_increase_allowed_ == 0; + + float max_gain_increase_db = max_gain_change_db_per_10ms_; + if (first_confident_speech_frame) { + // No gain increase happened while waiting for a long enough speech + // sequence. Therefore, temporarily allow a faster gain increase. + RTC_DCHECK(gain_increase_allowed); + max_gain_increase_db *= config_.adjacent_speech_frames_threshold; + } + + const float gain_change_this_frame_db = ComputeGainChangeThisFrameDb( + target_gain_db, last_gain_db_, gain_increase_allowed, + /*max_gain_decrease_db=*/max_gain_change_db_per_10ms_, + max_gain_increase_db); + + apm_data_dumper_->DumpRaw("agc2_adaptive_gain_applier_want_to_change_by_db", + target_gain_db - last_gain_db_); + apm_data_dumper_->DumpRaw("agc2_adaptive_gain_applier_will_change_by_db", + gain_change_this_frame_db); + + // Optimization: avoid calling math functions if gain does not + // change. + if (gain_change_this_frame_db != 0.f) { + gain_applier_.SetGainFactor( + DbToRatio(last_gain_db_ + gain_change_this_frame_db)); + } + + // Modify `frame` only if not running in "dry run" mode. + if (!config_.dry_run) { + gain_applier_.ApplyGain(frame); + } else { + // Copy `frame` so that `ApplyGain()` is called (on a copy). + CopyAudio(frame, dry_run_frame_); + RTC_DCHECK(!dry_run_channels_.empty()); + AudioFrameView<float> frame_copy(&dry_run_channels_[0], + frame.num_channels(), + frame.samples_per_channel()); + gain_applier_.ApplyGain(frame_copy); + } + + // Remember that the gain has changed for the next iteration. + last_gain_db_ = last_gain_db_ + gain_change_this_frame_db; + apm_data_dumper_->DumpRaw("agc2_adaptive_gain_applier_applied_gain_db", + last_gain_db_); + + // Log every 10 seconds. 
+ calls_since_last_gain_log_++; + if (calls_since_last_gain_log_ == 1000) { + calls_since_last_gain_log_ = 0; + RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.Agc2.EstimatedSpeechLevel", + -info.speech_level_dbfs, 0, 100, 101); + RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.Agc2.EstimatedNoiseLevel", + -info.noise_rms_dbfs, 0, 100, 101); + RTC_HISTOGRAM_COUNTS_LINEAR( + "WebRTC.Audio.Agc2.Headroom", info.headroom_db, kHeadroomHistogramMin, + kHeadroomHistogramMax, + kHeadroomHistogramMax - kHeadroomHistogramMin + 1); + RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.Agc2.DigitalGainApplied", + last_gain_db_, 0, kGainDbHistogramMax, + kGainDbHistogramMax + 1); + RTC_LOG(LS_INFO) << "AGC2 adaptive digital" + << " | speech_dbfs: " << info.speech_level_dbfs + << " | noise_dbfs: " << info.noise_rms_dbfs + << " | headroom_db: " << info.headroom_db + << " | gain_db: " << last_gain_db_; + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/adaptive_digital_gain_applier.h b/third_party/libwebrtc/modules/audio_processing/agc2/adaptive_digital_gain_applier.h new file mode 100644 index 0000000000..dc84c1e238 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/adaptive_digital_gain_applier.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_DIGITAL_GAIN_APPLIER_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_DIGITAL_GAIN_APPLIER_H_ + +#include <vector> + +#include "modules/audio_processing/agc2/gain_applier.h" +#include "modules/audio_processing/include/audio_frame_view.h" +#include "modules/audio_processing/include/audio_processing.h" + +namespace webrtc { + +class ApmDataDumper; + +// TODO(bugs.webrtc.org/7494): Split into `GainAdaptor` and `GainApplier`. +// Selects the target digital gain, decides when and how quickly to adapt to the +// target and applies the current gain to 10 ms frames. +class AdaptiveDigitalGainApplier { + public: + // Information about a frame to process. + struct FrameInfo { + float speech_probability; // Probability of speech in the [0, 1] range. + float speech_level_dbfs; // Estimated speech level (dBFS). + bool speech_level_reliable; // True with reliable speech level estimation. + float noise_rms_dbfs; // Estimated noise RMS level (dBFS). + float headroom_db; // Headroom (dB). + float limiter_envelope_dbfs; // Envelope level from the limiter (dBFS). + }; + + AdaptiveDigitalGainApplier( + ApmDataDumper* apm_data_dumper, + const AudioProcessing::Config::GainController2::AdaptiveDigital& config, + int sample_rate_hz, + int num_channels); + AdaptiveDigitalGainApplier(const AdaptiveDigitalGainApplier&) = delete; + AdaptiveDigitalGainApplier& operator=(const AdaptiveDigitalGainApplier&) = + delete; + + void Initialize(int sample_rate_hz, int num_channels); + + // Analyzes `info`, updates the digital gain and applies it to a 10 ms + // `frame`. Supports any sample rate supported by APM. 
+ void Process(const FrameInfo& info, AudioFrameView<float> frame); + + private: + ApmDataDumper* const apm_data_dumper_; + GainApplier gain_applier_; + + const AudioProcessing::Config::GainController2::AdaptiveDigital config_; + const float max_gain_change_db_per_10ms_; + + int calls_since_last_gain_log_; + int frames_to_gain_increase_allowed_; + float last_gain_db_; + + std::vector<std::vector<float>> dry_run_frame_; + std::vector<float*> dry_run_channels_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_DIGITAL_GAIN_APPLIER_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc new file mode 100644 index 0000000000..ea7485f512 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc @@ -0,0 +1,373 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/adaptive_digital_gain_applier.h" + +#include <algorithm> +#include <memory> + +#include "common_audio/include/audio_util.h" +#include "modules/audio_processing/agc2/agc2_common.h" +#include "modules/audio_processing/agc2/vector_float_frame.h" +#include "modules/audio_processing/include/audio_processing.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/gunit.h" + +namespace webrtc { +namespace { + +constexpr int kMono = 1; +constexpr int kStereo = 2; +constexpr int kFrameLen10ms8kHz = 80; +constexpr int kFrameLen10ms48kHz = 480; + +constexpr float kMaxSpeechProbability = 1.0f; + +// Constants used in place of estimated noise levels. +constexpr float kNoNoiseDbfs = kMinLevelDbfs; +constexpr float kWithNoiseDbfs = -20.0f; + +// Number of additional frames to process in the tests to ensure that the tested +// adaptation processes have converged. +constexpr int kNumExtraFrames = 10; + +constexpr float GetMaxGainChangePerFrameDb( + float max_gain_change_db_per_second) { + return max_gain_change_db_per_second * kFrameDurationMs / 1000.0f; +} + +using AdaptiveDigitalConfig = + AudioProcessing::Config::GainController2::AdaptiveDigital; + +constexpr AdaptiveDigitalConfig kDefaultConfig{}; + +// Helper to create initialized `AdaptiveDigitalGainApplier` objects. +struct GainApplierHelper { + GainApplierHelper(const AdaptiveDigitalConfig& config, + int sample_rate_hz, + int num_channels) + : apm_data_dumper(0), + gain_applier( + std::make_unique<AdaptiveDigitalGainApplier>(&apm_data_dumper, + config, + sample_rate_hz, + num_channels)) {} + ApmDataDumper apm_data_dumper; + std::unique_ptr<AdaptiveDigitalGainApplier> gain_applier; +}; + +// Returns a `FrameInfo` sample to simulate noiseless speech detected with +// maximum probability and with level, headroom and limiter envelope chosen +// so that the resulting gain equals the default initial adaptive digital gain +// i.e., no gain adaptation is expected. 
+AdaptiveDigitalGainApplier::FrameInfo GetFrameInfoToNotAdapt( + const AdaptiveDigitalConfig& config) { + AdaptiveDigitalGainApplier::FrameInfo info; + info.speech_probability = kMaxSpeechProbability; + info.speech_level_dbfs = -config.initial_gain_db - config.headroom_db; + info.speech_level_reliable = true; + info.noise_rms_dbfs = kNoNoiseDbfs; + info.headroom_db = config.headroom_db; + info.limiter_envelope_dbfs = -2.0f; + return info; +} + +TEST(GainController2AdaptiveGainApplier, GainApplierShouldNotCrash) { + GainApplierHelper helper(kDefaultConfig, /*sample_rate_hz=*/48000, kStereo); + // Make one call with reasonable audio level values and settings. + VectorFloatFrame fake_audio(kStereo, kFrameLen10ms48kHz, 10000.0f); + helper.gain_applier->Process(GetFrameInfoToNotAdapt(kDefaultConfig), + fake_audio.float_frame_view()); +} + +// Checks that the maximum allowed gain is applied. +TEST(GainController2AdaptiveGainApplier, MaxGainApplied) { + constexpr int kNumFramesToAdapt = + static_cast<int>(kDefaultConfig.max_gain_db / + GetMaxGainChangePerFrameDb( + kDefaultConfig.max_gain_change_db_per_second)) + + kNumExtraFrames; + + GainApplierHelper helper(kDefaultConfig, /*sample_rate_hz=*/8000, kMono); + AdaptiveDigitalGainApplier::FrameInfo info = + GetFrameInfoToNotAdapt(kDefaultConfig); + info.speech_level_dbfs = -60.0f; + float applied_gain; + for (int i = 0; i < kNumFramesToAdapt; ++i) { + VectorFloatFrame fake_audio(kMono, kFrameLen10ms8kHz, 1.0f); + helper.gain_applier->Process(info, fake_audio.float_frame_view()); + applied_gain = fake_audio.float_frame_view().channel(0)[0]; + } + const float applied_gain_db = 20.0f * std::log10f(applied_gain); + EXPECT_NEAR(applied_gain_db, kDefaultConfig.max_gain_db, 0.1f); +} + +TEST(GainController2AdaptiveGainApplier, GainDoesNotChangeFast) { + GainApplierHelper helper(kDefaultConfig, /*sample_rate_hz=*/8000, kMono); + + constexpr float initial_level_dbfs = -25.0f; + constexpr float kMaxGainChangeDbPerFrame = + GetMaxGainChangePerFrameDb(kDefaultConfig.max_gain_change_db_per_second); + constexpr int kNumFramesToAdapt = + static_cast<int>(initial_level_dbfs / kMaxGainChangeDbPerFrame) + + kNumExtraFrames; + + const float max_change_per_frame_linear = DbToRatio(kMaxGainChangeDbPerFrame); + + float last_gain_linear = 1.f; + for (int i = 0; i < kNumFramesToAdapt; ++i) { + SCOPED_TRACE(i); + VectorFloatFrame fake_audio(kMono, kFrameLen10ms8kHz, 1.0f); + AdaptiveDigitalGainApplier::FrameInfo info = + GetFrameInfoToNotAdapt(kDefaultConfig); + info.speech_level_dbfs = initial_level_dbfs; + helper.gain_applier->Process(info, fake_audio.float_frame_view()); + float current_gain_linear = fake_audio.float_frame_view().channel(0)[0]; + EXPECT_LE(std::abs(current_gain_linear - last_gain_linear), + max_change_per_frame_linear); + last_gain_linear = current_gain_linear; + } + + // Check that the same is true when gain decreases as well. 
+ for (int i = 0; i < kNumFramesToAdapt; ++i) { + SCOPED_TRACE(i); + VectorFloatFrame fake_audio(kMono, kFrameLen10ms8kHz, 1.0f); + AdaptiveDigitalGainApplier::FrameInfo info = + GetFrameInfoToNotAdapt(kDefaultConfig); + info.speech_level_dbfs = 0.f; + helper.gain_applier->Process(info, fake_audio.float_frame_view()); + float current_gain_linear = fake_audio.float_frame_view().channel(0)[0]; + EXPECT_LE(std::abs(current_gain_linear - last_gain_linear), + max_change_per_frame_linear); + last_gain_linear = current_gain_linear; + } +} + +TEST(GainController2AdaptiveGainApplier, GainIsRampedInAFrame) { + GainApplierHelper helper(kDefaultConfig, /*sample_rate_hz=*/48000, kMono); + + constexpr float initial_level_dbfs = -25.0f; + + VectorFloatFrame fake_audio(kMono, kFrameLen10ms48kHz, 1.0f); + AdaptiveDigitalGainApplier::FrameInfo info = + GetFrameInfoToNotAdapt(kDefaultConfig); + info.speech_level_dbfs = initial_level_dbfs; + helper.gain_applier->Process(info, fake_audio.float_frame_view()); + float maximal_difference = 0.0f; + float current_value = 1.0f * DbToRatio(kDefaultConfig.initial_gain_db); + for (const auto& x : fake_audio.float_frame_view().channel(0)) { + const float difference = std::abs(x - current_value); + maximal_difference = std::max(maximal_difference, difference); + current_value = x; + } + + const float max_change_per_frame_linear = DbToRatio( + GetMaxGainChangePerFrameDb(kDefaultConfig.max_gain_change_db_per_second)); + const float max_change_per_sample = + max_change_per_frame_linear / kFrameLen10ms48kHz; + + EXPECT_LE(maximal_difference, max_change_per_sample); +} + +TEST(GainController2AdaptiveGainApplier, NoiseLimitsGain) { + GainApplierHelper helper(kDefaultConfig, /*sample_rate_hz=*/48000, kMono); + + constexpr float initial_level_dbfs = -25.0f; + constexpr int num_initial_frames = + kDefaultConfig.initial_gain_db / + GetMaxGainChangePerFrameDb(kDefaultConfig.max_gain_change_db_per_second); + constexpr int num_frames = 50; + + ASSERT_GT(kWithNoiseDbfs, kDefaultConfig.max_output_noise_level_dbfs) + << "kWithNoiseDbfs is too low"; + + for (int i = 0; i < num_initial_frames + num_frames; ++i) { + VectorFloatFrame fake_audio(kMono, kFrameLen10ms48kHz, 1.0f); + AdaptiveDigitalGainApplier::FrameInfo info = + GetFrameInfoToNotAdapt(kDefaultConfig); + info.speech_level_dbfs = initial_level_dbfs; + info.noise_rms_dbfs = kWithNoiseDbfs; + helper.gain_applier->Process(info, fake_audio.float_frame_view()); + + // Wait so that the adaptive gain applier has time to lower the gain. + if (i > num_initial_frames) { + const float maximal_ratio = + *std::max_element(fake_audio.float_frame_view().channel(0).begin(), + fake_audio.float_frame_view().channel(0).end()); + + EXPECT_NEAR(maximal_ratio, 1.0f, 0.001f); + } + } +} + +TEST(GainController2GainApplier, CanHandlePositiveSpeechLevels) { + GainApplierHelper helper(kDefaultConfig, /*sample_rate_hz=*/48000, kStereo); + + // Make one call with positive audio level values and settings. 
+ VectorFloatFrame fake_audio(kStereo, kFrameLen10ms48kHz, 10000.0f); + AdaptiveDigitalGainApplier::FrameInfo info = + GetFrameInfoToNotAdapt(kDefaultConfig); + info.speech_level_dbfs = 5.0f; + helper.gain_applier->Process(info, fake_audio.float_frame_view()); +} + +TEST(GainController2GainApplier, AudioLevelLimitsGain) { + GainApplierHelper helper(kDefaultConfig, /*sample_rate_hz=*/48000, kMono); + + constexpr float initial_level_dbfs = -25.0f; + constexpr int num_initial_frames = + kDefaultConfig.initial_gain_db / + GetMaxGainChangePerFrameDb(kDefaultConfig.max_gain_change_db_per_second); + constexpr int num_frames = 50; + + ASSERT_GT(kWithNoiseDbfs, kDefaultConfig.max_output_noise_level_dbfs) + << "kWithNoiseDbfs is too low"; + + for (int i = 0; i < num_initial_frames + num_frames; ++i) { + VectorFloatFrame fake_audio(kMono, kFrameLen10ms48kHz, 1.0f); + AdaptiveDigitalGainApplier::FrameInfo info = + GetFrameInfoToNotAdapt(kDefaultConfig); + info.speech_level_dbfs = initial_level_dbfs; + info.limiter_envelope_dbfs = 1.0f; + info.speech_level_reliable = false; + helper.gain_applier->Process(info, fake_audio.float_frame_view()); + + // Wait so that the adaptive gain applier has time to lower the gain. + if (i > num_initial_frames) { + const float maximal_ratio = + *std::max_element(fake_audio.float_frame_view().channel(0).begin(), + fake_audio.float_frame_view().channel(0).end()); + + EXPECT_NEAR(maximal_ratio, 1.0f, 0.001f); + } + } +} + +class AdaptiveDigitalGainApplierTest : public ::testing::TestWithParam<int> { + protected: + int adjacent_speech_frames_threshold() const { return GetParam(); } +}; + +TEST_P(AdaptiveDigitalGainApplierTest, + DoNotIncreaseGainWithTooFewSpeechFrames) { + AdaptiveDigitalConfig config; + config.adjacent_speech_frames_threshold = adjacent_speech_frames_threshold(); + GainApplierHelper helper(config, /*sample_rate_hz=*/48000, kMono); + + // Lower the speech level so that the target gain will be increased. + AdaptiveDigitalGainApplier::FrameInfo info = GetFrameInfoToNotAdapt(config); + info.speech_level_dbfs -= 12.0f; + + float prev_gain = 0.0f; + for (int i = 0; i < config.adjacent_speech_frames_threshold; ++i) { + SCOPED_TRACE(i); + VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.0f); + helper.gain_applier->Process(info, audio.float_frame_view()); + const float gain = audio.float_frame_view().channel(0)[0]; + if (i > 0) { + EXPECT_EQ(prev_gain, gain); // No gain increase applied. + } + prev_gain = gain; + } +} + +TEST_P(AdaptiveDigitalGainApplierTest, IncreaseGainWithEnoughSpeechFrames) { + AdaptiveDigitalConfig config; + config.adjacent_speech_frames_threshold = adjacent_speech_frames_threshold(); + GainApplierHelper helper(config, /*sample_rate_hz=*/48000, kMono); + + // Lower the speech level so that the target gain will be increased. + AdaptiveDigitalGainApplier::FrameInfo info = GetFrameInfoToNotAdapt(config); + info.speech_level_dbfs -= 12.0f; + + float prev_gain = 0.0f; + for (int i = 0; i < config.adjacent_speech_frames_threshold; ++i) { + SCOPED_TRACE(i); + VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.0f); + helper.gain_applier->Process(info, audio.float_frame_view()); + prev_gain = audio.float_frame_view().channel(0)[0]; + } + + // Process one more speech frame. + VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.0f); + helper.gain_applier->Process(info, audio.float_frame_view()); + + // An increased gain has been applied. 
+ EXPECT_GT(audio.float_frame_view().channel(0)[0], prev_gain); +} + +INSTANTIATE_TEST_SUITE_P(GainController2, + AdaptiveDigitalGainApplierTest, + ::testing::Values(1, 7, 31)); + +// Checks that the input is never modified when running in dry run mode. +TEST(GainController2GainApplier, DryRunDoesNotChangeInput) { + AdaptiveDigitalConfig config; + config.dry_run = true; + GainApplierHelper helper(config, /*sample_rate_hz=*/8000, kMono); + + // Simulate an input signal with log speech level. + AdaptiveDigitalGainApplier::FrameInfo info = GetFrameInfoToNotAdapt(config); + info.speech_level_dbfs = -60.0f; + const int num_frames_to_adapt = + static_cast<int>( + config.max_gain_db / + GetMaxGainChangePerFrameDb(config.max_gain_change_db_per_second)) + + kNumExtraFrames; + constexpr float kPcmSamples = 123.456f; + // Run the gain applier and check that the PCM samples are not modified. + for (int i = 0; i < num_frames_to_adapt; ++i) { + SCOPED_TRACE(i); + VectorFloatFrame fake_audio(kMono, kFrameLen10ms8kHz, kPcmSamples); + helper.gain_applier->Process(info, fake_audio.float_frame_view()); + EXPECT_FLOAT_EQ(fake_audio.float_frame_view().channel(0)[0], kPcmSamples); + } +} + +// Checks that no sample is modified before and after the sample rate changes. +TEST(GainController2GainApplier, DryRunHandlesSampleRateChange) { + AdaptiveDigitalConfig config; + config.dry_run = true; + GainApplierHelper helper(config, /*sample_rate_hz=*/8000, kMono); + + AdaptiveDigitalGainApplier::FrameInfo info = GetFrameInfoToNotAdapt(config); + info.speech_level_dbfs = -60.0f; + constexpr float kPcmSamples = 123.456f; + VectorFloatFrame fake_audio_8k(kMono, kFrameLen10ms8kHz, kPcmSamples); + helper.gain_applier->Process(info, fake_audio_8k.float_frame_view()); + EXPECT_FLOAT_EQ(fake_audio_8k.float_frame_view().channel(0)[0], kPcmSamples); + helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kMono); + VectorFloatFrame fake_audio_48k(kMono, kFrameLen10ms48kHz, kPcmSamples); + helper.gain_applier->Process(info, fake_audio_48k.float_frame_view()); + EXPECT_FLOAT_EQ(fake_audio_48k.float_frame_view().channel(0)[0], kPcmSamples); +} + +// Checks that no sample is modified before and after the number of channels +// changes. 
+TEST(GainController2GainApplier, DryRunHandlesNumChannelsChange) { + AdaptiveDigitalConfig config; + config.dry_run = true; + GainApplierHelper helper(config, /*sample_rate_hz=*/8000, kMono); + + AdaptiveDigitalGainApplier::FrameInfo info = GetFrameInfoToNotAdapt(config); + info.speech_level_dbfs = -60.0f; + constexpr float kPcmSamples = 123.456f; + VectorFloatFrame fake_audio_8k(kMono, kFrameLen10ms8kHz, kPcmSamples); + helper.gain_applier->Process(info, fake_audio_8k.float_frame_view()); + EXPECT_FLOAT_EQ(fake_audio_8k.float_frame_view().channel(0)[0], kPcmSamples); + VectorFloatFrame fake_audio_48k(kStereo, kFrameLen10ms8kHz, kPcmSamples); + helper.gain_applier->Initialize(/*sample_rate_hz=*/8000, kStereo); + helper.gain_applier->Process(info, fake_audio_48k.float_frame_view()); + EXPECT_FLOAT_EQ(fake_audio_48k.float_frame_view().channel(0)[0], kPcmSamples); + EXPECT_FLOAT_EQ(fake_audio_48k.float_frame_view().channel(1)[0], kPcmSamples); +} + +} // namespace +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/adaptive_digital_gain_controller.cc b/third_party/libwebrtc/modules/audio_processing/agc2/adaptive_digital_gain_controller.cc new file mode 100644 index 0000000000..381e454868 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/adaptive_digital_gain_controller.cc @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/adaptive_digital_gain_controller.h" + +#include <algorithm> + +#include "common_audio/include/audio_util.h" +#include "modules/audio_processing/agc2/vad_wrapper.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" + +namespace webrtc { +namespace { + +// Peak and RMS audio levels in dBFS. +struct AudioLevels { + float peak_dbfs; + float rms_dbfs; +}; + +// Computes the audio levels for the first channel in `frame`. 
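// Illustrative note (assumed FloatS16 scaling): FloatS16ToDbfs() maps a
// full-scale sample value of 32768 to roughly 0 dBFS and 3276.8 to roughly
// -20 dBFS; the function below takes the largest |x| as the peak and
// sqrt(sum(x^2) / samples_per_channel) as the RMS before converting.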
+AudioLevels ComputeAudioLevels(AudioFrameView<float> frame) { + float peak = 0.0f; + float rms = 0.0f; + for (const auto& x : frame.channel(0)) { + peak = std::max(std::fabs(x), peak); + rms += x * x; + } + return {FloatS16ToDbfs(peak), + FloatS16ToDbfs(std::sqrt(rms / frame.samples_per_channel()))}; +} + +} // namespace + +AdaptiveDigitalGainController::AdaptiveDigitalGainController( + ApmDataDumper* apm_data_dumper, + const AudioProcessing::Config::GainController2::AdaptiveDigital& config, + int sample_rate_hz, + int num_channels) + : speech_level_estimator_(apm_data_dumper, config), + gain_controller_(apm_data_dumper, config, sample_rate_hz, num_channels), + apm_data_dumper_(apm_data_dumper), + noise_level_estimator_(CreateNoiseFloorEstimator(apm_data_dumper)), + saturation_protector_( + CreateSaturationProtector(kSaturationProtectorInitialHeadroomDb, + config.adjacent_speech_frames_threshold, + apm_data_dumper)) { + RTC_DCHECK(apm_data_dumper); + RTC_DCHECK(noise_level_estimator_); + RTC_DCHECK(saturation_protector_); +} + +AdaptiveDigitalGainController::~AdaptiveDigitalGainController() = default; + +void AdaptiveDigitalGainController::Initialize(int sample_rate_hz, + int num_channels) { + gain_controller_.Initialize(sample_rate_hz, num_channels); +} + +void AdaptiveDigitalGainController::Process(AudioFrameView<float> frame, + float speech_probability, + float limiter_envelope) { + AudioLevels levels = ComputeAudioLevels(frame); + apm_data_dumper_->DumpRaw("agc2_input_rms_dbfs", levels.rms_dbfs); + apm_data_dumper_->DumpRaw("agc2_input_peak_dbfs", levels.peak_dbfs); + + AdaptiveDigitalGainApplier::FrameInfo info; + + info.speech_probability = speech_probability; + + speech_level_estimator_.Update(levels.rms_dbfs, levels.peak_dbfs, + info.speech_probability); + info.speech_level_dbfs = speech_level_estimator_.level_dbfs(); + info.speech_level_reliable = speech_level_estimator_.IsConfident(); + apm_data_dumper_->DumpRaw("agc2_speech_level_dbfs", info.speech_level_dbfs); + apm_data_dumper_->DumpRaw("agc2_speech_level_reliable", + info.speech_level_reliable); + + info.noise_rms_dbfs = noise_level_estimator_->Analyze(frame); + apm_data_dumper_->DumpRaw("agc2_noise_rms_dbfs", info.noise_rms_dbfs); + + saturation_protector_->Analyze(info.speech_probability, levels.peak_dbfs, + info.speech_level_dbfs); + info.headroom_db = saturation_protector_->HeadroomDb(); + apm_data_dumper_->DumpRaw("agc2_headroom_db", info.headroom_db); + + info.limiter_envelope_dbfs = FloatS16ToDbfs(limiter_envelope); + apm_data_dumper_->DumpRaw("agc2_limiter_envelope_dbfs", + info.limiter_envelope_dbfs); + + gain_controller_.Process(info, frame); +} + +void AdaptiveDigitalGainController::HandleInputGainChange() { + speech_level_estimator_.Reset(); + saturation_protector_->Reset(); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/adaptive_digital_gain_controller.h b/third_party/libwebrtc/modules/audio_processing/agc2/adaptive_digital_gain_controller.h new file mode 100644 index 0000000000..75ea44591e --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/adaptive_digital_gain_controller.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_DIGITAL_GAIN_CONTROLLER_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_DIGITAL_GAIN_CONTROLLER_H_ + +#include <memory> + +#include "modules/audio_processing/agc2/adaptive_digital_gain_applier.h" +#include "modules/audio_processing/agc2/adaptive_mode_level_estimator.h" +#include "modules/audio_processing/agc2/noise_level_estimator.h" +#include "modules/audio_processing/agc2/saturation_protector.h" +#include "modules/audio_processing/include/audio_frame_view.h" +#include "modules/audio_processing/include/audio_processing.h" + +namespace webrtc { +class ApmDataDumper; + +// Gain controller that adapts and applies a variable digital gain to meet the +// target level, which is determined by the given configuration. +class AdaptiveDigitalGainController { + public: + AdaptiveDigitalGainController( + ApmDataDumper* apm_data_dumper, + const AudioProcessing::Config::GainController2::AdaptiveDigital& config, + int sample_rate_hz, + int num_channels); + AdaptiveDigitalGainController(const AdaptiveDigitalGainController&) = delete; + AdaptiveDigitalGainController& operator=( + const AdaptiveDigitalGainController&) = delete; + ~AdaptiveDigitalGainController(); + + // Detects and handles changes of sample rate and or number of channels. + void Initialize(int sample_rate_hz, int num_channels); + + // Analyzes `frame`, adapts the current digital gain and applies it to + // `frame`. + // TODO(bugs.webrtc.org/7494): Remove `limiter_envelope`. + void Process(AudioFrameView<float> frame, + float speech_probability, + float limiter_envelope); + + // Handles a gain change applied to the input signal (e.g., analog gain). + void HandleInputGainChange(); + + private: + AdaptiveModeLevelEstimator speech_level_estimator_; + AdaptiveDigitalGainApplier gain_controller_; + ApmDataDumper* const apm_data_dumper_; + std::unique_ptr<NoiseLevelEstimator> noise_level_estimator_; + std::unique_ptr<SaturationProtector> saturation_protector_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_DIGITAL_GAIN_CONTROLLER_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/adaptive_digital_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/adaptive_digital_gn/moz.build new file mode 100644 index 0000000000..26182baa24 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/adaptive_digital_gn/moz.build @@ -0,0 +1,217 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_APM_DEBUG_DUMP"] = "0" +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc", + "/third_party/libwebrtc/modules/audio_processing/agc2/adaptive_digital_gain_controller.cc", + "/third_party/libwebrtc/modules/audio_processing/agc2/adaptive_mode_level_estimator.cc", + "/third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector.cc", + "/third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector_buffer.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + 
DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +Library("adaptive_digital_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/adaptive_mode_level_estimator.cc b/third_party/libwebrtc/modules/audio_processing/agc2/adaptive_mode_level_estimator.cc new file mode 100644 index 0000000000..fe021fec05 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/adaptive_mode_level_estimator.cc @@ -0,0 +1,164 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/agc2/adaptive_mode_level_estimator.h" + +#include "modules/audio_processing/agc2/agc2_common.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" +#include "rtc_base/numerics/safe_minmax.h" + +namespace webrtc { +namespace { + +float ClampLevelEstimateDbfs(float level_estimate_dbfs) { + return rtc::SafeClamp<float>(level_estimate_dbfs, -90.0f, 30.0f); +} + +// Returns the initial speech level estimate needed to apply the initial gain. +float GetInitialSpeechLevelEstimateDbfs( + const AudioProcessing::Config::GainController2::AdaptiveDigital& config) { + return ClampLevelEstimateDbfs(-kSaturationProtectorInitialHeadroomDb - + config.initial_gain_db - config.headroom_db); +} + +} // namespace + +bool AdaptiveModeLevelEstimator::LevelEstimatorState::operator==( + const AdaptiveModeLevelEstimator::LevelEstimatorState& b) const { + return time_to_confidence_ms == b.time_to_confidence_ms && + level_dbfs.numerator == b.level_dbfs.numerator && + level_dbfs.denominator == b.level_dbfs.denominator; +} + +float AdaptiveModeLevelEstimator::LevelEstimatorState::Ratio::GetRatio() const { + RTC_DCHECK_NE(denominator, 0.f); + return numerator / denominator; +} + +AdaptiveModeLevelEstimator::AdaptiveModeLevelEstimator( + ApmDataDumper* apm_data_dumper, + const AudioProcessing::Config::GainController2::AdaptiveDigital& config) + : apm_data_dumper_(apm_data_dumper), + initial_speech_level_dbfs_(GetInitialSpeechLevelEstimateDbfs(config)), + adjacent_speech_frames_threshold_( + config.adjacent_speech_frames_threshold), + level_dbfs_(initial_speech_level_dbfs_) { + RTC_DCHECK(apm_data_dumper_); + RTC_DCHECK_GE(adjacent_speech_frames_threshold_, 1); + Reset(); +} + +void AdaptiveModeLevelEstimator::Update(float rms_dbfs, + float peak_dbfs, + float speech_probability) { + RTC_DCHECK_GT(rms_dbfs, -150.0f); + RTC_DCHECK_LT(rms_dbfs, 50.0f); + RTC_DCHECK_GT(peak_dbfs, -150.0f); + RTC_DCHECK_LT(peak_dbfs, 50.0f); + RTC_DCHECK_GE(speech_probability, 0.0f); + RTC_DCHECK_LE(speech_probability, 1.0f); + if (speech_probability < kVadConfidenceThreshold) { + // Not a speech frame. + if (adjacent_speech_frames_threshold_ > 1) { + // When two or more adjacent speech frames are required in order to update + // the state, we need to decide whether to discard or confirm the updates + // based on the speech sequence length. + if (num_adjacent_speech_frames_ >= adjacent_speech_frames_threshold_) { + // First non-speech frame after a long enough sequence of speech frames. + // Update the reliable state. + reliable_state_ = preliminary_state_; + } else if (num_adjacent_speech_frames_ > 0) { + // First non-speech frame after a too short sequence of speech frames. + // Reset to the last reliable state. + preliminary_state_ = reliable_state_; + } + } + num_adjacent_speech_frames_ = 0; + } else { + // Speech frame observed. + num_adjacent_speech_frames_++; + + // Update preliminary level estimate. + RTC_DCHECK_GE(preliminary_state_.time_to_confidence_ms, 0); + const bool buffer_is_full = preliminary_state_.time_to_confidence_ms == 0; + if (!buffer_is_full) { + preliminary_state_.time_to_confidence_ms -= kFrameDurationMs; + } + // Weighted average of levels with speech probability as weight. + RTC_DCHECK_GT(speech_probability, 0.0f); + const float leak_factor = buffer_is_full ? 
kLevelEstimatorLeakFactor : 1.0f; + preliminary_state_.level_dbfs.numerator = + preliminary_state_.level_dbfs.numerator * leak_factor + + rms_dbfs * speech_probability; + preliminary_state_.level_dbfs.denominator = + preliminary_state_.level_dbfs.denominator * leak_factor + + speech_probability; + + const float level_dbfs = preliminary_state_.level_dbfs.GetRatio(); + + if (num_adjacent_speech_frames_ >= adjacent_speech_frames_threshold_) { + // `preliminary_state_` is now reliable. Update the last level estimation. + level_dbfs_ = ClampLevelEstimateDbfs(level_dbfs); + } + } + DumpDebugData(); +} + +bool AdaptiveModeLevelEstimator::IsConfident() const { + if (adjacent_speech_frames_threshold_ == 1) { + // Ignore `reliable_state_` when a single frame is enough to update the + // level estimate (because it is not used). + return preliminary_state_.time_to_confidence_ms == 0; + } + // Once confident, it remains confident. + RTC_DCHECK(reliable_state_.time_to_confidence_ms != 0 || + preliminary_state_.time_to_confidence_ms == 0); + // During the first long enough speech sequence, `reliable_state_` must be + // ignored since `preliminary_state_` is used. + return reliable_state_.time_to_confidence_ms == 0 || + (num_adjacent_speech_frames_ >= adjacent_speech_frames_threshold_ && + preliminary_state_.time_to_confidence_ms == 0); +} + +void AdaptiveModeLevelEstimator::Reset() { + ResetLevelEstimatorState(preliminary_state_); + ResetLevelEstimatorState(reliable_state_); + level_dbfs_ = initial_speech_level_dbfs_; + num_adjacent_speech_frames_ = 0; +} + +void AdaptiveModeLevelEstimator::ResetLevelEstimatorState( + LevelEstimatorState& state) const { + state.time_to_confidence_ms = kLevelEstimatorTimeToConfidenceMs; + state.level_dbfs.numerator = initial_speech_level_dbfs_; + state.level_dbfs.denominator = 1.0f; +} + +void AdaptiveModeLevelEstimator::DumpDebugData() const { + apm_data_dumper_->DumpRaw( + "agc2_adaptive_level_estimator_num_adjacent_speech_frames", + num_adjacent_speech_frames_); + apm_data_dumper_->DumpRaw( + "agc2_adaptive_level_estimator_preliminary_level_estimate_num", + preliminary_state_.level_dbfs.numerator); + apm_data_dumper_->DumpRaw( + "agc2_adaptive_level_estimator_preliminary_level_estimate_den", + preliminary_state_.level_dbfs.denominator); + apm_data_dumper_->DumpRaw( + "agc2_adaptive_level_estimator_preliminary_time_to_confidence_ms", + preliminary_state_.time_to_confidence_ms); + apm_data_dumper_->DumpRaw( + "agc2_adaptive_level_estimator_reliable_time_to_confidence_ms", + reliable_state_.time_to_confidence_ms); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/adaptive_mode_level_estimator.h b/third_party/libwebrtc/modules/audio_processing/agc2/adaptive_mode_level_estimator.h new file mode 100644 index 0000000000..989c8c3572 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/adaptive_mode_level_estimator.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_MODE_LEVEL_ESTIMATOR_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_MODE_LEVEL_ESTIMATOR_H_ + +#include <stddef.h> + +#include <type_traits> + +#include "modules/audio_processing/agc2/agc2_common.h" +#include "modules/audio_processing/agc2/vad_wrapper.h" +#include "modules/audio_processing/include/audio_processing.h" + +namespace webrtc { +class ApmDataDumper; + +// Level estimator for the digital adaptive gain controller. +class AdaptiveModeLevelEstimator { + public: + AdaptiveModeLevelEstimator( + ApmDataDumper* apm_data_dumper, + const AudioProcessing::Config::GainController2::AdaptiveDigital& config); + AdaptiveModeLevelEstimator(const AdaptiveModeLevelEstimator&) = delete; + AdaptiveModeLevelEstimator& operator=(const AdaptiveModeLevelEstimator&) = + delete; + + // Updates the level estimation. + void Update(float rms_dbfs, float peak_dbfs, float speech_probability); + // Returns the estimated speech plus noise level. + float level_dbfs() const { return level_dbfs_; } + // Returns true if the estimator is confident on its current estimate. + bool IsConfident() const; + + void Reset(); + + private: + // Part of the level estimator state used for check-pointing and restore ops. + struct LevelEstimatorState { + bool operator==(const LevelEstimatorState& s) const; + inline bool operator!=(const LevelEstimatorState& s) const { + return !(*this == s); + } + // TODO(bugs.webrtc.org/7494): Remove `time_to_confidence_ms` if redundant. + int time_to_confidence_ms; + struct Ratio { + float numerator; + float denominator; + float GetRatio() const; + } level_dbfs; + }; + static_assert(std::is_trivially_copyable<LevelEstimatorState>::value, ""); + + void ResetLevelEstimatorState(LevelEstimatorState& state) const; + + void DumpDebugData() const; + + ApmDataDumper* const apm_data_dumper_; + + const float initial_speech_level_dbfs_; + const int adjacent_speech_frames_threshold_; + LevelEstimatorState preliminary_state_; + LevelEstimatorState reliable_state_; + float level_dbfs_; + int num_adjacent_speech_frames_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_MODE_LEVEL_ESTIMATOR_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/adaptive_mode_level_estimator_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/adaptive_mode_level_estimator_unittest.cc new file mode 100644 index 0000000000..684fca188a --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/adaptive_mode_level_estimator_unittest.cc @@ -0,0 +1,217 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/agc2/adaptive_mode_level_estimator.h" + +#include <memory> + +#include "modules/audio_processing/agc2/agc2_common.h" +#include "modules/audio_processing/include/audio_processing.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/gunit.h" + +namespace webrtc { +namespace { + +using AdaptiveDigitalConfig = + AudioProcessing::Config::GainController2::AdaptiveDigital; + +// Number of speech frames that the level estimator must observe in order to +// become confident about the estimated level. +constexpr int kNumFramesToConfidence = + kLevelEstimatorTimeToConfidenceMs / kFrameDurationMs; +static_assert(kNumFramesToConfidence > 0, ""); + +constexpr float kConvergenceSpeedTestsLevelTolerance = 0.5f; + +// Provides the `vad_level` value `num_iterations` times to `level_estimator`. +void RunOnConstantLevel(int num_iterations, + float rms_dbfs, + float peak_dbfs, + float speech_probability, + AdaptiveModeLevelEstimator& level_estimator) { + for (int i = 0; i < num_iterations; ++i) { + level_estimator.Update(rms_dbfs, peak_dbfs, speech_probability); + } +} + +constexpr AdaptiveDigitalConfig GetAdaptiveDigitalConfig( + int adjacent_speech_frames_threshold) { + AdaptiveDigitalConfig config; + config.adjacent_speech_frames_threshold = adjacent_speech_frames_threshold; + return config; +} + +constexpr float kNoSpeechProbability = 0.0f; +constexpr float kLowSpeechProbability = kVadConfidenceThreshold / 2.0f; +constexpr float kMaxSpeechProbability = 1.0f; + +// Level estimator with data dumper. +struct TestLevelEstimator { + explicit TestLevelEstimator(int adjacent_speech_frames_threshold) + : data_dumper(0), + estimator(std::make_unique<AdaptiveModeLevelEstimator>( + &data_dumper, + GetAdaptiveDigitalConfig(adjacent_speech_frames_threshold))), + initial_speech_level_dbfs(estimator->level_dbfs()), + level_rms_dbfs(initial_speech_level_dbfs / 2.0f), + level_peak_dbfs(initial_speech_level_dbfs / 3.0f) { + RTC_DCHECK_LT(level_rms_dbfs, level_peak_dbfs); + RTC_DCHECK_LT(initial_speech_level_dbfs, level_rms_dbfs); + RTC_DCHECK_GT(level_rms_dbfs - initial_speech_level_dbfs, 5.0f) + << "Adjust `level_rms_dbfs` so that the difference from the initial " + "level is wide enough for the tests"; + } + ApmDataDumper data_dumper; + std::unique_ptr<AdaptiveModeLevelEstimator> estimator; + const float initial_speech_level_dbfs; + const float level_rms_dbfs; + const float level_peak_dbfs; +}; + +// Checks that the level estimator converges to a constant input speech level. +TEST(GainController2AdaptiveModeLevelEstimator, LevelStabilizes) { + TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1); + RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence, + level_estimator.level_rms_dbfs, + level_estimator.level_peak_dbfs, kMaxSpeechProbability, + *level_estimator.estimator); + const float estimated_level_dbfs = level_estimator.estimator->level_dbfs(); + RunOnConstantLevel(/*num_iterations=*/1, level_estimator.level_rms_dbfs, + level_estimator.level_peak_dbfs, kMaxSpeechProbability, + *level_estimator.estimator); + EXPECT_NEAR(level_estimator.estimator->level_dbfs(), estimated_level_dbfs, + 0.1f); +} + +// Checks that the level controller does not become confident when too few +// speech frames are observed. 
+TEST(GainController2AdaptiveModeLevelEstimator, IsNotConfident) { + TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1); + RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence / 2, + level_estimator.level_rms_dbfs, + level_estimator.level_peak_dbfs, kMaxSpeechProbability, + *level_estimator.estimator); + EXPECT_FALSE(level_estimator.estimator->IsConfident()); +} + +// Checks that the level controller becomes confident when enough speech frames +// are observed. +TEST(GainController2AdaptiveModeLevelEstimator, IsConfident) { + TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1); + RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence, + level_estimator.level_rms_dbfs, + level_estimator.level_peak_dbfs, kMaxSpeechProbability, + *level_estimator.estimator); + EXPECT_TRUE(level_estimator.estimator->IsConfident()); +} + +// Checks that the estimated level is not affected by the level of non-speech +// frames. +TEST(GainController2AdaptiveModeLevelEstimator, + EstimatorIgnoresNonSpeechFrames) { + TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1); + // Simulate speech. + RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence, + level_estimator.level_rms_dbfs, + level_estimator.level_peak_dbfs, kMaxSpeechProbability, + *level_estimator.estimator); + const float estimated_level_dbfs = level_estimator.estimator->level_dbfs(); + // Simulate full-scale non-speech. + RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence, + /*rms_dbfs=*/0.0f, /*peak_dbfs=*/0.0f, + kNoSpeechProbability, *level_estimator.estimator); + // No estimated level change is expected. + EXPECT_FLOAT_EQ(level_estimator.estimator->level_dbfs(), + estimated_level_dbfs); +} + +// Checks the convergence speed of the estimator before it becomes confident. +TEST(GainController2AdaptiveModeLevelEstimator, + ConvergenceSpeedBeforeConfidence) { + TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1); + RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence, + level_estimator.level_rms_dbfs, + level_estimator.level_peak_dbfs, kMaxSpeechProbability, + *level_estimator.estimator); + EXPECT_NEAR(level_estimator.estimator->level_dbfs(), + level_estimator.level_rms_dbfs, + kConvergenceSpeedTestsLevelTolerance); +} + +// Checks the convergence speed of the estimator after it becomes confident. +TEST(GainController2AdaptiveModeLevelEstimator, + ConvergenceSpeedAfterConfidence) { + TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1); + // Reach confidence using the initial level estimate. + RunOnConstantLevel( + /*num_iterations=*/kNumFramesToConfidence, + /*rms_dbfs=*/level_estimator.initial_speech_level_dbfs, + /*peak_dbfs=*/level_estimator.initial_speech_level_dbfs + 6.0f, + kMaxSpeechProbability, *level_estimator.estimator); + // No estimate change should occur, but confidence is achieved. + ASSERT_FLOAT_EQ(level_estimator.estimator->level_dbfs(), + level_estimator.initial_speech_level_dbfs); + ASSERT_TRUE(level_estimator.estimator->IsConfident()); + // After confidence. + constexpr float kConvergenceTimeAfterConfidenceNumFrames = 600; // 6 seconds. 
+ static_assert( + kConvergenceTimeAfterConfidenceNumFrames > kNumFramesToConfidence, ""); + RunOnConstantLevel( + /*num_iterations=*/kConvergenceTimeAfterConfidenceNumFrames, + level_estimator.level_rms_dbfs, level_estimator.level_peak_dbfs, + kMaxSpeechProbability, *level_estimator.estimator); + EXPECT_NEAR(level_estimator.estimator->level_dbfs(), + level_estimator.level_rms_dbfs, + kConvergenceSpeedTestsLevelTolerance); +} + +class AdaptiveModeLevelEstimatorParametrization + : public ::testing::TestWithParam<int> { + protected: + int adjacent_speech_frames_threshold() const { return GetParam(); } +}; + +TEST_P(AdaptiveModeLevelEstimatorParametrization, + DoNotAdaptToShortSpeechSegments) { + TestLevelEstimator level_estimator(adjacent_speech_frames_threshold()); + const float initial_level = level_estimator.estimator->level_dbfs(); + ASSERT_LT(initial_level, level_estimator.level_peak_dbfs); + for (int i = 0; i < adjacent_speech_frames_threshold() - 1; ++i) { + SCOPED_TRACE(i); + level_estimator.estimator->Update(level_estimator.level_rms_dbfs, + level_estimator.level_peak_dbfs, + kMaxSpeechProbability); + EXPECT_EQ(initial_level, level_estimator.estimator->level_dbfs()); + } + level_estimator.estimator->Update(level_estimator.level_rms_dbfs, + level_estimator.level_peak_dbfs, + kLowSpeechProbability); + EXPECT_EQ(initial_level, level_estimator.estimator->level_dbfs()); +} + +TEST_P(AdaptiveModeLevelEstimatorParametrization, AdaptToEnoughSpeechSegments) { + TestLevelEstimator level_estimator(adjacent_speech_frames_threshold()); + const float initial_level = level_estimator.estimator->level_dbfs(); + ASSERT_LT(initial_level, level_estimator.level_peak_dbfs); + for (int i = 0; i < adjacent_speech_frames_threshold(); ++i) { + level_estimator.estimator->Update(level_estimator.level_rms_dbfs, + level_estimator.level_peak_dbfs, + kMaxSpeechProbability); + } + EXPECT_LT(initial_level, level_estimator.estimator->level_dbfs()); +} + +INSTANTIATE_TEST_SUITE_P(GainController2, + AdaptiveModeLevelEstimatorParametrization, + ::testing::Values(1, 9, 17)); + +} // namespace +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/agc2_common.h b/third_party/libwebrtc/modules/audio_processing/agc2/agc2_common.h new file mode 100644 index 0000000000..4af85527b8 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/agc2_common.h @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_AGC2_COMMON_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_AGC2_COMMON_H_ + +namespace webrtc { + +constexpr float kMinFloatS16Value = -32768.0f; +constexpr float kMaxFloatS16Value = 32767.0f; +constexpr float kMaxAbsFloatS16Value = 32768.0f; + +// Minimum audio level in dBFS scale for S16 samples. +constexpr float kMinLevelDbfs = -90.31f; + +constexpr int kFrameDurationMs = 10; +constexpr int kSubFramesInFrame = 20; +constexpr int kMaximalNumberOfSamplesPerChannel = 480; + +// Adaptive digital gain applier settings. + +// At what limiter levels should we start decreasing the adaptive digital gain. 
+constexpr float kLimiterThresholdForAgcGainDbfs = -1.0f; + +// This is the threshold for speech. Speech frames are used for updating the +// speech level, measuring the amount of speech, and decide when to allow target +// gain changes. +constexpr float kVadConfidenceThreshold = 0.95f; + +// Number of milliseconds of speech frames to observe to make the estimator +// confident. +constexpr float kLevelEstimatorTimeToConfidenceMs = 400; +constexpr float kLevelEstimatorLeakFactor = + 1.0f - 1.0f / kLevelEstimatorTimeToConfidenceMs; + +// Saturation Protector settings. +constexpr float kSaturationProtectorInitialHeadroomDb = 20.0f; +constexpr int kSaturationProtectorBufferSize = 4; + +// Number of interpolation points for each region of the limiter. +// These values have been tuned to limit the interpolated gain curve error given +// the limiter parameters and allowing a maximum error of +/- 32768^-1. +constexpr int kInterpolatedGainCurveKneePoints = 22; +constexpr int kInterpolatedGainCurveBeyondKneePoints = 10; +constexpr int kInterpolatedGainCurveTotalPoints = + kInterpolatedGainCurveKneePoints + kInterpolatedGainCurveBeyondKneePoints; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_AGC2_COMMON_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/agc2_testing_common.cc b/third_party/libwebrtc/modules/audio_processing/agc2/agc2_testing_common.cc new file mode 100644 index 0000000000..125e551b72 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/agc2_testing_common.cc @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/agc2/agc2_testing_common.h" + +#include <cmath> + +#include "rtc_base/checks.h" + +namespace webrtc { +namespace test { + +std::vector<double> LinSpace(double l, double r, int num_points) { + RTC_CHECK_GE(num_points, 2); + std::vector<double> points(num_points); + const double step = (r - l) / (num_points - 1.0); + points[0] = l; + for (int i = 1; i < num_points - 1; i++) { + points[i] = static_cast<double>(l) + i * step; + } + points[num_points - 1] = r; + return points; +} + +WhiteNoiseGenerator::WhiteNoiseGenerator(int min_amplitude, int max_amplitude) + : rand_gen_(42), + min_amplitude_(min_amplitude), + max_amplitude_(max_amplitude) { + RTC_DCHECK_LT(min_amplitude_, max_amplitude_); + RTC_DCHECK_LE(kMinS16, min_amplitude_); + RTC_DCHECK_LE(min_amplitude_, kMaxS16); + RTC_DCHECK_LE(kMinS16, max_amplitude_); + RTC_DCHECK_LE(max_amplitude_, kMaxS16); +} + +float WhiteNoiseGenerator::operator()() { + return static_cast<float>(rand_gen_.Rand(min_amplitude_, max_amplitude_)); +} + +SineGenerator::SineGenerator(float amplitude, + float frequency_hz, + int sample_rate_hz) + : amplitude_(amplitude), + frequency_hz_(frequency_hz), + sample_rate_hz_(sample_rate_hz), + x_radians_(0.0f) { + RTC_DCHECK_GT(amplitude_, 0); + RTC_DCHECK_LE(amplitude_, kMaxS16); +} + +float SineGenerator::operator()() { + constexpr float kPi = 3.1415926536f; + x_radians_ += frequency_hz_ / sample_rate_hz_ * 2 * kPi; + if (x_radians_ >= 2 * kPi) { + x_radians_ -= 2 * kPi; + } + return amplitude_ * std::sinf(x_radians_); +} + +PulseGenerator::PulseGenerator(float pulse_amplitude, + float no_pulse_amplitude, + float frequency_hz, + int sample_rate_hz) + : pulse_amplitude_(pulse_amplitude), + no_pulse_amplitude_(no_pulse_amplitude), + samples_period_( + static_cast<int>(static_cast<float>(sample_rate_hz) / frequency_hz)), + sample_counter_(0) { + RTC_DCHECK_GE(pulse_amplitude_, kMinS16); + RTC_DCHECK_LE(pulse_amplitude_, kMaxS16); + RTC_DCHECK_GT(no_pulse_amplitude_, kMinS16); + RTC_DCHECK_LE(no_pulse_amplitude_, kMaxS16); + RTC_DCHECK_GT(sample_rate_hz, frequency_hz); +} + +float PulseGenerator::operator()() { + sample_counter_++; + if (sample_counter_ >= samples_period_) { + sample_counter_ -= samples_period_; + } + return static_cast<float>(sample_counter_ == 0 ? pulse_amplitude_ + : no_pulse_amplitude_); +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/agc2_testing_common.h b/third_party/libwebrtc/modules/audio_processing/agc2/agc2_testing_common.h new file mode 100644 index 0000000000..afed97e83b --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/agc2_testing_common.h @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_AGC2_TESTING_COMMON_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_AGC2_TESTING_COMMON_H_ + +#include <limits> +#include <vector> + +#include "rtc_base/random.h" + +namespace webrtc { +namespace test { + +constexpr float kMinS16 = + static_cast<float>(std::numeric_limits<int16_t>::min()); +constexpr float kMaxS16 = + static_cast<float>(std::numeric_limits<int16_t>::max()); + +// Level Estimator test parameters. +constexpr float kDecayMs = 20.0f; + +// Limiter parameters. +constexpr float kLimiterMaxInputLevelDbFs = 1.f; +constexpr float kLimiterKneeSmoothnessDb = 1.f; +constexpr float kLimiterCompressionRatio = 5.f; + +// Returns evenly spaced `num_points` numbers over a specified interval [l, r]. +std::vector<double> LinSpace(double l, double r, int num_points); + +// Generates white noise. +class WhiteNoiseGenerator { + public: + WhiteNoiseGenerator(int min_amplitude, int max_amplitude); + float operator()(); + + private: + Random rand_gen_; + const int min_amplitude_; + const int max_amplitude_; +}; + +// Generates a sine function. +class SineGenerator { + public: + SineGenerator(float amplitude, float frequency_hz, int sample_rate_hz); + float operator()(); + + private: + const float amplitude_; + const float frequency_hz_; + const int sample_rate_hz_; + float x_radians_; +}; + +// Generates periodic pulses. +class PulseGenerator { + public: + PulseGenerator(float pulse_amplitude, + float no_pulse_amplitude, + float frequency_hz, + int sample_rate_hz); + float operator()(); + + private: + const float pulse_amplitude_; + const float no_pulse_amplitude_; + const int samples_period_; + int sample_counter_; +}; + +} // namespace test +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_AGC2_TESTING_COMMON_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/agc2_testing_common_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/agc2_testing_common_unittest.cc new file mode 100644 index 0000000000..79c3cc95d9 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/agc2_testing_common_unittest.cc @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/agc2_testing_common.h" + +#include "rtc_base/gunit.h" + +namespace webrtc { + +TEST(GainController2TestingCommon, LinSpace) { + std::vector<double> points1 = test::LinSpace(-1.0, 2.0, 4); + const std::vector<double> expected_points1{{-1.0, 0.0, 1.0, 2.0}}; + EXPECT_EQ(expected_points1, points1); + + std::vector<double> points2 = test::LinSpace(0.0, 1.0, 4); + const std::vector<double> expected_points2{{0.0, 1.0 / 3.0, 2.0 / 3.0, 1.0}}; + EXPECT_EQ(points2, expected_points2); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/biquad_filter.cc b/third_party/libwebrtc/modules/audio_processing/agc2/biquad_filter.cc new file mode 100644 index 0000000000..c1b80d7320 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/biquad_filter.cc @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/biquad_filter.h" + +#include "rtc_base/arraysize.h" + +namespace webrtc { + +BiQuadFilter::BiQuadFilter(const Config& config) + : config_(config), state_({}) {} + +BiQuadFilter::~BiQuadFilter() = default; + +void BiQuadFilter::SetConfig(const Config& config) { + config_ = config; + state_ = {}; +} + +void BiQuadFilter::Reset() { + state_ = {}; +} + +void BiQuadFilter::Process(rtc::ArrayView<const float> x, + rtc::ArrayView<float> y) { + RTC_DCHECK_EQ(x.size(), y.size()); + const float config_a0 = config_.a[0]; + const float config_a1 = config_.a[1]; + const float config_b0 = config_.b[0]; + const float config_b1 = config_.b[1]; + const float config_b2 = config_.b[2]; + float state_a0 = state_.a[0]; + float state_a1 = state_.a[1]; + float state_b0 = state_.b[0]; + float state_b1 = state_.b[1]; + for (size_t k = 0, x_size = x.size(); k < x_size; ++k) { + // Use a temporary variable for `x[k]` to allow in-place processing. + const float tmp = x[k]; + float y_k = config_b0 * tmp + config_b1 * state_b0 + config_b2 * state_b1 - + config_a0 * state_a0 - config_a1 * state_a1; + state_b1 = state_b0; + state_b0 = tmp; + state_a1 = state_a0; + state_a0 = y_k; + y[k] = y_k; + } + state_.a[0] = state_a0; + state_.a[1] = state_a1; + state_.b[0] = state_b0; + state_.b[1] = state_b1; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/biquad_filter.h b/third_party/libwebrtc/modules/audio_processing/agc2/biquad_filter.h new file mode 100644 index 0000000000..5273ff9386 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/biquad_filter.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_BIQUAD_FILTER_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_BIQUAD_FILTER_H_ + +#include "api/array_view.h" + +namespace webrtc { + +// Transposed direct form I implementation of a bi-quad filter. +// b[0] + b[1] • z^(-1) + b[2] • z^(-2) +// H(z) = ------------------------------------ +// 1 + a[1] • z^(-1) + a[2] • z^(-2) +class BiQuadFilter { + public: + // Normalized filter coefficients. + // Computed as `[b, a] = scipy.signal.butter(N=2, Wn, btype)`. + struct Config { + float b[3]; // b[0], b[1], b[2]. + float a[2]; // a[1], a[2]. + }; + + explicit BiQuadFilter(const Config& config); + BiQuadFilter(const BiQuadFilter&) = delete; + BiQuadFilter& operator=(const BiQuadFilter&) = delete; + ~BiQuadFilter(); + + // Sets the filter configuration and resets the internal state. + void SetConfig(const Config& config); + + // Zeroes the filter state. + void Reset(); + + // Filters `x` and writes the output in `y`, which must have the same length + // of `x`. In-place processing is supported. 
+ void Process(rtc::ArrayView<const float> x, rtc::ArrayView<float> y); + + private: + Config config_; + struct State { + float b[2]; + float a[2]; + } state_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_BIQUAD_FILTER_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/biquad_filter_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/biquad_filter_gn/moz.build new file mode 100644 index 0000000000..ec66966c7e --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/biquad_filter_gn/moz.build @@ -0,0 +1,201 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/agc2/biquad_filter.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] 
== "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +Library("biquad_filter_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/biquad_filter_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/biquad_filter_unittest.cc new file mode 100644 index 0000000000..a53036b08e --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/biquad_filter_unittest.cc @@ -0,0 +1,175 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/agc2/biquad_filter.h" + +#include <algorithm> +#include <array> +#include <cmath> + +// TODO(bugs.webrtc.org/8948): Add when the issue is fixed. +// #include "test/fpe_observer.h" +#include "rtc_base/gunit.h" + +namespace webrtc { +namespace { + +constexpr int kFrameSize = 8; +constexpr int kNumFrames = 4; +using FloatArraySequence = + std::array<std::array<float, kFrameSize>, kNumFrames>; + +constexpr FloatArraySequence kBiQuadInputSeq = { + {{{-87.166290f, -8.029022f, 101.619583f, -0.294296f, -5.825764f, -8.890625f, + 10.310432f, 54.845333f}}, + {{-64.647644f, -6.883945f, 11.059189f, -95.242538f, -108.870834f, + 11.024944f, 63.044102f, -52.709583f}}, + {{-32.350529f, -18.108028f, -74.022339f, -8.986874f, -1.525581f, + 103.705513f, 6.346226f, -14.319557f}}, + {{22.645832f, -64.597153f, 55.462521f, -109.393188f, 10.117825f, + -40.019642f, -98.612228f, -8.330326f}}}}; + +// Computed as `scipy.signal.butter(N=2, Wn=60/24000, btype='highpass')`. +constexpr BiQuadFilter::Config kBiQuadConfig{ + {0.99446179f, -1.98892358f, 0.99446179f}, + {-1.98889291f, 0.98895425f}}; + +// Comparing to scipy. The expected output is generated as follows: +// zi = np.float32([0, 0]) +// for i in range(4): +// yn, zi = scipy.signal.lfilter(B, A, x[i], zi=zi) +// print(yn) +constexpr FloatArraySequence kBiQuadOutputSeq = { + {{{-86.68354497f, -7.02175351f, 102.10290352f, -0.37487333f, -5.87205847f, + -8.85521608f, 10.33772563f, 54.51157181f}}, + {{-64.92531604f, -6.76395978f, 11.15534507f, -94.68073341f, -107.18177856f, + 13.24642474f, 64.84288941f, -50.97822629f}}, + {{-30.1579652f, -15.64850899f, -71.06662821f, -5.5883229f, 1.91175353f, + 106.5572003f, 8.57183046f, -12.06298473f}}, + {{24.84286614f, -62.18094158f, 57.91488056f, -106.65685933f, 13.38760103f, + -36.60367134f, -94.44880104f, -3.59920354f}}}}; + +// Fails for every pair from two equally sized rtc::ArrayView<float> views such +// that their relative error is above a given threshold. If the expected value +// of a pair is 0, `tolerance` is used to check the absolute error. +void ExpectNearRelative(rtc::ArrayView<const float> expected, + rtc::ArrayView<const float> computed, + const float tolerance) { + // The relative error is undefined when the expected value is 0. + // When that happens, check the absolute error instead. `safe_den` is used + // below to implement such logic. + auto safe_den = [](float x) { return (x == 0.0f) ? 1.0f : std::fabs(x); }; + ASSERT_EQ(expected.size(), computed.size()); + for (size_t i = 0; i < expected.size(); ++i) { + const float abs_diff = std::fabs(expected[i] - computed[i]); + // No failure when the values are equal. + if (abs_diff == 0.0f) { + continue; + } + SCOPED_TRACE(i); + SCOPED_TRACE(expected[i]); + SCOPED_TRACE(computed[i]); + EXPECT_LE(abs_diff / safe_den(expected[i]), tolerance); + } +} + +// Checks that filtering works when different containers are used both as input +// and as output. +TEST(BiQuadFilterTest, FilterNotInPlace) { + BiQuadFilter filter(kBiQuadConfig); + std::array<float, kFrameSize> samples; + + // TODO(https://bugs.webrtc.org/8948): Add when the issue is fixed. + // FloatingPointExceptionObserver fpe_observer; + + for (int i = 0; i < kNumFrames; ++i) { + SCOPED_TRACE(i); + filter.Process(kBiQuadInputSeq[i], samples); + ExpectNearRelative(kBiQuadOutputSeq[i], samples, 2e-4f); + } +} + +// Checks that filtering works when the same container is used both as input and +// as output. 
+TEST(BiQuadFilterTest, FilterInPlace) { + BiQuadFilter filter(kBiQuadConfig); + std::array<float, kFrameSize> samples; + + // TODO(https://bugs.webrtc.org/8948): Add when the issue is fixed. + // FloatingPointExceptionObserver fpe_observer; + + for (int i = 0; i < kNumFrames; ++i) { + SCOPED_TRACE(i); + std::copy(kBiQuadInputSeq[i].begin(), kBiQuadInputSeq[i].end(), + samples.begin()); + filter.Process({samples}, {samples}); + ExpectNearRelative(kBiQuadOutputSeq[i], samples, 2e-4f); + } +} + +// Checks that different configurations produce different outputs. +TEST(BiQuadFilterTest, SetConfigDifferentOutput) { + BiQuadFilter filter(/*config=*/{{0.97803048f, -1.95606096f, 0.97803048f}, + {-1.95557824f, 0.95654368f}}); + + std::array<float, kFrameSize> samples1; + for (int i = 0; i < kNumFrames; ++i) { + filter.Process(kBiQuadInputSeq[i], samples1); + } + + filter.SetConfig( + {{0.09763107f, 0.19526215f, 0.09763107f}, {-0.94280904f, 0.33333333f}}); + std::array<float, kFrameSize> samples2; + for (int i = 0; i < kNumFrames; ++i) { + filter.Process(kBiQuadInputSeq[i], samples2); + } + + EXPECT_NE(samples1, samples2); +} + +// Checks that when `SetConfig()` is called but the filter coefficients are the +// same the filter state is reset. +TEST(BiQuadFilterTest, SetConfigResetsState) { + BiQuadFilter filter(kBiQuadConfig); + + std::array<float, kFrameSize> samples1; + for (int i = 0; i < kNumFrames; ++i) { + filter.Process(kBiQuadInputSeq[i], samples1); + } + + filter.SetConfig(kBiQuadConfig); + std::array<float, kFrameSize> samples2; + for (int i = 0; i < kNumFrames; ++i) { + filter.Process(kBiQuadInputSeq[i], samples2); + } + + EXPECT_EQ(samples1, samples2); +} + +// Checks that when `Reset()` is called the filter state is reset. +TEST(BiQuadFilterTest, Reset) { + BiQuadFilter filter(kBiQuadConfig); + + std::array<float, kFrameSize> samples1; + for (int i = 0; i < kNumFrames; ++i) { + filter.Process(kBiQuadInputSeq[i], samples1); + } + + filter.Reset(); + std::array<float, kFrameSize> samples2; + for (int i = 0; i < kNumFrames; ++i) { + filter.Process(kBiQuadInputSeq[i], samples2); + } + + EXPECT_EQ(samples1, samples2); +} + +} // namespace +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/common_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/common_gn/moz.build new file mode 100644 index 0000000000..9d239a6c3d --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/common_gn/moz.build @@ -0,0 +1,189 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] 
== "arm": + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +Library("common_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/compute_interpolated_gain_curve.cc b/third_party/libwebrtc/modules/audio_processing/agc2/compute_interpolated_gain_curve.cc new file mode 100644 index 0000000000..221b499e32 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/compute_interpolated_gain_curve.cc @@ -0,0 +1,229 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/compute_interpolated_gain_curve.h" + +#include <algorithm> +#include <cmath> +#include <queue> +#include <tuple> +#include <utility> +#include <vector> + +#include "modules/audio_processing/agc2/agc2_common.h" +#include "modules/audio_processing/agc2/agc2_testing_common.h" +#include "modules/audio_processing/agc2/limiter_db_gain_curve.h" +#include "rtc_base/checks.h" + +namespace webrtc { +namespace { + +std::pair<double, double> ComputeLinearApproximationParams( + const LimiterDbGainCurve* limiter, + const double x) { + const double m = limiter->GetGainFirstDerivativeLinear(x); + const double q = limiter->GetGainLinear(x) - m * x; + return {m, q}; +} + +double ComputeAreaUnderPiecewiseLinearApproximation( + const LimiterDbGainCurve* limiter, + const double x0, + const double x1) { + RTC_CHECK_LT(x0, x1); + + // Linear approximation in x0 and x1. + double m0, q0, m1, q1; + std::tie(m0, q0) = ComputeLinearApproximationParams(limiter, x0); + std::tie(m1, q1) = ComputeLinearApproximationParams(limiter, x1); + + // Intersection point between two adjacent linear pieces. 
+ RTC_CHECK_NE(m1, m0); + const double x_split = (q0 - q1) / (m1 - m0); + RTC_CHECK_LT(x0, x_split); + RTC_CHECK_LT(x_split, x1); + + auto area_under_linear_piece = [](double x_l, double x_r, double m, + double q) { + return x_r * (m * x_r / 2.0 + q) - x_l * (m * x_l / 2.0 + q); + }; + return area_under_linear_piece(x0, x_split, m0, q0) + + area_under_linear_piece(x_split, x1, m1, q1); +} + +// Computes the approximation error in the limiter region for a given interval. +// The error is computed as the difference between the areas beneath the limiter +// curve to approximate and its linear under-approximation. +double LimiterUnderApproximationNegativeError(const LimiterDbGainCurve* limiter, + const double x0, + const double x1) { + const double area_limiter = limiter->GetGainIntegralLinear(x0, x1); + const double area_interpolated_curve = + ComputeAreaUnderPiecewiseLinearApproximation(limiter, x0, x1); + RTC_CHECK_GE(area_limiter, area_interpolated_curve); + return area_limiter - area_interpolated_curve; +} + +// Automatically finds where to sample the beyond-knee region of a limiter using +// a greedy optimization algorithm that iteratively decreases the approximation +// error. +// The solution is sub-optimal because the algorithm is greedy and the points +// are assigned by halving intervals (starting with the whole beyond-knee region +// as a single interval). However, even if sub-optimal, this algorithm works +// well in practice and it is efficiently implemented using priority queues. +std::vector<double> SampleLimiterRegion(const LimiterDbGainCurve* limiter) { + static_assert(kInterpolatedGainCurveBeyondKneePoints > 2, ""); + + struct Interval { + Interval() = default; // Ctor required by std::priority_queue. + Interval(double l, double r, double e) : x0(l), x1(r), error(e) { + RTC_CHECK(x0 < x1); + } + bool operator<(const Interval& other) const { return error < other.error; } + + double x0; + double x1; + double error; + }; + + std::priority_queue<Interval, std::vector<Interval>> q; + q.emplace(limiter->limiter_start_linear(), limiter->max_input_level_linear(), + LimiterUnderApproximationNegativeError( + limiter, limiter->limiter_start_linear(), + limiter->max_input_level_linear())); + + // Iteratively find points by halving the interval with greatest error. + while (q.size() < kInterpolatedGainCurveBeyondKneePoints) { + // Get the interval with highest error. + const auto interval = q.top(); + q.pop(); + + // Split `interval` and enqueue. + double x_split = (interval.x0 + interval.x1) / 2.0; + q.emplace(interval.x0, x_split, + LimiterUnderApproximationNegativeError(limiter, interval.x0, + x_split)); // Left. + q.emplace(x_split, interval.x1, + LimiterUnderApproximationNegativeError(limiter, x_split, + interval.x1)); // Right. + } + + // Copy x1 values and sort them. + RTC_CHECK_EQ(q.size(), kInterpolatedGainCurveBeyondKneePoints); + std::vector<double> samples(kInterpolatedGainCurveBeyondKneePoints); + for (size_t i = 0; i < kInterpolatedGainCurveBeyondKneePoints; ++i) { + const auto interval = q.top(); + q.pop(); + samples[i] = interval.x1; + } + RTC_CHECK(q.empty()); + std::sort(samples.begin(), samples.end()); + + return samples; +} + +// Compute the parameters to over-approximate the knee region via linear +// interpolation. Over-approximating is saturation-safe since the knee region is +// convex. 
+void PrecomputeKneeApproxParams(const LimiterDbGainCurve* limiter, + test::InterpolatedParameters* parameters) { + static_assert(kInterpolatedGainCurveKneePoints > 2, ""); + // Get `kInterpolatedGainCurveKneePoints` - 1 equally spaced points. + const std::vector<double> points = test::LinSpace( + limiter->knee_start_linear(), limiter->limiter_start_linear(), + kInterpolatedGainCurveKneePoints - 1); + + // Set the first two points. The second is computed to help with the beginning + // of the knee region, which has high curvature. + parameters->computed_approximation_params_x[0] = points[0]; + parameters->computed_approximation_params_x[1] = + (points[0] + points[1]) / 2.0; + // Copy the remaining points. + std::copy(std::begin(points) + 1, std::end(points), + std::begin(parameters->computed_approximation_params_x) + 2); + + // Compute (m, q) pairs for each linear piece y = mx + q. + for (size_t i = 0; i < kInterpolatedGainCurveKneePoints - 1; ++i) { + const double x0 = parameters->computed_approximation_params_x[i]; + const double x1 = parameters->computed_approximation_params_x[i + 1]; + const double y0 = limiter->GetGainLinear(x0); + const double y1 = limiter->GetGainLinear(x1); + RTC_CHECK_NE(x1, x0); + parameters->computed_approximation_params_m[i] = (y1 - y0) / (x1 - x0); + parameters->computed_approximation_params_q[i] = + y0 - parameters->computed_approximation_params_m[i] * x0; + } +} + +// Compute the parameters to under-approximate the beyond-knee region via linear +// interpolation and greedy sampling. Under-approximating is saturation-safe +// since the beyond-knee region is concave. +void PrecomputeBeyondKneeApproxParams( + const LimiterDbGainCurve* limiter, + test::InterpolatedParameters* parameters) { + // Find points on which the linear pieces are tangent to the gain curve. + const auto samples = SampleLimiterRegion(limiter); + + // Parametrize each linear piece. + double m, q; + std::tie(m, q) = ComputeLinearApproximationParams( + limiter, + parameters + ->computed_approximation_params_x[kInterpolatedGainCurveKneePoints - + 1]); + parameters + ->computed_approximation_params_m[kInterpolatedGainCurveKneePoints - 1] = + m; + parameters + ->computed_approximation_params_q[kInterpolatedGainCurveKneePoints - 1] = + q; + for (size_t i = 0; i < samples.size(); ++i) { + std::tie(m, q) = ComputeLinearApproximationParams(limiter, samples[i]); + parameters + ->computed_approximation_params_m[i + + kInterpolatedGainCurveKneePoints] = m; + parameters + ->computed_approximation_params_q[i + + kInterpolatedGainCurveKneePoints] = q; + } + + // Find the point of intersection between adjacent linear pieces. They will be + // used as boundaries between adjacent linear pieces. + for (size_t i = kInterpolatedGainCurveKneePoints; + i < kInterpolatedGainCurveKneePoints + + kInterpolatedGainCurveBeyondKneePoints; + ++i) { + RTC_CHECK_NE(parameters->computed_approximation_params_m[i], + parameters->computed_approximation_params_m[i - 1]); + parameters->computed_approximation_params_x[i] = + ( // Formula: (q0 - q1) / (m1 - m0). 
+ parameters->computed_approximation_params_q[i - 1] - + parameters->computed_approximation_params_q[i]) / + (parameters->computed_approximation_params_m[i] - + parameters->computed_approximation_params_m[i - 1]); + } +} + +} // namespace + +namespace test { + +InterpolatedParameters ComputeInterpolatedGainCurveApproximationParams() { + InterpolatedParameters parameters; + LimiterDbGainCurve limiter; + parameters.computed_approximation_params_x.fill(0.0f); + parameters.computed_approximation_params_m.fill(0.0f); + parameters.computed_approximation_params_q.fill(0.0f); + PrecomputeKneeApproxParams(&limiter, ¶meters); + PrecomputeBeyondKneeApproxParams(&limiter, ¶meters); + return parameters; +} +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/compute_interpolated_gain_curve.h b/third_party/libwebrtc/modules/audio_processing/agc2/compute_interpolated_gain_curve.h new file mode 100644 index 0000000000..08b676f5fd --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/compute_interpolated_gain_curve.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_COMPUTE_INTERPOLATED_GAIN_CURVE_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_COMPUTE_INTERPOLATED_GAIN_CURVE_H_ + +#include <array> + +#include "modules/audio_processing/agc2/agc2_common.h" + +namespace webrtc { + +namespace test { + +// Parameters for interpolated gain curve using under-approximation to +// avoid saturation. +// +// The saturation gain is defined in order to let hard-clipping occur for +// those samples having a level that falls in the saturation region. It is an +// upper bound of the actual gain to apply - i.e., that returned by the +// limiter. + +// Knee and beyond-knee regions approximation parameters. +// The gain curve is approximated as a piece-wise linear function. +// `approx_params_x_` are the boundaries between adjacent linear pieces, +// `approx_params_m_` and `approx_params_q_` are the slope and the y-intercept +// values of each piece. +struct InterpolatedParameters { + std::array<float, kInterpolatedGainCurveTotalPoints> + computed_approximation_params_x; + std::array<float, kInterpolatedGainCurveTotalPoints> + computed_approximation_params_m; + std::array<float, kInterpolatedGainCurveTotalPoints> + computed_approximation_params_q; +}; + +InterpolatedParameters ComputeInterpolatedGainCurveApproximationParams(); +} // namespace test +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_COMPUTE_INTERPOLATED_GAIN_CURVE_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/cpu_features.cc b/third_party/libwebrtc/modules/audio_processing/agc2/cpu_features.cc new file mode 100644 index 0000000000..cced7614bc --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/cpu_features.cc @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/cpu_features.h" + +#include "rtc_base/strings/string_builder.h" +#include "rtc_base/system/arch.h" +#include "system_wrappers/include/cpu_features_wrapper.h" + +namespace webrtc { + +std::string AvailableCpuFeatures::ToString() const { + char buf[64]; + rtc::SimpleStringBuilder builder(buf); + bool first = true; + if (sse2) { + builder << (first ? "SSE2" : "_SSE2"); + first = false; + } + if (avx2) { + builder << (first ? "AVX2" : "_AVX2"); + first = false; + } + if (neon) { + builder << (first ? "NEON" : "_NEON"); + first = false; + } + if (first) { + return "none"; + } + return builder.str(); +} + +// Detects available CPU features. +AvailableCpuFeatures GetAvailableCpuFeatures() { +#if defined(WEBRTC_ARCH_X86_FAMILY) + return {/*sse2=*/GetCPUInfo(kSSE2) != 0, + /*avx2=*/GetCPUInfo(kAVX2) != 0, + /*neon=*/false}; +#elif defined(WEBRTC_HAS_NEON) + return {/*sse2=*/false, + /*avx2=*/false, + /*neon=*/true}; +#else + return {/*sse2=*/false, + /*avx2=*/false, + /*neon=*/false}; +#endif +} + +AvailableCpuFeatures NoAvailableCpuFeatures() { + return {/*sse2=*/false, /*avx2=*/false, /*neon=*/false}; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/cpu_features.h b/third_party/libwebrtc/modules/audio_processing/agc2/cpu_features.h new file mode 100644 index 0000000000..54ddfb3055 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/cpu_features.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_CPU_FEATURES_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_CPU_FEATURES_H_ + +#include <string> + +namespace webrtc { + +// Collection of flags indicating which CPU features are available on the +// current platform. True means available. +struct AvailableCpuFeatures { + AvailableCpuFeatures(bool sse2, bool avx2, bool neon) + : sse2(sse2), avx2(avx2), neon(neon) {} + // Intel. + bool sse2; + bool avx2; + // ARM. + bool neon; + std::string ToString() const; +}; + +// Detects what CPU features are available. +AvailableCpuFeatures GetAvailableCpuFeatures(); + +// Returns the CPU feature flags all set to false. +AvailableCpuFeatures NoAvailableCpuFeatures(); + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_CPU_FEATURES_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/cpu_features_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/cpu_features_gn/moz.build new file mode 100644 index 0000000000..53d9444734 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/cpu_features_gn/moz.build @@ -0,0 +1,212 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/agc2/cpu_features.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + 
DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +Library("cpu_features_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/fixed_digital_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/fixed_digital_gn/moz.build new file mode 100644 index 0000000000..3e7668851c --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/fixed_digital_gn/moz.build @@ -0,0 +1,215 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_APM_DEBUG_DUMP"] = "0" +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/agc2/fixed_digital_level_estimator.cc", + "/third_party/libwebrtc/modules/audio_processing/agc2/interpolated_gain_curve.cc", + "/third_party/libwebrtc/modules/audio_processing/agc2/limiter.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + 
DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +Library("fixed_digital_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/fixed_digital_level_estimator.cc b/third_party/libwebrtc/modules/audio_processing/agc2/fixed_digital_level_estimator.cc new file mode 100644 index 0000000000..1995b24913 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/fixed_digital_level_estimator.cc @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/fixed_digital_level_estimator.h" + +#include <algorithm> +#include <cmath> + +#include "api/array_view.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/checks.h" + +namespace webrtc { +namespace { + +constexpr float kInitialFilterStateLevel = 0.0f; + +// Instant attack. +constexpr float kAttackFilterConstant = 0.0f; + +// Limiter decay constant. +// Computed as `10 ** (-1/20 * subframe_duration / kDecayMs)` where: +// - `subframe_duration` is `kFrameDurationMs / kSubFramesInFrame`; +// - `kDecayMs` is defined in agc2_testing_common.h. 
+constexpr float kDecayFilterConstant = 0.9971259f; + +} // namespace + +FixedDigitalLevelEstimator::FixedDigitalLevelEstimator( + int sample_rate_hz, + ApmDataDumper* apm_data_dumper) + : apm_data_dumper_(apm_data_dumper), + filter_state_level_(kInitialFilterStateLevel) { + SetSampleRate(sample_rate_hz); + CheckParameterCombination(); + RTC_DCHECK(apm_data_dumper_); + apm_data_dumper_->DumpRaw("agc2_level_estimator_samplerate", sample_rate_hz); +} + +void FixedDigitalLevelEstimator::CheckParameterCombination() { + RTC_DCHECK_GT(samples_in_frame_, 0); + RTC_DCHECK_LE(kSubFramesInFrame, samples_in_frame_); + RTC_DCHECK_EQ(samples_in_frame_ % kSubFramesInFrame, 0); + RTC_DCHECK_GT(samples_in_sub_frame_, 1); +} + +std::array<float, kSubFramesInFrame> FixedDigitalLevelEstimator::ComputeLevel( + const AudioFrameView<const float>& float_frame) { + RTC_DCHECK_GT(float_frame.num_channels(), 0); + RTC_DCHECK_EQ(float_frame.samples_per_channel(), samples_in_frame_); + + // Compute max envelope without smoothing. + std::array<float, kSubFramesInFrame> envelope{}; + for (int channel_idx = 0; channel_idx < float_frame.num_channels(); + ++channel_idx) { + const auto channel = float_frame.channel(channel_idx); + for (int sub_frame = 0; sub_frame < kSubFramesInFrame; ++sub_frame) { + for (int sample_in_sub_frame = 0; + sample_in_sub_frame < samples_in_sub_frame_; ++sample_in_sub_frame) { + envelope[sub_frame] = + std::max(envelope[sub_frame], + std::abs(channel[sub_frame * samples_in_sub_frame_ + + sample_in_sub_frame])); + } + } + } + + // Make sure envelope increases happen one step earlier so that the + // corresponding *gain decrease* doesn't miss a sudden signal + // increase due to interpolation. + for (int sub_frame = 0; sub_frame < kSubFramesInFrame - 1; ++sub_frame) { + if (envelope[sub_frame] < envelope[sub_frame + 1]) { + envelope[sub_frame] = envelope[sub_frame + 1]; + } + } + + // Add attack / decay smoothing. + for (int sub_frame = 0; sub_frame < kSubFramesInFrame; ++sub_frame) { + const float envelope_value = envelope[sub_frame]; + if (envelope_value > filter_state_level_) { + envelope[sub_frame] = envelope_value * (1 - kAttackFilterConstant) + + filter_state_level_ * kAttackFilterConstant; + } else { + envelope[sub_frame] = envelope_value * (1 - kDecayFilterConstant) + + filter_state_level_ * kDecayFilterConstant; + } + filter_state_level_ = envelope[sub_frame]; + + // Dump data for debug. 
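The attack/decay branch above is a one-pole peak follower: the level rises instantly (attack constant 0) and decays exponentially between sub-frames. The snippet below is an editorial sketch that distills just that recursion and shows the resulting decay speed; it is not part of the patch, and the helper name is made up.

// Editorial sketch of the smoothing recursion used above: instant attack,
// exponential decay between 0.5 ms sub-frames.
#include <cstdio>

float SmoothEnvelopeSample(float envelope_value, float& filter_state_level) {
  constexpr float kAttackFilterConstant = 0.0f;   // Instant attack.
  constexpr float kDecayFilterConstant = 0.9971259f;
  const float c = envelope_value > filter_state_level ? kAttackFilterConstant
                                                      : kDecayFilterConstant;
  filter_state_level = envelope_value * (1.0f - c) + filter_state_level * c;
  return filter_state_level;
}

int main() {
  float state = 0.0f;
  SmoothEnvelopeSample(10000.0f, state);  // Attack: the state jumps to 10000 at once.
  for (int i = 0; i < 2000; ++i) {
    SmoothEnvelopeSample(0.0f, state);    // Decay: the state shrinks by ~0.29% per step.
  }
  std::printf("%.1f\n", state);  // ~31.6, i.e. roughly -50 dB after 2000 sub-frames (1 s).
  return 0;
}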
+ RTC_DCHECK(apm_data_dumper_); + const auto channel = float_frame.channel(0); + apm_data_dumper_->DumpRaw("agc2_level_estimator_samples", + samples_in_sub_frame_, + &channel[sub_frame * samples_in_sub_frame_]); + apm_data_dumper_->DumpRaw("agc2_level_estimator_level", + envelope[sub_frame]); + } + + return envelope; +} + +void FixedDigitalLevelEstimator::SetSampleRate(int sample_rate_hz) { + samples_in_frame_ = + rtc::CheckedDivExact(sample_rate_hz * kFrameDurationMs, 1000); + samples_in_sub_frame_ = + rtc::CheckedDivExact(samples_in_frame_, kSubFramesInFrame); + CheckParameterCombination(); +} + +void FixedDigitalLevelEstimator::Reset() { + filter_state_level_ = kInitialFilterStateLevel; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/fixed_digital_level_estimator.h b/third_party/libwebrtc/modules/audio_processing/agc2/fixed_digital_level_estimator.h new file mode 100644 index 0000000000..d26b55950c --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/fixed_digital_level_estimator.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_FIXED_DIGITAL_LEVEL_ESTIMATOR_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_FIXED_DIGITAL_LEVEL_ESTIMATOR_H_ + +#include <array> +#include <vector> + +#include "modules/audio_processing/agc2/agc2_common.h" +#include "modules/audio_processing/include/audio_frame_view.h" + +namespace webrtc { + +class ApmDataDumper; +// Produces a smooth signal level estimate from an input audio +// stream. The estimate smoothing is done through exponential +// filtering. +class FixedDigitalLevelEstimator { + public: + // Sample rates are allowed if the number of samples in a frame + // (sample_rate_hz * kFrameDurationMs / 1000) is divisible by + // kSubFramesInSample. For kFrameDurationMs=10 and + // kSubFramesInSample=20, this means that sample_rate_hz has to be + // divisible by 2000. + FixedDigitalLevelEstimator(int sample_rate_hz, + ApmDataDumper* apm_data_dumper); + + FixedDigitalLevelEstimator(const FixedDigitalLevelEstimator&) = delete; + FixedDigitalLevelEstimator& operator=(const FixedDigitalLevelEstimator&) = + delete; + + // The input is assumed to be in FloatS16 format. Scaled input will + // produce similarly scaled output. A frame of with kFrameDurationMs + // ms of audio produces a level estimates in the same scale. The + // level estimate contains kSubFramesInFrame values. + std::array<float, kSubFramesInFrame> ComputeLevel( + const AudioFrameView<const float>& float_frame); + + // Rate may be changed at any time (but not concurrently) from the + // value passed to the constructor. The class is not thread safe. + void SetSampleRate(int sample_rate_hz); + + // Resets the level estimator internal state. 
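The sample-rate restriction documented above (the samples in a frame must divide evenly into sub-frames) can be verified with the same arithmetic SetSampleRate uses. The check below is illustrative only and assumes the 10 ms frame / 20 sub-frame split from agc2_common.h.

// Editorial sketch: samples per frame and per sub-frame for common rates.
#include <cstdio>

int main() {
  constexpr int kFrameDurationMs = 10;   // Assumed, from agc2_common.h.
  constexpr int kSubFramesInFrame = 20;  // Assumed, from agc2_common.h.
  const int rates_hz[] = {8000, 16000, 32000, 48000};
  for (const int sample_rate_hz : rates_hz) {
    const int samples_in_frame = sample_rate_hz * kFrameDurationMs / 1000;
    const int samples_in_sub_frame = samples_in_frame / kSubFramesInFrame;
    // Any rate divisible by 2000 yields a whole number of samples per
    // sub-frame, e.g. 48000 Hz -> 480 per frame, 24 per sub-frame.
    std::printf("%d Hz: %d samples/frame, %d samples/sub-frame\n",
                sample_rate_hz, samples_in_frame, samples_in_sub_frame);
  }
  return 0;
}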
+ void Reset(); + + float LastAudioLevel() const { return filter_state_level_; } + + private: + void CheckParameterCombination(); + + ApmDataDumper* const apm_data_dumper_ = nullptr; + float filter_state_level_; + int samples_in_frame_; + int samples_in_sub_frame_; +}; +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_FIXED_DIGITAL_LEVEL_ESTIMATOR_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/fixed_digital_level_estimator_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/fixed_digital_level_estimator_unittest.cc new file mode 100644 index 0000000000..97b421d04c --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/fixed_digital_level_estimator_unittest.cc @@ -0,0 +1,159 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/fixed_digital_level_estimator.h" + +#include <limits> + +#include "common_audio/include/audio_util.h" +#include "modules/audio_processing/agc2/agc2_common.h" +#include "modules/audio_processing/agc2/agc2_testing_common.h" +#include "modules/audio_processing/agc2/vector_float_frame.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/gunit.h" + +namespace webrtc { +namespace { + +constexpr float kInputLevel = 10000.f; + +// Run audio at specified settings through the level estimator, and +// verify that the output level falls within the bounds. +void TestLevelEstimator(int sample_rate_hz, + int num_channels, + float input_level_linear_scale, + float expected_min, + float expected_max) { + ApmDataDumper apm_data_dumper(0); + FixedDigitalLevelEstimator level_estimator(sample_rate_hz, &apm_data_dumper); + + const VectorFloatFrame vectors_with_float_frame( + num_channels, rtc::CheckedDivExact(sample_rate_hz, 100), + input_level_linear_scale); + + for (int i = 0; i < 500; ++i) { + const auto level = level_estimator.ComputeLevel( + vectors_with_float_frame.float_frame_view()); + + // Give the estimator some time to ramp up. + if (i < 50) { + continue; + } + + for (const auto& x : level) { + EXPECT_LE(expected_min, x); + EXPECT_LE(x, expected_max); + } + } +} + +// Returns time it takes for the level estimator to decrease its level +// estimate by 'level_reduction_db'. +float TimeMsToDecreaseLevel(int sample_rate_hz, + int num_channels, + float input_level_db, + float level_reduction_db) { + const float input_level = DbfsToFloatS16(input_level_db); + RTC_DCHECK_GT(level_reduction_db, 0); + + const VectorFloatFrame vectors_with_float_frame( + num_channels, rtc::CheckedDivExact(sample_rate_hz, 100), input_level); + + ApmDataDumper apm_data_dumper(0); + FixedDigitalLevelEstimator level_estimator(sample_rate_hz, &apm_data_dumper); + + // Give the LevelEstimator plenty of time to ramp up and stabilize + float last_level = 0.f; + for (int i = 0; i < 500; ++i) { + const auto level_envelope = level_estimator.ComputeLevel( + vectors_with_float_frame.float_frame_view()); + last_level = *level_envelope.rbegin(); + } + + // Set input to 0. 
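The test helpers in this file convert dBFS levels to the FloatS16 scale with DbfsToFloatS16 from common_audio. A minimal stand-in, assuming the usual convention that 0 dBFS corresponds to 32768 on that scale, is sketched here; the function name is made up for illustration and is not the library API.

// Editorial sketch of the dBFS -> FloatS16 conversion assumed by the tests.
#include <cmath>
#include <cstdio>

float DbfsToFloatS16Sketch(float level_dbfs) {
  constexpr float kFullScale = 32768.0f;  // Assumed: 0 dBFS on the FloatS16 scale.
  return std::pow(10.0f, level_dbfs / 20.0f) * kFullScale;
}

int main() {
  std::printf("%.2f\n", DbfsToFloatS16Sketch(0.0f));    // 32768.00
  std::printf("%.2f\n", DbfsToFloatS16Sketch(-40.0f));  // 327.68, the -40 dBFS test level.
  return 0;
}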
+ VectorFloatFrame vectors_with_zero_float_frame( + num_channels, rtc::CheckedDivExact(sample_rate_hz, 100), 0); + + const float reduced_level_linear = + DbfsToFloatS16(input_level_db - level_reduction_db); + int sub_frames_until_level_reduction = 0; + while (last_level > reduced_level_linear) { + const auto level_envelope = level_estimator.ComputeLevel( + vectors_with_zero_float_frame.float_frame_view()); + for (const auto& v : level_envelope) { + EXPECT_LT(v, last_level); + sub_frames_until_level_reduction++; + last_level = v; + if (last_level <= reduced_level_linear) { + break; + } + } + } + return static_cast<float>(sub_frames_until_level_reduction) * + kFrameDurationMs / kSubFramesInFrame; +} +} // namespace + +TEST(GainController2FixedDigitalLevelEstimator, EstimatorShouldNotCrash) { + TestLevelEstimator(8000, 1, 0, std::numeric_limits<float>::lowest(), + std::numeric_limits<float>::max()); +} + +TEST(GainController2FixedDigitalLevelEstimator, + EstimatorShouldEstimateConstantLevel) { + TestLevelEstimator(10000, 1, kInputLevel, kInputLevel * 0.99, + kInputLevel * 1.01); +} + +TEST(GainController2FixedDigitalLevelEstimator, + EstimatorShouldEstimateConstantLevelForManyChannels) { + constexpr size_t num_channels = 10; + TestLevelEstimator(20000, num_channels, kInputLevel, kInputLevel * 0.99, + kInputLevel * 1.01); +} + +TEST(GainController2FixedDigitalLevelEstimator, TimeToDecreaseForLowLevel) { + constexpr float kLevelReductionDb = 25; + constexpr float kInitialLowLevel = -40; + constexpr float kExpectedTime = kLevelReductionDb * test::kDecayMs; + + const float time_to_decrease = + TimeMsToDecreaseLevel(22000, 1, kInitialLowLevel, kLevelReductionDb); + + EXPECT_LE(kExpectedTime * 0.9, time_to_decrease); + EXPECT_LE(time_to_decrease, kExpectedTime * 1.1); +} + +TEST(GainController2FixedDigitalLevelEstimator, + TimeToDecreaseForFullScaleLevel) { + constexpr float kLevelReductionDb = 25; + constexpr float kExpectedTime = kLevelReductionDb * test::kDecayMs; + + const float time_to_decrease = + TimeMsToDecreaseLevel(26000, 1, 0, kLevelReductionDb); + + EXPECT_LE(kExpectedTime * 0.9, time_to_decrease); + EXPECT_LE(time_to_decrease, kExpectedTime * 1.1); +} + +TEST(GainController2FixedDigitalLevelEstimator, + TimeToDecreaseForMultipleChannels) { + constexpr float kLevelReductionDb = 25; + constexpr float kExpectedTime = kLevelReductionDb * test::kDecayMs; + constexpr size_t kNumChannels = 10; + + const float time_to_decrease = + TimeMsToDecreaseLevel(28000, kNumChannels, 0, kLevelReductionDb); + + EXPECT_LE(kExpectedTime * 0.9, time_to_decrease); + EXPECT_LE(time_to_decrease, kExpectedTime * 1.1); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/gain_applier.cc b/third_party/libwebrtc/modules/audio_processing/agc2/gain_applier.cc new file mode 100644 index 0000000000..f9e276d3a8 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/gain_applier.cc @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/agc2/gain_applier.h" + +#include "api/array_view.h" +#include "modules/audio_processing/agc2/agc2_common.h" +#include "rtc_base/numerics/safe_minmax.h" + +namespace webrtc { +namespace { + +// Returns true when the gain factor is so close to 1 that it would +// not affect int16 samples. +bool GainCloseToOne(float gain_factor) { + return 1.f - 1.f / kMaxFloatS16Value <= gain_factor && + gain_factor <= 1.f + 1.f / kMaxFloatS16Value; +} + +void ClipSignal(AudioFrameView<float> signal) { + for (int k = 0; k < signal.num_channels(); ++k) { + rtc::ArrayView<float> channel_view = signal.channel(k); + for (auto& sample : channel_view) { + sample = rtc::SafeClamp(sample, kMinFloatS16Value, kMaxFloatS16Value); + } + } +} + +void ApplyGainWithRamping(float last_gain_linear, + float gain_at_end_of_frame_linear, + float inverse_samples_per_channel, + AudioFrameView<float> float_frame) { + // Do not modify the signal. + if (last_gain_linear == gain_at_end_of_frame_linear && + GainCloseToOne(gain_at_end_of_frame_linear)) { + return; + } + + // Gain is constant and different from 1. + if (last_gain_linear == gain_at_end_of_frame_linear) { + for (int k = 0; k < float_frame.num_channels(); ++k) { + rtc::ArrayView<float> channel_view = float_frame.channel(k); + for (auto& sample : channel_view) { + sample *= gain_at_end_of_frame_linear; + } + } + return; + } + + // The gain changes. We have to change slowly to avoid discontinuities. + const float increment = (gain_at_end_of_frame_linear - last_gain_linear) * + inverse_samples_per_channel; + float gain = last_gain_linear; + for (int i = 0; i < float_frame.samples_per_channel(); ++i) { + for (int ch = 0; ch < float_frame.num_channels(); ++ch) { + float_frame.channel(ch)[i] *= gain; + } + gain += increment; + } +} + +} // namespace + +GainApplier::GainApplier(bool hard_clip_samples, float initial_gain_factor) + : hard_clip_samples_(hard_clip_samples), + last_gain_factor_(initial_gain_factor), + current_gain_factor_(initial_gain_factor) {} + +void GainApplier::ApplyGain(AudioFrameView<float> signal) { + if (static_cast<int>(signal.samples_per_channel()) != samples_per_channel_) { + Initialize(signal.samples_per_channel()); + } + + ApplyGainWithRamping(last_gain_factor_, current_gain_factor_, + inverse_samples_per_channel_, signal); + + last_gain_factor_ = current_gain_factor_; + + if (hard_clip_samples_) { + ClipSignal(signal); + } +} + +// TODO(bugs.webrtc.org/7494): Remove once switched to gains in dB. +void GainApplier::SetGainFactor(float gain_factor) { + RTC_DCHECK_GT(gain_factor, 0.f); + current_gain_factor_ = gain_factor; +} + +void GainApplier::Initialize(int samples_per_channel) { + RTC_DCHECK_GT(samples_per_channel, 0); + samples_per_channel_ = static_cast<int>(samples_per_channel); + inverse_samples_per_channel_ = 1.f / samples_per_channel_; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/gain_applier.h b/third_party/libwebrtc/modules/audio_processing/agc2/gain_applier.h new file mode 100644 index 0000000000..ba8a4a4cd2 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/gain_applier.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_GAIN_APPLIER_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_GAIN_APPLIER_H_ + +#include <stddef.h> + +#include "modules/audio_processing/include/audio_frame_view.h" + +namespace webrtc { +class GainApplier { + public: + GainApplier(bool hard_clip_samples, float initial_gain_factor); + + void ApplyGain(AudioFrameView<float> signal); + void SetGainFactor(float gain_factor); + float GetGainFactor() const { return current_gain_factor_; } + + private: + void Initialize(int samples_per_channel); + + // Whether to clip samples after gain is applied. If 'true', result + // will fit in FloatS16 range. + const bool hard_clip_samples_; + float last_gain_factor_; + + // If this value is not equal to 'last_gain_factor', gain will be + // ramped from 'last_gain_factor_' to this value during the next + // 'ApplyGain'. + float current_gain_factor_; + int samples_per_channel_ = -1; + float inverse_samples_per_channel_ = -1.f; +}; +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_GAIN_APPLIER_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/gain_applier_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/gain_applier_gn/moz.build new file mode 100644 index 0000000000..1e7fc30e59 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/gain_applier_gn/moz.build @@ -0,0 +1,201 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/agc2/gain_applier.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if 
CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +Library("gain_applier_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/gain_applier_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/gain_applier_unittest.cc new file mode 100644 index 0000000000..3296345e62 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/gain_applier_unittest.cc @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/agc2/gain_applier.h" + +#include <math.h> + +#include <algorithm> +#include <limits> + +#include "modules/audio_processing/agc2/vector_float_frame.h" +#include "rtc_base/gunit.h" + +namespace webrtc { +TEST(AutomaticGainController2GainApplier, InitialGainIsRespected) { + constexpr float initial_signal_level = 123.f; + constexpr float gain_factor = 10.f; + VectorFloatFrame fake_audio(1, 1, initial_signal_level); + GainApplier gain_applier(true, gain_factor); + + gain_applier.ApplyGain(fake_audio.float_frame_view()); + EXPECT_NEAR(fake_audio.float_frame_view().channel(0)[0], + initial_signal_level * gain_factor, 0.1f); +} + +TEST(AutomaticGainController2GainApplier, ClippingIsDone) { + constexpr float initial_signal_level = 30000.f; + constexpr float gain_factor = 10.f; + VectorFloatFrame fake_audio(1, 1, initial_signal_level); + GainApplier gain_applier(true, gain_factor); + + gain_applier.ApplyGain(fake_audio.float_frame_view()); + EXPECT_NEAR(fake_audio.float_frame_view().channel(0)[0], + std::numeric_limits<int16_t>::max(), 0.1f); +} + +TEST(AutomaticGainController2GainApplier, ClippingIsNotDone) { + constexpr float initial_signal_level = 30000.f; + constexpr float gain_factor = 10.f; + VectorFloatFrame fake_audio(1, 1, initial_signal_level); + GainApplier gain_applier(false, gain_factor); + + gain_applier.ApplyGain(fake_audio.float_frame_view()); + + EXPECT_NEAR(fake_audio.float_frame_view().channel(0)[0], + initial_signal_level * gain_factor, 0.1f); +} + +TEST(AutomaticGainController2GainApplier, RampingIsDone) { + constexpr float initial_signal_level = 30000.f; + constexpr float initial_gain_factor = 1.f; + constexpr float target_gain_factor = 0.5f; + constexpr int num_channels = 3; + constexpr int samples_per_channel = 4; + VectorFloatFrame fake_audio(num_channels, samples_per_channel, + initial_signal_level); + GainApplier gain_applier(false, initial_gain_factor); + + gain_applier.SetGainFactor(target_gain_factor); + gain_applier.ApplyGain(fake_audio.float_frame_view()); + + // The maximal gain change should be close to that in linear interpolation. + for (size_t channel = 0; channel < num_channels; ++channel) { + float max_signal_change = 0.f; + float last_signal_level = initial_signal_level; + for (const auto sample : fake_audio.float_frame_view().channel(channel)) { + const float current_change = fabs(last_signal_level - sample); + max_signal_change = std::max(max_signal_change, current_change); + last_signal_level = sample; + } + const float total_gain_change = + fabs((initial_gain_factor - target_gain_factor) * initial_signal_level); + EXPECT_NEAR(max_signal_change, total_gain_change / samples_per_channel, + 0.1f); + } + + // Next frame should have the desired level. + VectorFloatFrame next_fake_audio_frame(num_channels, samples_per_channel, + initial_signal_level); + gain_applier.ApplyGain(next_fake_audio_frame.float_frame_view()); + + // The last sample should have the new gain. + EXPECT_NEAR(next_fake_audio_frame.float_frame_view().channel(0)[0], + initial_signal_level * target_gain_factor, 0.1f); +} +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/interpolated_gain_curve.cc b/third_party/libwebrtc/modules/audio_processing/agc2/interpolated_gain_curve.cc new file mode 100644 index 0000000000..bb6e038514 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/interpolated_gain_curve.cc @@ -0,0 +1,204 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/interpolated_gain_curve.h" + +#include <algorithm> +#include <iterator> + +#include "absl/strings/string_view.h" +#include "modules/audio_processing/agc2/agc2_common.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/checks.h" +#include "rtc_base/strings/string_builder.h" + +namespace webrtc { + +constexpr std::array<float, kInterpolatedGainCurveTotalPoints> + InterpolatedGainCurve::approximation_params_x_; + +constexpr std::array<float, kInterpolatedGainCurveTotalPoints> + InterpolatedGainCurve::approximation_params_m_; + +constexpr std::array<float, kInterpolatedGainCurveTotalPoints> + InterpolatedGainCurve::approximation_params_q_; + +InterpolatedGainCurve::InterpolatedGainCurve( + ApmDataDumper* apm_data_dumper, + absl::string_view histogram_name_prefix) + : region_logger_( + (rtc::StringBuilder("WebRTC.Audio.") + << histogram_name_prefix << ".FixedDigitalGainCurveRegion.Identity") + .str(), + (rtc::StringBuilder("WebRTC.Audio.") + << histogram_name_prefix << ".FixedDigitalGainCurveRegion.Knee") + .str(), + (rtc::StringBuilder("WebRTC.Audio.") + << histogram_name_prefix << ".FixedDigitalGainCurveRegion.Limiter") + .str(), + (rtc::StringBuilder("WebRTC.Audio.") + << histogram_name_prefix + << ".FixedDigitalGainCurveRegion.Saturation") + .str()), + apm_data_dumper_(apm_data_dumper) {} + +InterpolatedGainCurve::~InterpolatedGainCurve() { + if (stats_.available) { + RTC_DCHECK(apm_data_dumper_); + apm_data_dumper_->DumpRaw("agc2_interp_gain_curve_lookups_identity", + stats_.look_ups_identity_region); + apm_data_dumper_->DumpRaw("agc2_interp_gain_curve_lookups_knee", + stats_.look_ups_knee_region); + apm_data_dumper_->DumpRaw("agc2_interp_gain_curve_lookups_limiter", + stats_.look_ups_limiter_region); + apm_data_dumper_->DumpRaw("agc2_interp_gain_curve_lookups_saturation", + stats_.look_ups_saturation_region); + region_logger_.LogRegionStats(stats_); + } +} + +InterpolatedGainCurve::RegionLogger::RegionLogger( + absl::string_view identity_histogram_name, + absl::string_view knee_histogram_name, + absl::string_view limiter_histogram_name, + absl::string_view saturation_histogram_name) + : identity_histogram( + metrics::HistogramFactoryGetCounts(identity_histogram_name, + 1, + 10000, + 50)), + knee_histogram(metrics::HistogramFactoryGetCounts(knee_histogram_name, + 1, + 10000, + 50)), + limiter_histogram( + metrics::HistogramFactoryGetCounts(limiter_histogram_name, + 1, + 10000, + 50)), + saturation_histogram( + metrics::HistogramFactoryGetCounts(saturation_histogram_name, + 1, + 10000, + 50)) {} + +InterpolatedGainCurve::RegionLogger::~RegionLogger() = default; + +void InterpolatedGainCurve::RegionLogger::LogRegionStats( + const InterpolatedGainCurve::Stats& stats) const { + using Region = InterpolatedGainCurve::GainCurveRegion; + const int duration_s = + stats.region_duration_frames / (1000 / kFrameDurationMs); + + switch (stats.region) { + case Region::kIdentity: { + if (identity_histogram) { + metrics::HistogramAdd(identity_histogram, duration_s); + } + break; + } + case Region::kKnee: { + if (knee_histogram) { + metrics::HistogramAdd(knee_histogram, duration_s); + } + break; + } 
+ case Region::kLimiter: { + if (limiter_histogram) { + metrics::HistogramAdd(limiter_histogram, duration_s); + } + break; + } + case Region::kSaturation: { + if (saturation_histogram) { + metrics::HistogramAdd(saturation_histogram, duration_s); + } + break; + } + default: { + RTC_DCHECK_NOTREACHED(); + } + } +} + +void InterpolatedGainCurve::UpdateStats(float input_level) const { + stats_.available = true; + + GainCurveRegion region; + + if (input_level < approximation_params_x_[0]) { + stats_.look_ups_identity_region++; + region = GainCurveRegion::kIdentity; + } else if (input_level < + approximation_params_x_[kInterpolatedGainCurveKneePoints - 1]) { + stats_.look_ups_knee_region++; + region = GainCurveRegion::kKnee; + } else if (input_level < kMaxInputLevelLinear) { + stats_.look_ups_limiter_region++; + region = GainCurveRegion::kLimiter; + } else { + stats_.look_ups_saturation_region++; + region = GainCurveRegion::kSaturation; + } + + if (region == stats_.region) { + ++stats_.region_duration_frames; + } else { + region_logger_.LogRegionStats(stats_); + + stats_.region_duration_frames = 0; + stats_.region = region; + } +} + +// Looks up a gain to apply given a non-negative input level. +// The cost of this operation depends on the region in which `input_level` +// falls. +// For the identity and the saturation regions the cost is O(1). +// For the other regions, namely knee and limiter, the cost is +// O(2 + log2(`LightkInterpolatedGainCurveTotalPoints`), plus O(1) for the +// linear interpolation (one product and one sum). +float InterpolatedGainCurve::LookUpGainToApply(float input_level) const { + UpdateStats(input_level); + + if (input_level <= approximation_params_x_[0]) { + // Identity region. + return 1.0f; + } + + if (input_level >= kMaxInputLevelLinear) { + // Saturating lower bound. The saturing samples exactly hit the clipping + // level. This method achieves has the lowest harmonic distorsion, but it + // may reduce the amplitude of the non-saturating samples too much. + return 32768.f / input_level; + } + + // Knee and limiter regions; find the linear piece index. Spelling + // out the complete type was the only way to silence both the clang + // plugin and the windows compilers. + std::array<float, kInterpolatedGainCurveTotalPoints>::const_iterator it = + std::lower_bound(approximation_params_x_.begin(), + approximation_params_x_.end(), input_level); + const size_t index = std::distance(approximation_params_x_.begin(), it) - 1; + RTC_DCHECK_LE(0, index); + RTC_DCHECK_LT(index, approximation_params_m_.size()); + RTC_DCHECK_LE(approximation_params_x_[index], input_level); + if (index < approximation_params_m_.size() - 1) { + RTC_DCHECK_LE(input_level, approximation_params_x_[index + 1]); + } + + // Piece-wise linear interploation. + const float gain = approximation_params_m_[index] * input_level + + approximation_params_q_[index]; + RTC_DCHECK_LE(0.f, gain); + return gain; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/interpolated_gain_curve.h b/third_party/libwebrtc/modules/audio_processing/agc2/interpolated_gain_curve.h new file mode 100644 index 0000000000..8dd3e48f21 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/interpolated_gain_curve.h @@ -0,0 +1,152 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_INTERPOLATED_GAIN_CURVE_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_INTERPOLATED_GAIN_CURVE_H_ + +#include <array> + +#include "absl/strings/string_view.h" +#include "modules/audio_processing/agc2/agc2_common.h" +#include "rtc_base/gtest_prod_util.h" +#include "system_wrappers/include/metrics.h" + +namespace webrtc { + +class ApmDataDumper; + +constexpr float kInputLevelScalingFactor = 32768.0f; + +// Defined as DbfsToLinear(kLimiterMaxInputLevelDbFs) +constexpr float kMaxInputLevelLinear = static_cast<float>(36766.300710566735); + +// Interpolated gain curve using under-approximation to avoid saturation. +// +// The goal of this class is allowing fast look ups to get an accurate +// estimates of the gain to apply given an estimated input level. +class InterpolatedGainCurve { + public: + enum class GainCurveRegion { + kIdentity = 0, + kKnee = 1, + kLimiter = 2, + kSaturation = 3 + }; + + struct Stats { + // Region in which the output level equals the input one. + size_t look_ups_identity_region = 0; + // Smoothing between the identity and the limiter regions. + size_t look_ups_knee_region = 0; + // Limiter region in which the output and input levels are linearly related. + size_t look_ups_limiter_region = 0; + // Region in which saturation may occur since the input level is beyond the + // maximum expected by the limiter. + size_t look_ups_saturation_region = 0; + // True if stats have been populated. + bool available = false; + + // The current region, and for how many frames the level has been + // in that region. + GainCurveRegion region = GainCurveRegion::kIdentity; + int64_t region_duration_frames = 0; + }; + + InterpolatedGainCurve(ApmDataDumper* apm_data_dumper, + absl::string_view histogram_name_prefix); + ~InterpolatedGainCurve(); + + InterpolatedGainCurve(const InterpolatedGainCurve&) = delete; + InterpolatedGainCurve& operator=(const InterpolatedGainCurve&) = delete; + + Stats get_stats() const { return stats_; } + + // Given a non-negative input level (linear scale), a scalar factor to apply + // to a sub-frame is returned. + // Levels above kLimiterMaxInputLevelDbFs will be reduced to 0 dBFS + // after applying this gain + float LookUpGainToApply(float input_level) const; + + private: + // For comparing 'approximation_params_*_' with ones computed by + // ComputeInterpolatedGainCurve. 
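Aside from the identity and saturation shortcuts, LookUpGainToApply reduces to a binary search over the segment boundaries followed by a single multiply-add. The sketch below illustrates that step with three made-up segments; the real 32-point parameter arrays follow in the private section of the class.

// Editorial sketch of the piece-wise linear lookup: find the segment whose
// boundary lies just below the level, then evaluate gain = m * x + q.
#include <algorithm>
#include <array>
#include <cstdio>

int main() {
  // Three made-up segments, for illustration only.
  const std::array<float, 3> x = {30000.0f, 33000.0f, 36000.0f};  // Boundaries.
  const std::array<float, 3> m = {-1.0e-6f, -2.0e-6f, -1.5e-6f};  // Slopes.
  const std::array<float, 3> q = {1.03f, 1.07f, 1.05f};           // Y-intercepts.
  // The real code first handles the identity (< x[0]) and saturation regions,
  // so this search only runs for levels inside the knee/limiter range.
  const float input_level = 34000.0f;
  const auto it = std::lower_bound(x.begin(), x.end(), input_level);
  const size_t index = std::distance(x.begin(), it) - 1;
  const float gain = m[index] * input_level + q[index];
  std::printf("gain = %.3f\n", gain);  // Segment 1: -2e-6 * 34000 + 1.07 = 1.002.
  return 0;
}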
+ FRIEND_TEST_ALL_PREFIXES(GainController2InterpolatedGainCurve, + CheckApproximationParams); + + struct RegionLogger { + metrics::Histogram* identity_histogram; + metrics::Histogram* knee_histogram; + metrics::Histogram* limiter_histogram; + metrics::Histogram* saturation_histogram; + + RegionLogger(absl::string_view identity_histogram_name, + absl::string_view knee_histogram_name, + absl::string_view limiter_histogram_name, + absl::string_view saturation_histogram_name); + + ~RegionLogger(); + + void LogRegionStats(const InterpolatedGainCurve::Stats& stats) const; + } region_logger_; + + void UpdateStats(float input_level) const; + + ApmDataDumper* const apm_data_dumper_; + + static constexpr std::array<float, kInterpolatedGainCurveTotalPoints> + approximation_params_x_ = { + {30057.296875, 30148.986328125, 30240.67578125, 30424.052734375, + 30607.4296875, 30790.806640625, 30974.18359375, 31157.560546875, + 31340.939453125, 31524.31640625, 31707.693359375, 31891.0703125, + 32074.447265625, 32257.82421875, 32441.201171875, 32624.580078125, + 32807.95703125, 32991.33203125, 33174.7109375, 33358.08984375, + 33541.46484375, 33724.84375, 33819.53515625, 34009.5390625, + 34200.05859375, 34389.81640625, 34674.48828125, 35054.375, + 35434.86328125, 35814.81640625, 36195.16796875, 36575.03125}}; + static constexpr std::array<float, kInterpolatedGainCurveTotalPoints> + approximation_params_m_ = { + {-3.515235675877192989e-07, -1.050251626111275982e-06, + -2.085213736791047268e-06, -3.443004743530764244e-06, + -4.773849468620028347e-06, -6.077375928725814447e-06, + -7.353257842623861507e-06, -8.601219633419532329e-06, + -9.821013009059242904e-06, -1.101243378798244521e-05, + -1.217532644659513608e-05, -1.330956911260727793e-05, + -1.441507538402220234e-05, -1.549179251014720649e-05, + -1.653970684856176376e-05, -1.755882840370759368e-05, + -1.854918446042574942e-05, -1.951086778717581183e-05, + -2.044398024736437947e-05, -2.1348627342376858e-05, + -2.222496914328075945e-05, -2.265374678245279938e-05, + -2.242570917587727308e-05, -2.220122041762806475e-05, + -2.19802095671184361e-05, -2.176260204578284174e-05, + -2.133731686626560986e-05, -2.092481918225530535e-05, + -2.052459603874012828e-05, -2.013615448959171772e-05, + -1.975903069251216948e-05, -1.939277899509761482e-05}}; + + static constexpr std::array<float, kInterpolatedGainCurveTotalPoints> + approximation_params_q_ = { + {1.010565876960754395, 1.031631827354431152, 1.062929749488830566, + 1.104239225387573242, 1.144973039627075195, 1.185109615325927734, + 1.224629044532775879, 1.263512492179870605, 1.301741957664489746, + 1.339300632476806641, 1.376173257827758789, 1.412345528602600098, + 1.447803974151611328, 1.482536554336547852, 1.516532182693481445, + 1.549780607223510742, 1.582272171974182129, 1.613999366760253906, + 1.644955039024353027, 1.675132393836975098, 1.704526185989379883, + 1.718986630439758301, 1.711274504661560059, 1.703639745712280273, + 1.696081161499023438, 1.688597679138183594, 1.673851132392883301, + 1.659391283988952637, 1.645209431648254395, 1.631297469139099121, + 1.617647409439086914, 1.604251742362976074}}; + + // Stats. 
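To put the tables above on a more familiar scale, the first boundary and the saturation threshold can be expressed in dBFS. The check below assumes the usual FloatS16 convention where 32768 corresponds to 0 dBFS (the same value as kInputLevelScalingFactor above).

// Editorial sketch: knee start and saturation start of the gain curve in dBFS.
#include <cmath>
#include <cstdio>

int main() {
  constexpr float kFullScale = 32768.0f;
  const float knee_start = 30057.296875f;               // approximation_params_x_[0].
  const float saturation_start = 36766.300710566735f;   // kMaxInputLevelLinear.
  std::printf("knee starts at %.2f dBFS\n",
              20.0f * std::log10(knee_start / kFullScale));        // ~-0.75 dBFS.
  std::printf("saturation starts at %.2f dBFS\n",
              20.0f * std::log10(saturation_start / kFullScale));  // ~+1.00 dBFS.
  return 0;
}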
+ mutable Stats stats_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_INTERPOLATED_GAIN_CURVE_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/interpolated_gain_curve_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/interpolated_gain_curve_unittest.cc new file mode 100644 index 0000000000..7861ae997d --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/interpolated_gain_curve_unittest.cc @@ -0,0 +1,203 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/interpolated_gain_curve.h" + +#include <array> +#include <type_traits> +#include <vector> + +#include "api/array_view.h" +#include "common_audio/include/audio_util.h" +#include "modules/audio_processing/agc2/agc2_common.h" +#include "modules/audio_processing/agc2/compute_interpolated_gain_curve.h" +#include "modules/audio_processing/agc2/limiter_db_gain_curve.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/checks.h" +#include "rtc_base/gunit.h" + +namespace webrtc { +namespace { + +constexpr double kLevelEpsilon = 1e-2 * kMaxAbsFloatS16Value; +constexpr float kInterpolatedGainCurveTolerance = 1.f / 32768.f; +ApmDataDumper apm_data_dumper(0); +static_assert(std::is_trivially_destructible<LimiterDbGainCurve>::value, ""); +const LimiterDbGainCurve limiter; + +} // namespace + +TEST(GainController2InterpolatedGainCurve, CreateUse) { + InterpolatedGainCurve igc(&apm_data_dumper, ""); + + const auto levels = test::LinSpace( + kLevelEpsilon, DbfsToFloatS16(limiter.max_input_level_db() + 1), 500); + for (const auto level : levels) { + EXPECT_GE(igc.LookUpGainToApply(level), 0.0f); + } +} + +TEST(GainController2InterpolatedGainCurve, CheckValidOutput) { + InterpolatedGainCurve igc(&apm_data_dumper, ""); + + const auto levels = test::LinSpace( + kLevelEpsilon, limiter.max_input_level_linear() * 2.0, 500); + for (const auto level : levels) { + SCOPED_TRACE(std::to_string(level)); + const float gain = igc.LookUpGainToApply(level); + EXPECT_LE(0.0f, gain); + EXPECT_LE(gain, 1.0f); + } +} + +TEST(GainController2InterpolatedGainCurve, CheckMonotonicity) { + InterpolatedGainCurve igc(&apm_data_dumper, ""); + + const auto levels = test::LinSpace( + kLevelEpsilon, limiter.max_input_level_linear() + kLevelEpsilon + 0.5, + 500); + float prev_gain = igc.LookUpGainToApply(0.0f); + for (const auto level : levels) { + const float gain = igc.LookUpGainToApply(level); + EXPECT_GE(prev_gain, gain); + prev_gain = gain; + } +} + +TEST(GainController2InterpolatedGainCurve, CheckApproximation) { + InterpolatedGainCurve igc(&apm_data_dumper, ""); + + const auto levels = test::LinSpace( + kLevelEpsilon, limiter.max_input_level_linear() - kLevelEpsilon, 500); + for (const auto level : levels) { + SCOPED_TRACE(std::to_string(level)); + EXPECT_LT( + std::fabs(limiter.GetGainLinear(level) - igc.LookUpGainToApply(level)), + kInterpolatedGainCurveTolerance); + } +} + +TEST(GainController2InterpolatedGainCurve, CheckRegionBoundaries) { + InterpolatedGainCurve igc(&apm_data_dumper, ""); + + const std::vector<double> levels{ + {kLevelEpsilon, 
limiter.knee_start_linear() + kLevelEpsilon, + limiter.limiter_start_linear() + kLevelEpsilon, + limiter.max_input_level_linear() + kLevelEpsilon}}; + for (const auto level : levels) { + igc.LookUpGainToApply(level); + } + + const auto stats = igc.get_stats(); + EXPECT_EQ(1ul, stats.look_ups_identity_region); + EXPECT_EQ(1ul, stats.look_ups_knee_region); + EXPECT_EQ(1ul, stats.look_ups_limiter_region); + EXPECT_EQ(1ul, stats.look_ups_saturation_region); +} + +TEST(GainController2InterpolatedGainCurve, CheckIdentityRegion) { + constexpr size_t kNumSteps = 10; + InterpolatedGainCurve igc(&apm_data_dumper, ""); + + const auto levels = + test::LinSpace(kLevelEpsilon, limiter.knee_start_linear(), kNumSteps); + for (const auto level : levels) { + SCOPED_TRACE(std::to_string(level)); + EXPECT_EQ(1.0f, igc.LookUpGainToApply(level)); + } + + const auto stats = igc.get_stats(); + EXPECT_EQ(kNumSteps - 1, stats.look_ups_identity_region); + EXPECT_EQ(1ul, stats.look_ups_knee_region); + EXPECT_EQ(0ul, stats.look_ups_limiter_region); + EXPECT_EQ(0ul, stats.look_ups_saturation_region); +} + +TEST(GainController2InterpolatedGainCurve, CheckNoOverApproximationKnee) { + constexpr size_t kNumSteps = 10; + InterpolatedGainCurve igc(&apm_data_dumper, ""); + + const auto levels = + test::LinSpace(limiter.knee_start_linear() + kLevelEpsilon, + limiter.limiter_start_linear(), kNumSteps); + for (const auto level : levels) { + SCOPED_TRACE(std::to_string(level)); + // Small tolerance added (needed because comparing a float with a double). + EXPECT_LE(igc.LookUpGainToApply(level), + limiter.GetGainLinear(level) + 1e-7); + } + + const auto stats = igc.get_stats(); + EXPECT_EQ(0ul, stats.look_ups_identity_region); + EXPECT_EQ(kNumSteps - 1, stats.look_ups_knee_region); + EXPECT_EQ(1ul, stats.look_ups_limiter_region); + EXPECT_EQ(0ul, stats.look_ups_saturation_region); +} + +TEST(GainController2InterpolatedGainCurve, CheckNoOverApproximationBeyondKnee) { + constexpr size_t kNumSteps = 10; + InterpolatedGainCurve igc(&apm_data_dumper, ""); + + const auto levels = test::LinSpace( + limiter.limiter_start_linear() + kLevelEpsilon, + limiter.max_input_level_linear() - kLevelEpsilon, kNumSteps); + for (const auto level : levels) { + SCOPED_TRACE(std::to_string(level)); + // Small tolerance added (needed because comparing a float with a double). 
+  EXPECT_LE(igc.LookUpGainToApply(level),
+            limiter.GetGainLinear(level) + 1e-7);
+  }
+
+  const auto stats = igc.get_stats();
+  EXPECT_EQ(0ul, stats.look_ups_identity_region);
+  EXPECT_EQ(0ul, stats.look_ups_knee_region);
+  EXPECT_EQ(kNumSteps, stats.look_ups_limiter_region);
+  EXPECT_EQ(0ul, stats.look_ups_saturation_region);
+}
+
+TEST(GainController2InterpolatedGainCurve,
+     CheckNoOverApproximationWithSaturation) {
+  constexpr size_t kNumSteps = 3;
+  InterpolatedGainCurve igc(&apm_data_dumper, "");
+
+  const auto levels = test::LinSpace(
+      limiter.max_input_level_linear() + kLevelEpsilon,
+      limiter.max_input_level_linear() + kLevelEpsilon + 0.5, kNumSteps);
+  for (const auto level : levels) {
+    SCOPED_TRACE(std::to_string(level));
+    EXPECT_LE(igc.LookUpGainToApply(level), limiter.GetGainLinear(level));
+  }
+
+  const auto stats = igc.get_stats();
+  EXPECT_EQ(0ul, stats.look_ups_identity_region);
+  EXPECT_EQ(0ul, stats.look_ups_knee_region);
+  EXPECT_EQ(0ul, stats.look_ups_limiter_region);
+  EXPECT_EQ(kNumSteps, stats.look_ups_saturation_region);
+}
+
+TEST(GainController2InterpolatedGainCurve, CheckApproximationParams) {
+  test::InterpolatedParameters parameters =
+      test::ComputeInterpolatedGainCurveApproximationParams();
+
+  InterpolatedGainCurve igc(&apm_data_dumper, "");
+
+  for (size_t i = 0; i < kInterpolatedGainCurveTotalPoints; ++i) {
+    // The tolerance levels are chosen to account for deviations due
+    // to computing with single precision floating point numbers.
+    EXPECT_NEAR(igc.approximation_params_x_[i],
+                parameters.computed_approximation_params_x[i], 0.9f);
+    EXPECT_NEAR(igc.approximation_params_m_[i],
+                parameters.computed_approximation_params_m[i], 0.00001f);
+    EXPECT_NEAR(igc.approximation_params_q_[i],
+                parameters.computed_approximation_params_q[i], 0.001f);
+  }
+}
+
+} // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/limiter.cc b/third_party/libwebrtc/modules/audio_processing/agc2/limiter.cc
new file mode 100644
index 0000000000..7a1e2202be
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/limiter.cc
@@ -0,0 +1,155 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/limiter.h"
+
+#include <algorithm>
+#include <array>
+#include <cmath>
+
+#include "absl/strings/string_view.h"
+#include "api/array_view.h"
+#include "modules/audio_processing/agc2/agc2_common.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/numerics/safe_conversions.h"
+#include "rtc_base/numerics/safe_minmax.h"
+
+namespace webrtc {
+namespace {
+
+// This constant affects the way scaling factors are interpolated for the first
+// sub-frame of a frame. Linear interpolation is replaced with a power function
+// only when the first sub-frame has an estimated level greater than that of
+// the previously analyzed sub-frame; the power function reduces the chances of
+// over-shooting (and hence saturation) at the cost of reduced fixed gain
+// effectiveness.
+constexpr float kAttackFirstSubframeInterpolationPower = 8.0f; + +void InterpolateFirstSubframe(float last_factor, + float current_factor, + rtc::ArrayView<float> subframe) { + const int n = rtc::dchecked_cast<int>(subframe.size()); + constexpr float p = kAttackFirstSubframeInterpolationPower; + for (int i = 0; i < n; ++i) { + subframe[i] = std::pow(1.f - i / n, p) * (last_factor - current_factor) + + current_factor; + } +} + +void ComputePerSampleSubframeFactors( + const std::array<float, kSubFramesInFrame + 1>& scaling_factors, + int samples_per_channel, + rtc::ArrayView<float> per_sample_scaling_factors) { + const int num_subframes = scaling_factors.size() - 1; + const int subframe_size = + rtc::CheckedDivExact(samples_per_channel, num_subframes); + + // Handle first sub-frame differently in case of attack. + const bool is_attack = scaling_factors[0] > scaling_factors[1]; + if (is_attack) { + InterpolateFirstSubframe( + scaling_factors[0], scaling_factors[1], + rtc::ArrayView<float>( + per_sample_scaling_factors.subview(0, subframe_size))); + } + + for (int i = is_attack ? 1 : 0; i < num_subframes; ++i) { + const int subframe_start = i * subframe_size; + const float scaling_start = scaling_factors[i]; + const float scaling_end = scaling_factors[i + 1]; + const float scaling_diff = (scaling_end - scaling_start) / subframe_size; + for (int j = 0; j < subframe_size; ++j) { + per_sample_scaling_factors[subframe_start + j] = + scaling_start + scaling_diff * j; + } + } +} + +void ScaleSamples(rtc::ArrayView<const float> per_sample_scaling_factors, + AudioFrameView<float> signal) { + const int samples_per_channel = signal.samples_per_channel(); + RTC_DCHECK_EQ(samples_per_channel, per_sample_scaling_factors.size()); + for (int i = 0; i < signal.num_channels(); ++i) { + rtc::ArrayView<float> channel = signal.channel(i); + for (int j = 0; j < samples_per_channel; ++j) { + channel[j] = rtc::SafeClamp(channel[j] * per_sample_scaling_factors[j], + kMinFloatS16Value, kMaxFloatS16Value); + } + } +} + +void CheckLimiterSampleRate(int sample_rate_hz) { + // Check that per_sample_scaling_factors_ is large enough. + RTC_DCHECK_LE(sample_rate_hz, + kMaximalNumberOfSamplesPerChannel * 1000 / kFrameDurationMs); +} + +} // namespace + +Limiter::Limiter(int sample_rate_hz, + ApmDataDumper* apm_data_dumper, + absl::string_view histogram_name) + : interp_gain_curve_(apm_data_dumper, histogram_name), + level_estimator_(sample_rate_hz, apm_data_dumper), + apm_data_dumper_(apm_data_dumper) { + CheckLimiterSampleRate(sample_rate_hz); +} + +Limiter::~Limiter() = default; + +void Limiter::Process(AudioFrameView<float> signal) { + const std::array<float, kSubFramesInFrame> level_estimate = + level_estimator_.ComputeLevel(signal); + + RTC_DCHECK_EQ(level_estimate.size() + 1, scaling_factors_.size()); + scaling_factors_[0] = last_scaling_factor_; + std::transform(level_estimate.begin(), level_estimate.end(), + scaling_factors_.begin() + 1, [this](float x) { + return interp_gain_curve_.LookUpGainToApply(x); + }); + + const int samples_per_channel = signal.samples_per_channel(); + RTC_DCHECK_LE(samples_per_channel, kMaximalNumberOfSamplesPerChannel); + + auto per_sample_scaling_factors = rtc::ArrayView<float>( + &per_sample_scaling_factors_[0], samples_per_channel); + ComputePerSampleSubframeFactors(scaling_factors_, samples_per_channel, + per_sample_scaling_factors); + ScaleSamples(per_sample_scaling_factors, signal); + + last_scaling_factor_ = scaling_factors_.back(); + + // Dump data for debug. 
+ apm_data_dumper_->DumpRaw("agc2_limiter_last_scaling_factor", + last_scaling_factor_); + apm_data_dumper_->DumpRaw( + "agc2_limiter_region", + static_cast<int>(interp_gain_curve_.get_stats().region)); +} + +InterpolatedGainCurve::Stats Limiter::GetGainCurveStats() const { + return interp_gain_curve_.get_stats(); +} + +void Limiter::SetSampleRate(int sample_rate_hz) { + CheckLimiterSampleRate(sample_rate_hz); + level_estimator_.SetSampleRate(sample_rate_hz); +} + +void Limiter::Reset() { + level_estimator_.Reset(); +} + +float Limiter::LastAudioLevel() const { + return level_estimator_.LastAudioLevel(); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/limiter.h b/third_party/libwebrtc/modules/audio_processing/agc2/limiter.h new file mode 100644 index 0000000000..d4d556349c --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/limiter.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_LIMITER_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_LIMITER_H_ + +#include <vector> + +#include "absl/strings/string_view.h" +#include "modules/audio_processing/agc2/fixed_digital_level_estimator.h" +#include "modules/audio_processing/agc2/interpolated_gain_curve.h" +#include "modules/audio_processing/include/audio_frame_view.h" + +namespace webrtc { +class ApmDataDumper; + +class Limiter { + public: + Limiter(int sample_rate_hz, + ApmDataDumper* apm_data_dumper, + absl::string_view histogram_name_prefix); + Limiter(const Limiter& limiter) = delete; + Limiter& operator=(const Limiter& limiter) = delete; + ~Limiter(); + + // Applies limiter and hard-clipping to `signal`. + void Process(AudioFrameView<float> signal); + InterpolatedGainCurve::Stats GetGainCurveStats() const; + + // Supported rates must be + // * supported by FixedDigitalLevelEstimator + // * below kMaximalNumberOfSamplesPerChannel*1000/kFrameDurationMs + // so that samples_per_channel fit in the + // per_sample_scaling_factors_ array. + void SetSampleRate(int sample_rate_hz); + + // Resets the internal state. + void Reset(); + + float LastAudioLevel() const; + + private: + const InterpolatedGainCurve interp_gain_curve_; + FixedDigitalLevelEstimator level_estimator_; + ApmDataDumper* const apm_data_dumper_ = nullptr; + + // Work array containing the sub-frame scaling factors to be interpolated. + std::array<float, kSubFramesInFrame + 1> scaling_factors_ = {}; + std::array<float, kMaximalNumberOfSamplesPerChannel> + per_sample_scaling_factors_ = {}; + float last_scaling_factor_ = 1.f; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_LIMITER_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/limiter_db_gain_curve.cc b/third_party/libwebrtc/modules/audio_processing/agc2/limiter_db_gain_curve.cc new file mode 100644 index 0000000000..d47c0b2e17 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/limiter_db_gain_curve.cc @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/limiter_db_gain_curve.h" + +#include <cmath> + +#include "common_audio/include/audio_util.h" +#include "modules/audio_processing/agc2/agc2_common.h" +#include "rtc_base/checks.h" + +namespace webrtc { +namespace { + +double ComputeKneeStart(double max_input_level_db, + double knee_smoothness_db, + double compression_ratio) { + RTC_CHECK_LT((compression_ratio - 1.0) * knee_smoothness_db / + (2.0 * compression_ratio), + max_input_level_db); + return -knee_smoothness_db / 2.0 - + max_input_level_db / (compression_ratio - 1.0); +} + +std::array<double, 3> ComputeKneeRegionPolynomial(double knee_start_dbfs, + double knee_smoothness_db, + double compression_ratio) { + const double a = (1.0 - compression_ratio) / + (2.0 * knee_smoothness_db * compression_ratio); + const double b = 1.0 - 2.0 * a * knee_start_dbfs; + const double c = a * knee_start_dbfs * knee_start_dbfs; + return {{a, b, c}}; +} + +double ComputeLimiterD1(double max_input_level_db, double compression_ratio) { + return (std::pow(10.0, -max_input_level_db / (20.0 * compression_ratio)) * + (1.0 - compression_ratio) / compression_ratio) / + kMaxAbsFloatS16Value; +} + +constexpr double ComputeLimiterD2(double compression_ratio) { + return (1.0 - 2.0 * compression_ratio) / compression_ratio; +} + +double ComputeLimiterI2(double max_input_level_db, + double compression_ratio, + double gain_curve_limiter_i1) { + RTC_CHECK_NE(gain_curve_limiter_i1, 0.f); + return std::pow(10.0, -max_input_level_db / (20.0 * compression_ratio)) / + gain_curve_limiter_i1 / + std::pow(kMaxAbsFloatS16Value, gain_curve_limiter_i1 - 1); +} + +} // namespace + +LimiterDbGainCurve::LimiterDbGainCurve() + : max_input_level_linear_(DbfsToFloatS16(max_input_level_db_)), + knee_start_dbfs_(ComputeKneeStart(max_input_level_db_, + knee_smoothness_db_, + compression_ratio_)), + knee_start_linear_(DbfsToFloatS16(knee_start_dbfs_)), + limiter_start_dbfs_(knee_start_dbfs_ + knee_smoothness_db_), + limiter_start_linear_(DbfsToFloatS16(limiter_start_dbfs_)), + knee_region_polynomial_(ComputeKneeRegionPolynomial(knee_start_dbfs_, + knee_smoothness_db_, + compression_ratio_)), + gain_curve_limiter_d1_( + ComputeLimiterD1(max_input_level_db_, compression_ratio_)), + gain_curve_limiter_d2_(ComputeLimiterD2(compression_ratio_)), + gain_curve_limiter_i1_(1.0 / compression_ratio_), + gain_curve_limiter_i2_(ComputeLimiterI2(max_input_level_db_, + compression_ratio_, + gain_curve_limiter_i1_)) { + static_assert(knee_smoothness_db_ > 0.0f, ""); + static_assert(compression_ratio_ > 1.0f, ""); + RTC_CHECK_GE(max_input_level_db_, knee_start_dbfs_ + knee_smoothness_db_); +} + +constexpr double LimiterDbGainCurve::max_input_level_db_; +constexpr double LimiterDbGainCurve::knee_smoothness_db_; +constexpr double LimiterDbGainCurve::compression_ratio_; + +double LimiterDbGainCurve::GetOutputLevelDbfs(double input_level_dbfs) const { + if (input_level_dbfs < knee_start_dbfs_) { + return input_level_dbfs; + } else if (input_level_dbfs < limiter_start_dbfs_) { + return GetKneeRegionOutputLevelDbfs(input_level_dbfs); + } + return GetCompressorRegionOutputLevelDbfs(input_level_dbfs); +} + +double 
LimiterDbGainCurve::GetGainLinear(double input_level_linear) const { + if (input_level_linear < knee_start_linear_) { + return 1.0; + } + return DbfsToFloatS16( + GetOutputLevelDbfs(FloatS16ToDbfs(input_level_linear))) / + input_level_linear; +} + +// Computes the first derivative of GetGainLinear() in `x`. +double LimiterDbGainCurve::GetGainFirstDerivativeLinear(double x) const { + // Beyond-knee region only. + RTC_CHECK_GE(x, limiter_start_linear_ - 1e-7 * kMaxAbsFloatS16Value); + return gain_curve_limiter_d1_ * + std::pow(x / kMaxAbsFloatS16Value, gain_curve_limiter_d2_); +} + +// Computes the integral of GetGainLinear() in the range [x0, x1]. +double LimiterDbGainCurve::GetGainIntegralLinear(double x0, double x1) const { + RTC_CHECK_LE(x0, x1); // Valid interval. + RTC_CHECK_GE(x0, limiter_start_linear_); // Beyond-knee region only. + auto limiter_integral = [this](const double& x) { + return gain_curve_limiter_i2_ * std::pow(x, gain_curve_limiter_i1_); + }; + return limiter_integral(x1) - limiter_integral(x0); +} + +double LimiterDbGainCurve::GetKneeRegionOutputLevelDbfs( + double input_level_dbfs) const { + return knee_region_polynomial_[0] * input_level_dbfs * input_level_dbfs + + knee_region_polynomial_[1] * input_level_dbfs + + knee_region_polynomial_[2]; +} + +double LimiterDbGainCurve::GetCompressorRegionOutputLevelDbfs( + double input_level_dbfs) const { + return (input_level_dbfs - max_input_level_db_) / compression_ratio_; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/limiter_db_gain_curve.h b/third_party/libwebrtc/modules/audio_processing/agc2/limiter_db_gain_curve.h new file mode 100644 index 0000000000..9086e26739 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/limiter_db_gain_curve.h @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_LIMITER_DB_GAIN_CURVE_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_LIMITER_DB_GAIN_CURVE_H_ + +#include <array> + +#include "modules/audio_processing/agc2/agc2_testing_common.h" + +namespace webrtc { + +// A class for computing a limiter gain curve (in dB scale) given a set of +// hard-coded parameters (namely, kLimiterDbGainCurveMaxInputLevelDbFs, +// kLimiterDbGainCurveKneeSmoothnessDb, and +// kLimiterDbGainCurveCompressionRatio). The generated curve consists of four +// regions: identity (linear), knee (quadratic polynomial), compression +// (linear), saturation (linear). The aforementioned constants are used to shape +// the different regions. +class LimiterDbGainCurve { + public: + LimiterDbGainCurve(); + + double max_input_level_db() const { return max_input_level_db_; } + double max_input_level_linear() const { return max_input_level_linear_; } + double knee_start_linear() const { return knee_start_linear_; } + double limiter_start_linear() const { return limiter_start_linear_; } + + // These methods can be marked 'constexpr' in C++ 14. 
+ double GetOutputLevelDbfs(double input_level_dbfs) const; + double GetGainLinear(double input_level_linear) const; + double GetGainFirstDerivativeLinear(double x) const; + double GetGainIntegralLinear(double x0, double x1) const; + + private: + double GetKneeRegionOutputLevelDbfs(double input_level_dbfs) const; + double GetCompressorRegionOutputLevelDbfs(double input_level_dbfs) const; + + static constexpr double max_input_level_db_ = test::kLimiterMaxInputLevelDbFs; + static constexpr double knee_smoothness_db_ = test::kLimiterKneeSmoothnessDb; + static constexpr double compression_ratio_ = test::kLimiterCompressionRatio; + + const double max_input_level_linear_; + + // Do not modify signal with level <= knee_start_dbfs_. + const double knee_start_dbfs_; + const double knee_start_linear_; + + // The upper end of the knee region, which is between knee_start_dbfs_ and + // limiter_start_dbfs_. + const double limiter_start_dbfs_; + const double limiter_start_linear_; + + // Coefficients {a, b, c} of the knee region polynomial + // ax^2 + bx + c in the DB scale. + const std::array<double, 3> knee_region_polynomial_; + + // Parameters for the computation of the first derivative of GetGainLinear(). + const double gain_curve_limiter_d1_; + const double gain_curve_limiter_d2_; + + // Parameters for the computation of the integral of GetGainLinear(). + const double gain_curve_limiter_i1_; + const double gain_curve_limiter_i2_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_LIMITER_DB_GAIN_CURVE_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/limiter_db_gain_curve_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/limiter_db_gain_curve_unittest.cc new file mode 100644 index 0000000000..049c8d568e --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/limiter_db_gain_curve_unittest.cc @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/agc2/limiter_db_gain_curve.h" + +#include "rtc_base/gunit.h" + +namespace webrtc { + +TEST(FixedDigitalGainController2Limiter, ConstructDestruct) { + LimiterDbGainCurve l; +} + +TEST(FixedDigitalGainController2Limiter, GainCurveShouldBeMonotone) { + LimiterDbGainCurve l; + float last_output_level = 0.f; + bool has_last_output_level = false; + for (float level = -90.f; level <= l.max_input_level_db(); level += 0.5f) { + const float current_output_level = l.GetOutputLevelDbfs(level); + if (!has_last_output_level) { + last_output_level = current_output_level; + has_last_output_level = true; + } + EXPECT_LE(last_output_level, current_output_level); + last_output_level = current_output_level; + } +} + +TEST(FixedDigitalGainController2Limiter, GainCurveShouldBeContinuous) { + LimiterDbGainCurve l; + float last_output_level = 0.f; + bool has_last_output_level = false; + constexpr float kMaxDelta = 0.5f; + for (float level = -90.f; level <= l.max_input_level_db(); level += 0.5f) { + const float current_output_level = l.GetOutputLevelDbfs(level); + if (!has_last_output_level) { + last_output_level = current_output_level; + has_last_output_level = true; + } + EXPECT_LE(current_output_level, last_output_level + kMaxDelta); + last_output_level = current_output_level; + } +} + +TEST(FixedDigitalGainController2Limiter, OutputGainShouldBeLessThanFullScale) { + LimiterDbGainCurve l; + for (float level = -90.f; level <= l.max_input_level_db(); level += 0.5f) { + const float current_output_level = l.GetOutputLevelDbfs(level); + EXPECT_LE(current_output_level, 0.f); + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/limiter_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/limiter_unittest.cc new file mode 100644 index 0000000000..e662a7fc89 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/limiter_unittest.cc @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/limiter.h" + +#include "common_audio/include/audio_util.h" +#include "modules/audio_processing/agc2/agc2_common.h" +#include "modules/audio_processing/agc2/agc2_testing_common.h" +#include "modules/audio_processing/agc2/vector_float_frame.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/gunit.h" + +namespace webrtc { + +TEST(Limiter, LimiterShouldConstructAndRun) { + const int sample_rate_hz = 48000; + ApmDataDumper apm_data_dumper(0); + + Limiter limiter(sample_rate_hz, &apm_data_dumper, ""); + + VectorFloatFrame vectors_with_float_frame(1, sample_rate_hz / 100, + kMaxAbsFloatS16Value); + limiter.Process(vectors_with_float_frame.float_frame_view()); +} + +TEST(Limiter, OutputVolumeAboveThreshold) { + const int sample_rate_hz = 48000; + const float input_level = + (kMaxAbsFloatS16Value + DbfsToFloatS16(test::kLimiterMaxInputLevelDbFs)) / + 2.f; + ApmDataDumper apm_data_dumper(0); + + Limiter limiter(sample_rate_hz, &apm_data_dumper, ""); + + // Give the level estimator time to adapt. 
+  for (int i = 0; i < 5; ++i) {
+    VectorFloatFrame vectors_with_float_frame(1, sample_rate_hz / 100,
+                                              input_level);
+    limiter.Process(vectors_with_float_frame.float_frame_view());
+  }
+
+  VectorFloatFrame vectors_with_float_frame(1, sample_rate_hz / 100,
+                                            input_level);
+  limiter.Process(vectors_with_float_frame.float_frame_view());
+  rtc::ArrayView<const float> channel =
+      vectors_with_float_frame.float_frame_view().channel(0);
+
+  for (const auto& sample : channel) {
+    EXPECT_LT(0.9f * kMaxAbsFloatS16Value, sample);
+  }
+}
+
+} // namespace webrtc
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/noise_level_estimator.cc b/third_party/libwebrtc/modules/audio_processing/agc2/noise_level_estimator.cc
new file mode 100644
index 0000000000..9fb1c24b65
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/noise_level_estimator.cc
@@ -0,0 +1,164 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/noise_level_estimator.h"
+
+#include <stddef.h>
+
+#include <algorithm>
+#include <cmath>
+#include <numeric>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+namespace {
+
+constexpr int kFramesPerSecond = 100;
+
+float FrameEnergy(const AudioFrameView<const float>& audio) {
+  float energy = 0.0f;
+  for (int k = 0; k < audio.num_channels(); ++k) {
+    float channel_energy =
+        std::accumulate(audio.channel(k).begin(), audio.channel(k).end(), 0.0f,
+                        [](float a, float b) -> float { return a + b * b; });
+    energy = std::max(channel_energy, energy);
+  }
+  return energy;
+}
+
+float EnergyToDbfs(float signal_energy, int num_samples) {
+  RTC_DCHECK_GE(signal_energy, 0.0f);
+  const float rms_square = signal_energy / num_samples;
+  constexpr float kMinDbfs = -90.30899869919436f;
+  if (rms_square <= 1.0f) {
+    return kMinDbfs;
+  }
+  return 10.0f * std::log10(rms_square) + kMinDbfs;
+}
+
+// Updates the noise floor with instant decay and slow attack. This tuning is
+// specific to AGC2 so that (i) the gain can be promptly increased if the noise
+// floor drops (instant decay) and (ii) the gain reduction is slowed down in
+// case of music or fast speech, which can cause the noise floor to be
+// overestimated (slow attack).
+float SmoothNoiseFloorEstimate(float current_estimate, float new_estimate) {
+  constexpr float kAttack = 0.5f;
+  if (current_estimate < new_estimate) {
+    // Attack phase.
+    return kAttack * new_estimate + (1.0f - kAttack) * current_estimate;
+  }
+  // Instant decay.
+  return new_estimate;
+}
+
+class NoiseFloorEstimator : public NoiseLevelEstimator {
+ public:
+  // Update the noise floor every 5 seconds.
+  static constexpr int kUpdatePeriodNumFrames = 500;
+  static_assert(kUpdatePeriodNumFrames >= 200,
+                "A too small value may cause noise level overestimation.");
+  static_assert(kUpdatePeriodNumFrames <= 1500,
+                "A too large value may make AGC2 slow at reacting to increased "
+                "noise levels.");
+
+  NoiseFloorEstimator(ApmDataDumper* data_dumper) : data_dumper_(data_dumper) {
+    // Initially assume that 48 kHz will be used.
`Analyze()` will detect the + // used sample rate and call `Initialize()` again if needed. + Initialize(/*sample_rate_hz=*/48000); + } + NoiseFloorEstimator(const NoiseFloorEstimator&) = delete; + NoiseFloorEstimator& operator=(const NoiseFloorEstimator&) = delete; + ~NoiseFloorEstimator() = default; + + float Analyze(const AudioFrameView<const float>& frame) override { + // Detect sample rate changes. + const int sample_rate_hz = + static_cast<int>(frame.samples_per_channel() * kFramesPerSecond); + if (sample_rate_hz != sample_rate_hz_) { + Initialize(sample_rate_hz); + } + + const float frame_energy = FrameEnergy(frame); + if (frame_energy <= min_noise_energy_) { + // Ignore frames when muted or below the minimum measurable energy. + data_dumper_->DumpRaw("agc2_noise_floor_estimator_preliminary_level", + noise_energy_); + return EnergyToDbfs(noise_energy_, + static_cast<int>(frame.samples_per_channel())); + } + + if (preliminary_noise_energy_set_) { + preliminary_noise_energy_ = + std::min(preliminary_noise_energy_, frame_energy); + } else { + preliminary_noise_energy_ = frame_energy; + preliminary_noise_energy_set_ = true; + } + data_dumper_->DumpRaw("agc2_noise_floor_estimator_preliminary_level", + preliminary_noise_energy_); + + if (counter_ == 0) { + // Full period observed. + first_period_ = false; + // Update the estimated noise floor energy with the preliminary + // estimation. + noise_energy_ = SmoothNoiseFloorEstimate( + /*current_estimate=*/noise_energy_, + /*new_estimate=*/preliminary_noise_energy_); + // Reset for a new observation period. + counter_ = kUpdatePeriodNumFrames; + preliminary_noise_energy_set_ = false; + } else if (first_period_) { + // While analyzing the signal during the initial period, continuously + // update the estimated noise energy, which is monotonic. + noise_energy_ = preliminary_noise_energy_; + counter_--; + } else { + // During the observation period it's only allowed to lower the energy. + noise_energy_ = std::min(noise_energy_, preliminary_noise_energy_); + counter_--; + } + return EnergyToDbfs(noise_energy_, + static_cast<int>(frame.samples_per_channel())); + } + + private: + void Initialize(int sample_rate_hz) { + sample_rate_hz_ = sample_rate_hz; + first_period_ = true; + preliminary_noise_energy_set_ = false; + // Initialize the minimum noise energy to -84 dBFS. + min_noise_energy_ = sample_rate_hz * 2.0f * 2.0f / kFramesPerSecond; + preliminary_noise_energy_ = min_noise_energy_; + noise_energy_ = min_noise_energy_; + counter_ = kUpdatePeriodNumFrames; + } + + ApmDataDumper* const data_dumper_; + int sample_rate_hz_; + float min_noise_energy_; + bool first_period_; + bool preliminary_noise_energy_set_; + float preliminary_noise_energy_; + float noise_energy_; + int counter_; +}; + +} // namespace + +std::unique_ptr<NoiseLevelEstimator> CreateNoiseFloorEstimator( + ApmDataDumper* data_dumper) { + return std::make_unique<NoiseFloorEstimator>(data_dumper); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/noise_level_estimator.h b/third_party/libwebrtc/modules/audio_processing/agc2/noise_level_estimator.h new file mode 100644 index 0000000000..9f3b957486 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/noise_level_estimator.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_NOISE_LEVEL_ESTIMATOR_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_NOISE_LEVEL_ESTIMATOR_H_ + +#include <memory> + +#include "modules/audio_processing/include/audio_frame_view.h" + +namespace webrtc { +class ApmDataDumper; + +// Noise level estimator interface. +class NoiseLevelEstimator { + public: + virtual ~NoiseLevelEstimator() = default; + // Analyzes a 10 ms `frame`, updates the noise level estimation and returns + // the value for the latter in dBFS. + virtual float Analyze(const AudioFrameView<const float>& frame) = 0; +}; + +// Creates a noise level estimator based on noise floor detection. +std::unique_ptr<NoiseLevelEstimator> CreateNoiseFloorEstimator( + ApmDataDumper* data_dumper); + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_NOISE_LEVEL_ESTIMATOR_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/noise_level_estimator_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/noise_level_estimator_gn/moz.build new file mode 100644 index 0000000000..6b53dda825 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/noise_level_estimator_gn/moz.build @@ -0,0 +1,213 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_APM_DEBUG_DUMP"] = "0" +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/agc2/noise_level_estimator.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + 
DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + 
DEFINES["_GNU_SOURCE"] = True + +Library("noise_level_estimator_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/noise_level_estimator_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/noise_level_estimator_unittest.cc new file mode 100644 index 0000000000..8168c5a229 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/noise_level_estimator_unittest.cc @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/noise_level_estimator.h" + +#include <array> +#include <cmath> +#include <functional> +#include <limits> + +#include "api/function_view.h" +#include "modules/audio_processing/agc2/agc2_testing_common.h" +#include "modules/audio_processing/agc2/vector_float_frame.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/gunit.h" + +namespace webrtc { +namespace { + +constexpr int kNumIterations = 200; +constexpr int kFramesPerSecond = 100; + +// Runs the noise estimator on audio generated by 'sample_generator' +// for kNumIterations. Returns the last noise level estimate. +float RunEstimator(rtc::FunctionView<float()> sample_generator, + NoiseLevelEstimator& estimator, + int sample_rate_hz) { + const int samples_per_channel = + rtc::CheckedDivExact(sample_rate_hz, kFramesPerSecond); + VectorFloatFrame signal(1, samples_per_channel, 0.0f); + for (int i = 0; i < kNumIterations; ++i) { + AudioFrameView<float> frame_view = signal.float_frame_view(); + for (int j = 0; j < samples_per_channel; ++j) { + frame_view.channel(0)[j] = sample_generator(); + } + estimator.Analyze(frame_view); + } + return estimator.Analyze(signal.float_frame_view()); +} + +class NoiseEstimatorParametrization : public ::testing::TestWithParam<int> { + protected: + int sample_rate_hz() const { return GetParam(); } +}; + +// Checks that full scale white noise maps to about -5.5 dBFS. +TEST_P(NoiseEstimatorParametrization, NoiseFloorEstimatorWithRandomNoise) { + ApmDataDumper data_dumper(0); + auto estimator = CreateNoiseFloorEstimator(&data_dumper); + + test::WhiteNoiseGenerator gen(/*min_amplitude=*/test::kMinS16, + /*max_amplitude=*/test::kMaxS16); + const float noise_level_dbfs = + RunEstimator(gen, *estimator, sample_rate_hz()); + EXPECT_NEAR(noise_level_dbfs, -5.5f, 0.5f); +} + +// Checks that a full scale sine wave maps to about -3 dBFS. +TEST_P(NoiseEstimatorParametrization, NoiseFloorEstimatorWithSineTone) { + ApmDataDumper data_dumper(0); + auto estimator = CreateNoiseFloorEstimator(&data_dumper); + + test::SineGenerator gen(/*amplitude=*/test::kMaxS16, /*frequency_hz=*/600.0f, + sample_rate_hz()); + const float noise_level_dbfs = + RunEstimator(gen, *estimator, sample_rate_hz()); + EXPECT_NEAR(noise_level_dbfs, -3.0f, 0.1f); +} + +// Check that sufficiently spaced periodic pulses do not raise the estimated +// noise floor, which is determined by the amplitude of the non-pulse samples. 
+TEST_P(NoiseEstimatorParametrization, NoiseFloorEstimatorWithPulseTone) { + ApmDataDumper data_dumper(0); + auto estimator = CreateNoiseFloorEstimator(&data_dumper); + + constexpr float kNoPulseAmplitude = 10.0f; + test::PulseGenerator gen(/*pulse_amplitude=*/test::kMaxS16, kNoPulseAmplitude, + /*frequency_hz=*/20.0f, sample_rate_hz()); + const float noise_level_dbfs = + RunEstimator(gen, *estimator, sample_rate_hz()); + const float expected_noise_floor_dbfs = + 20.0f * std::log10f(kNoPulseAmplitude / test::kMaxS16); + EXPECT_NEAR(noise_level_dbfs, expected_noise_floor_dbfs, 0.5f); +} + +INSTANTIATE_TEST_SUITE_P(GainController2NoiseEstimator, + NoiseEstimatorParametrization, + ::testing::Values(8000, 16000, 32000, 48000)); + +} // namespace +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/BUILD.gn b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/BUILD.gn new file mode 100644 index 0000000000..d709eb3699 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/BUILD.gn @@ -0,0 +1,334 @@ +# Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +import("../../../../webrtc.gni") + +rtc_library("rnn_vad") { + visibility = [ "../*" ] + sources = [ + "features_extraction.cc", + "features_extraction.h", + "rnn.cc", + "rnn.h", + ] + + defines = [] + if (rtc_build_with_neon && target_cpu != "arm64") { + suppressed_configs += [ "//build/config/compiler:compiler_arm_fpu" ] + cflags = [ "-mfpu=neon" ] + } + + deps = [ + ":rnn_vad_common", + ":rnn_vad_layers", + ":rnn_vad_lp_residual", + ":rnn_vad_pitch", + ":rnn_vad_sequence_buffer", + ":rnn_vad_spectral_features", + "..:biquad_filter", + "..:cpu_features", + "../../../../api:array_view", + "../../../../rtc_base:checks", + "../../../../rtc_base:safe_compare", + "../../../../rtc_base:safe_conversions", + "//third_party/rnnoise:rnn_vad", + ] +} + +rtc_library("rnn_vad_auto_correlation") { + sources = [ + "auto_correlation.cc", + "auto_correlation.h", + ] + deps = [ + ":rnn_vad_common", + "../../../../api:array_view", + "../../../../rtc_base:checks", + "../../utility:pffft_wrapper", + ] +} + +rtc_source_set("rnn_vad_common") { + # TODO(alessiob): Make this target visibility private. 
+ visibility = [ + ":*", + "..:vad_wrapper", + ] + sources = [ "common.h" ] + deps = [ + "../../../../rtc_base/system:arch", + "../../../../system_wrappers", + ] +} + +rtc_library("rnn_vad_lp_residual") { + sources = [ + "lp_residual.cc", + "lp_residual.h", + ] + deps = [ + "../../../../api:array_view", + "../../../../rtc_base:checks", + "../../../../rtc_base:safe_compare", + ] +} + +rtc_source_set("rnn_vad_layers") { + sources = [ + "rnn_fc.cc", + "rnn_fc.h", + "rnn_gru.cc", + "rnn_gru.h", + ] + + defines = [] + if (rtc_build_with_neon && current_cpu != "arm64") { + suppressed_configs += [ "//build/config/compiler:compiler_arm_fpu" ] + cflags = [ "-mfpu=neon" ] + } + + deps = [ + ":rnn_vad_common", + ":vector_math", + "..:cpu_features", + "../../../../api:array_view", + "../../../../api:function_view", + "../../../../rtc_base:checks", + "../../../../rtc_base:safe_conversions", + "//third_party/rnnoise:rnn_vad", + ] + if (current_cpu == "x86" || current_cpu == "x64") { + deps += [ ":vector_math_avx2" ] + } + absl_deps = [ "//third_party/abseil-cpp/absl/strings" ] +} + +rtc_source_set("vector_math") { + sources = [ "vector_math.h" ] + deps = [ + "..:cpu_features", + "../../../../api:array_view", + "../../../../rtc_base:checks", + "../../../../rtc_base:safe_conversions", + "../../../../rtc_base/system:arch", + ] +} + +if (current_cpu == "x86" || current_cpu == "x64") { + rtc_library("vector_math_avx2") { + sources = [ "vector_math_avx2.cc" ] + if (is_win && !build_with_mozilla) { + cflags = [ "/arch:AVX2" ] + } else { + cflags = [ + "-mavx2", + "-mfma", + ] + } + deps = [ + ":vector_math", + "../../../../api:array_view", + "../../../../rtc_base:checks", + "../../../../rtc_base:safe_conversions", + ] + } +} + +rtc_library("rnn_vad_pitch") { + sources = [ + "pitch_search.cc", + "pitch_search.h", + "pitch_search_internal.cc", + "pitch_search_internal.h", + ] + + defines = [] + if (rtc_build_with_neon && current_cpu != "arm64") { + suppressed_configs += [ "//build/config/compiler:compiler_arm_fpu" ] + cflags = [ "-mfpu=neon" ] + } + + deps = [ + ":rnn_vad_auto_correlation", + ":rnn_vad_common", + ":vector_math", + "..:cpu_features", + "../../../../api:array_view", + "../../../../rtc_base:checks", + "../../../../rtc_base:gtest_prod", + "../../../../rtc_base:safe_compare", + "../../../../rtc_base:safe_conversions", + "../../../../rtc_base/system:arch", + ] + if (current_cpu == "x86" || current_cpu == "x64") { + deps += [ ":vector_math_avx2" ] + } +} + +rtc_source_set("rnn_vad_ring_buffer") { + sources = [ "ring_buffer.h" ] + deps = [ + "../../../../api:array_view", + "../../../../rtc_base:checks", + ] +} + +rtc_source_set("rnn_vad_sequence_buffer") { + sources = [ "sequence_buffer.h" ] + deps = [ + "../../../../api:array_view", + "../../../../rtc_base:checks", + ] +} + +rtc_library("rnn_vad_spectral_features") { + sources = [ + "spectral_features.cc", + "spectral_features.h", + "spectral_features_internal.cc", + "spectral_features_internal.h", + ] + deps = [ + ":rnn_vad_common", + ":rnn_vad_ring_buffer", + ":rnn_vad_symmetric_matrix_buffer", + "../../../../api:array_view", + "../../../../rtc_base:checks", + "../../../../rtc_base:safe_compare", + "../../utility:pffft_wrapper", + ] +} + +rtc_source_set("rnn_vad_symmetric_matrix_buffer") { + sources = [ "symmetric_matrix_buffer.h" ] + deps = [ + "../../../../api:array_view", + "../../../../rtc_base:checks", + "../../../../rtc_base:safe_compare", + ] +} + +if (rtc_include_tests) { + rtc_library("test_utils") { + testonly = true + sources = [ + 
"test_utils.cc", + "test_utils.h", + ] + deps = [ + ":rnn_vad", + ":rnn_vad_common", + "../../../../api:array_view", + "../../../../api:scoped_refptr", + "../../../../rtc_base:checks", + "../../../../rtc_base:safe_compare", + "../../../../test:fileutils", + "../../../../test:test_support", + ] + absl_deps = [ "//third_party/abseil-cpp/absl/strings" ] + } + + unittest_resources = [ + "../../../../resources/audio_processing/agc2/rnn_vad/band_energies.dat", + "../../../../resources/audio_processing/agc2/rnn_vad/pitch_buf_24k.dat", + "../../../../resources/audio_processing/agc2/rnn_vad/pitch_lp_res.dat", + "../../../../resources/audio_processing/agc2/rnn_vad/pitch_search_int.dat", + "../../../../resources/audio_processing/agc2/rnn_vad/samples.pcm", + "../../../../resources/audio_processing/agc2/rnn_vad/vad_prob.dat", + ] + + if (is_ios) { + bundle_data("unittests_bundle_data") { + testonly = true + sources = unittest_resources + outputs = [ "{{bundle_resources_dir}}/{{source_file_part}}" ] + } + } + + rtc_library("unittests") { + testonly = true + sources = [ + "auto_correlation_unittest.cc", + "features_extraction_unittest.cc", + "lp_residual_unittest.cc", + "pitch_search_internal_unittest.cc", + "pitch_search_unittest.cc", + "ring_buffer_unittest.cc", + "rnn_fc_unittest.cc", + "rnn_gru_unittest.cc", + "rnn_unittest.cc", + "rnn_vad_unittest.cc", + "sequence_buffer_unittest.cc", + "spectral_features_internal_unittest.cc", + "spectral_features_unittest.cc", + "symmetric_matrix_buffer_unittest.cc", + "vector_math_unittest.cc", + ] + + defines = [] + if (rtc_build_with_neon && current_cpu != "arm64") { + suppressed_configs += [ "//build/config/compiler:compiler_arm_fpu" ] + cflags = [ "-mfpu=neon" ] + } + + deps = [ + ":rnn_vad", + ":rnn_vad_auto_correlation", + ":rnn_vad_common", + ":rnn_vad_layers", + ":rnn_vad_lp_residual", + ":rnn_vad_pitch", + ":rnn_vad_ring_buffer", + ":rnn_vad_sequence_buffer", + ":rnn_vad_spectral_features", + ":rnn_vad_symmetric_matrix_buffer", + ":test_utils", + ":vector_math", + "..:cpu_features", + "../..:audioproc_test_utils", + "../../../../api:array_view", + "../../../../common_audio/", + "../../../../rtc_base:checks", + "../../../../rtc_base:logging", + "../../../../rtc_base:safe_compare", + "../../../../rtc_base:safe_conversions", + "../../../../rtc_base:stringutils", + "../../../../rtc_base/system:arch", + "../../../../test:test_support", + "../../utility:pffft_wrapper", + "//third_party/rnnoise:rnn_vad", + ] + if (current_cpu == "x86" || current_cpu == "x64") { + deps += [ ":vector_math_avx2" ] + } + absl_deps = [ "//third_party/abseil-cpp/absl/memory" ] + data = unittest_resources + if (is_ios) { + deps += [ ":unittests_bundle_data" ] + } + } + + if (!build_with_chromium) { + rtc_executable("rnn_vad_tool") { + testonly = true + sources = [ "rnn_vad_tool.cc" ] + deps = [ + ":rnn_vad", + ":rnn_vad_common", + "..:cpu_features", + "../../../../api:array_view", + "../../../../common_audio", + "../../../../rtc_base:logging", + "../../../../rtc_base:safe_compare", + "../../../../test:test_support", + "//third_party/abseil-cpp/absl/flags:flag", + "//third_party/abseil-cpp/absl/flags:parse", + ] + } + } +} diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/DEPS b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/DEPS new file mode 100644 index 0000000000..773c2d7edd --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/DEPS @@ -0,0 +1,3 @@ +include_rules = [ + "+third_party/rnnoise", +] diff --git 
a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/auto_correlation.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/auto_correlation.cc new file mode 100644 index 0000000000..3ddeec8dba --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/auto_correlation.cc @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/rnn_vad/auto_correlation.h" + +#include <algorithm> + +#include "rtc_base/checks.h" + +namespace webrtc { +namespace rnn_vad { +namespace { + +constexpr int kAutoCorrelationFftOrder = 9; // Length-512 FFT. +static_assert(1 << kAutoCorrelationFftOrder > + kNumLags12kHz + kBufSize12kHz - kMaxPitch12kHz, + ""); + +} // namespace + +AutoCorrelationCalculator::AutoCorrelationCalculator() + : fft_(1 << kAutoCorrelationFftOrder, Pffft::FftType::kReal), + tmp_(fft_.CreateBuffer()), + X_(fft_.CreateBuffer()), + H_(fft_.CreateBuffer()) {} + +AutoCorrelationCalculator::~AutoCorrelationCalculator() = default; + +// The auto-correlations coefficients are computed as follows: +// |.........|...........| <- pitch buffer +// [ x (fixed) ] +// [ y_0 ] +// [ y_{m-1} ] +// x and y are sub-array of equal length; x is never moved, whereas y slides. +// The cross-correlation between y_0 and x corresponds to the auto-correlation +// for the maximum pitch period. Hence, the first value in `auto_corr` has an +// inverted lag equal to 0 that corresponds to a lag equal to the maximum +// pitch period. +void AutoCorrelationCalculator::ComputeOnPitchBuffer( + rtc::ArrayView<const float, kBufSize12kHz> pitch_buf, + rtc::ArrayView<float, kNumLags12kHz> auto_corr) { + RTC_DCHECK_LT(auto_corr.size(), kMaxPitch12kHz); + RTC_DCHECK_GT(pitch_buf.size(), kMaxPitch12kHz); + constexpr int kFftFrameSize = 1 << kAutoCorrelationFftOrder; + constexpr int kConvolutionLength = kBufSize12kHz - kMaxPitch12kHz; + static_assert(kConvolutionLength == kFrameSize20ms12kHz, + "Mismatch between pitch buffer size, frame size and maximum " + "pitch period."); + static_assert(kFftFrameSize > kNumLags12kHz + kConvolutionLength, + "The FFT length is not sufficiently big to avoid cyclic " + "convolution errors."); + auto tmp = tmp_->GetView(); + + // Compute the FFT for the reversed reference frame - i.e., + // pitch_buf[-kConvolutionLength:]. + std::reverse_copy(pitch_buf.end() - kConvolutionLength, pitch_buf.end(), + tmp.begin()); + std::fill(tmp.begin() + kConvolutionLength, tmp.end(), 0.f); + fft_.ForwardTransform(*tmp_, H_.get(), /*ordered=*/false); + + // Compute the FFT for the sliding frames chunk. The sliding frames are + // defined as pitch_buf[i:i+kConvolutionLength] where i in + // [0, kNumLags12kHz). The chunk includes all of them, hence it is + // defined as pitch_buf[:kNumLags12kHz+kConvolutionLength]. + std::copy(pitch_buf.begin(), + pitch_buf.begin() + kConvolutionLength + kNumLags12kHz, + tmp.begin()); + std::fill(tmp.begin() + kNumLags12kHz + kConvolutionLength, tmp.end(), 0.f); + fft_.ForwardTransform(*tmp_, X_.get(), /*ordered=*/false); + + // Convolve in the frequency domain. 
+ constexpr float kScalingFactor = 1.f / static_cast<float>(kFftFrameSize); + std::fill(tmp.begin(), tmp.end(), 0.f); + fft_.FrequencyDomainConvolve(*X_, *H_, tmp_.get(), kScalingFactor); + fft_.BackwardTransform(*tmp_, tmp_.get(), /*ordered=*/false); + + // Extract the auto-correlation coefficients. + std::copy(tmp.begin() + kConvolutionLength - 1, + tmp.begin() + kConvolutionLength + kNumLags12kHz - 1, + auto_corr.begin()); +} + +} // namespace rnn_vad +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/auto_correlation.h b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/auto_correlation.h new file mode 100644 index 0000000000..1ae5054567 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/auto_correlation.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_AUTO_CORRELATION_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_AUTO_CORRELATION_H_ + +#include <memory> + +#include "api/array_view.h" +#include "modules/audio_processing/agc2/rnn_vad/common.h" +#include "modules/audio_processing/utility/pffft_wrapper.h" + +namespace webrtc { +namespace rnn_vad { + +// Class to compute the auto correlation on the pitch buffer for a target pitch +// interval. +class AutoCorrelationCalculator { + public: + AutoCorrelationCalculator(); + AutoCorrelationCalculator(const AutoCorrelationCalculator&) = delete; + AutoCorrelationCalculator& operator=(const AutoCorrelationCalculator&) = + delete; + ~AutoCorrelationCalculator(); + + // Computes the auto-correlation coefficients for a target pitch interval. + // `auto_corr` indexes are inverted lags. + void ComputeOnPitchBuffer( + rtc::ArrayView<const float, kBufSize12kHz> pitch_buf, + rtc::ArrayView<float, kNumLags12kHz> auto_corr); + + private: + Pffft fft_; + std::unique_ptr<Pffft::FloatBuffer> tmp_; + std::unique_ptr<Pffft::FloatBuffer> X_; + std::unique_ptr<Pffft::FloatBuffer> H_; +}; + +} // namespace rnn_vad +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_AUTO_CORRELATION_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/auto_correlation_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/auto_correlation_unittest.cc new file mode 100644 index 0000000000..76001ed7b7 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/auto_correlation_unittest.cc @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/agc2/rnn_vad/auto_correlation.h" + +#include "modules/audio_processing/agc2/rnn_vad/common.h" +#include "modules/audio_processing/agc2/rnn_vad/pitch_search_internal.h" +#include "modules/audio_processing/agc2/rnn_vad/test_utils.h" +#include "test/gtest.h" + +namespace webrtc { +namespace rnn_vad { +namespace { + +// Checks that the auto correlation function produces output within tolerance +// given test input data. +TEST(RnnVadTest, PitchBufferAutoCorrelationWithinTolerance) { + PitchTestData test_data; + std::array<float, kBufSize12kHz> pitch_buf_decimated; + Decimate2x(test_data.PitchBuffer24kHzView(), pitch_buf_decimated); + std::array<float, kNumLags12kHz> computed_output; + { + // TODO(bugs.webrtc.org/8948): Add when the issue is fixed. + // FloatingPointExceptionObserver fpe_observer; + AutoCorrelationCalculator auto_corr_calculator; + auto_corr_calculator.ComputeOnPitchBuffer(pitch_buf_decimated, + computed_output); + } + auto auto_corr_view = test_data.AutoCorrelation12kHzView(); + ExpectNearAbsolute({auto_corr_view.data(), auto_corr_view.size()}, + computed_output, 3e-3f); +} + +// Checks that the auto correlation function computes the right thing for a +// simple use case. +TEST(RnnVadTest, CheckAutoCorrelationOnConstantPitchBuffer) { + // Create constant signal with no pitch. + std::array<float, kBufSize12kHz> pitch_buf_decimated; + std::fill(pitch_buf_decimated.begin(), pitch_buf_decimated.end(), 1.f); + std::array<float, kNumLags12kHz> computed_output; + { + // TODO(bugs.webrtc.org/8948): Add when the issue is fixed. + // FloatingPointExceptionObserver fpe_observer; + AutoCorrelationCalculator auto_corr_calculator; + auto_corr_calculator.ComputeOnPitchBuffer(pitch_buf_decimated, + computed_output); + } + // The expected output is a vector filled with the same expected + // auto-correlation value. The latter equals the length of a 20 ms frame. + constexpr int kFrameSize20ms12kHz = kFrameSize20ms24kHz / 2; + std::array<float, kNumLags12kHz> expected_output; + std::fill(expected_output.begin(), expected_output.end(), + static_cast<float>(kFrameSize20ms12kHz)); + ExpectNearAbsolute(expected_output, computed_output, 4e-5f); +} + +} // namespace +} // namespace rnn_vad +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/common.h b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/common.h new file mode 100644 index 0000000000..c099373200 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/common.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_COMMON_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_COMMON_H_ + +#include <stddef.h> + +namespace webrtc { +namespace rnn_vad { + +constexpr double kPi = 3.14159265358979323846; + +constexpr int kSampleRate24kHz = 24000; +constexpr int kFrameSize10ms24kHz = kSampleRate24kHz / 100; +constexpr int kFrameSize20ms24kHz = kFrameSize10ms24kHz * 2; + +// Pitch buffer. +constexpr int kMinPitch24kHz = kSampleRate24kHz / 800; // 0.00125 s. 
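// Worked numbers for the two period bounds (using the values written above
// and immediately below): kMinPitch24kHz = 24000 / 800 = 30 samples, i.e. the
// shortest searchable period is 1.25 ms (an 800 Hz fundamental), and
// kMaxPitch24kHz = 24000 / 62.5 = 384 samples (16 ms, 62.5 Hz). With a 20 ms
// analysis frame of 480 samples, the pitch buffer therefore holds
// kBufSize24kHz = 384 + 480 = 864 samples.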
+constexpr int kMaxPitch24kHz = kSampleRate24kHz / 62.5; // 0.016 s. +constexpr int kBufSize24kHz = kMaxPitch24kHz + kFrameSize20ms24kHz; +static_assert((kBufSize24kHz & 1) == 0, "The buffer size must be even."); + +// 24 kHz analysis. +// Define a higher minimum pitch period for the initial search. This is used to +// avoid searching for very short periods, for which a refinement step is +// responsible. +constexpr int kInitialMinPitch24kHz = 3 * kMinPitch24kHz; +static_assert(kMinPitch24kHz < kInitialMinPitch24kHz, ""); +static_assert(kInitialMinPitch24kHz < kMaxPitch24kHz, ""); +static_assert(kMaxPitch24kHz > kInitialMinPitch24kHz, ""); +// Number of (inverted) lags during the initial pitch search phase at 24 kHz. +constexpr int kInitialNumLags24kHz = kMaxPitch24kHz - kInitialMinPitch24kHz; +// Number of (inverted) lags during the pitch search refinement phase at 24 kHz. +constexpr int kRefineNumLags24kHz = kMaxPitch24kHz + 1; +static_assert( + kRefineNumLags24kHz > kInitialNumLags24kHz, + "The refinement step must search the pitch in an extended pitch range."); + +// 12 kHz analysis. +constexpr int kSampleRate12kHz = 12000; +constexpr int kFrameSize10ms12kHz = kSampleRate12kHz / 100; +constexpr int kFrameSize20ms12kHz = kFrameSize10ms12kHz * 2; +constexpr int kBufSize12kHz = kBufSize24kHz / 2; +constexpr int kInitialMinPitch12kHz = kInitialMinPitch24kHz / 2; +constexpr int kMaxPitch12kHz = kMaxPitch24kHz / 2; +static_assert(kMaxPitch12kHz > kInitialMinPitch12kHz, ""); +// The inverted lags for the pitch interval [`kInitialMinPitch12kHz`, +// `kMaxPitch12kHz`] are in the range [0, `kNumLags12kHz`]. +constexpr int kNumLags12kHz = kMaxPitch12kHz - kInitialMinPitch12kHz; + +// 48 kHz constants. +constexpr int kMinPitch48kHz = kMinPitch24kHz * 2; +constexpr int kMaxPitch48kHz = kMaxPitch24kHz * 2; + +// Spectral features. +constexpr int kNumBands = 22; +constexpr int kNumLowerBands = 6; +static_assert((0 < kNumLowerBands) && (kNumLowerBands < kNumBands), ""); +constexpr int kCepstralCoeffsHistorySize = 8; +static_assert(kCepstralCoeffsHistorySize > 2, + "The history size must at least be 3 to compute first and second " + "derivatives."); + +constexpr int kFeatureVectorSize = 42; + +} // namespace rnn_vad +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_COMMON_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/features_extraction.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/features_extraction.cc new file mode 100644 index 0000000000..502023428d --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/features_extraction.cc @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/rnn_vad/features_extraction.h" + +#include <array> + +#include "modules/audio_processing/agc2/rnn_vad/lp_residual.h" +#include "rtc_base/checks.h" + +namespace webrtc { +namespace rnn_vad { +namespace { + +// Computed as `scipy.signal.butter(N=2, Wn=60/24000, btype='highpass')`. 
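// Sketch of how to read the coefficients below, assuming BiQuadFilter applies
// the standard direct-form difference equation with the first array holding
// {b0, b1, b2} and the second {a1, a2} (a0 normalized to 1):
//
//   y[n] = b0*x[n] + b1*x[n-1] + b2*x[n-2] - a1*y[n-1] - a2*y[n-2]
//
// Since scipy normalizes `Wn` to the Nyquist frequency, Wn=60/24000 places
// the -3 dB point at roughly 30 Hz for the 24 kHz signal processed here.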
+constexpr BiQuadFilter::Config kHpfConfig24k{ + {0.99446179f, -1.98892358f, 0.99446179f}, + {-1.98889291f, 0.98895425f}}; + +} // namespace + +FeaturesExtractor::FeaturesExtractor(const AvailableCpuFeatures& cpu_features) + : use_high_pass_filter_(false), + hpf_(kHpfConfig24k), + pitch_buf_24kHz_(), + pitch_buf_24kHz_view_(pitch_buf_24kHz_.GetBufferView()), + lp_residual_(kBufSize24kHz), + lp_residual_view_(lp_residual_.data(), kBufSize24kHz), + pitch_estimator_(cpu_features), + reference_frame_view_(pitch_buf_24kHz_.GetMostRecentValuesView()) { + RTC_DCHECK_EQ(kBufSize24kHz, lp_residual_.size()); + Reset(); +} + +FeaturesExtractor::~FeaturesExtractor() = default; + +void FeaturesExtractor::Reset() { + pitch_buf_24kHz_.Reset(); + spectral_features_extractor_.Reset(); + if (use_high_pass_filter_) { + hpf_.Reset(); + } +} + +bool FeaturesExtractor::CheckSilenceComputeFeatures( + rtc::ArrayView<const float, kFrameSize10ms24kHz> samples, + rtc::ArrayView<float, kFeatureVectorSize> feature_vector) { + // Pre-processing. + if (use_high_pass_filter_) { + std::array<float, kFrameSize10ms24kHz> samples_filtered; + hpf_.Process(samples, samples_filtered); + // Feed buffer with the pre-processed version of `samples`. + pitch_buf_24kHz_.Push(samples_filtered); + } else { + // Feed buffer with `samples`. + pitch_buf_24kHz_.Push(samples); + } + // Extract the LP residual. + float lpc_coeffs[kNumLpcCoefficients]; + ComputeAndPostProcessLpcCoefficients(pitch_buf_24kHz_view_, lpc_coeffs); + ComputeLpResidual(lpc_coeffs, pitch_buf_24kHz_view_, lp_residual_view_); + // Estimate pitch on the LP-residual and write the normalized pitch period + // into the output vector (normalization based on training data stats). + pitch_period_48kHz_ = pitch_estimator_.Estimate(lp_residual_view_); + feature_vector[kFeatureVectorSize - 2] = 0.01f * (pitch_period_48kHz_ - 300); + // Extract lagged frames (according to the estimated pitch period). + RTC_DCHECK_LE(pitch_period_48kHz_ / 2, kMaxPitch24kHz); + auto lagged_frame = pitch_buf_24kHz_view_.subview( + kMaxPitch24kHz - pitch_period_48kHz_ / 2, kFrameSize20ms24kHz); + // Analyze reference and lagged frames checking if silence has been detected + // and write the feature vector. + return spectral_features_extractor_.CheckSilenceComputeFeatures( + reference_frame_view_, {lagged_frame.data(), kFrameSize20ms24kHz}, + {feature_vector.data() + kNumLowerBands, kNumBands - kNumLowerBands}, + {feature_vector.data(), kNumLowerBands}, + {feature_vector.data() + kNumBands, kNumLowerBands}, + {feature_vector.data() + kNumBands + kNumLowerBands, kNumLowerBands}, + {feature_vector.data() + kNumBands + 2 * kNumLowerBands, kNumLowerBands}, + &feature_vector[kFeatureVectorSize - 1]); +} + +} // namespace rnn_vad +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/features_extraction.h b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/features_extraction.h new file mode 100644 index 0000000000..d47a85bfb0 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/features_extraction.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_FEATURES_EXTRACTION_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_FEATURES_EXTRACTION_H_ + +#include <vector> + +#include "api/array_view.h" +#include "modules/audio_processing/agc2/biquad_filter.h" +#include "modules/audio_processing/agc2/rnn_vad/common.h" +#include "modules/audio_processing/agc2/rnn_vad/pitch_search.h" +#include "modules/audio_processing/agc2/rnn_vad/sequence_buffer.h" +#include "modules/audio_processing/agc2/rnn_vad/spectral_features.h" + +namespace webrtc { +namespace rnn_vad { + +// Feature extractor to feed the VAD RNN. +class FeaturesExtractor { + public: + explicit FeaturesExtractor(const AvailableCpuFeatures& cpu_features); + FeaturesExtractor(const FeaturesExtractor&) = delete; + FeaturesExtractor& operator=(const FeaturesExtractor&) = delete; + ~FeaturesExtractor(); + void Reset(); + // Analyzes the samples, computes the feature vector and returns true if + // silence is detected (false if not). When silence is detected, + // `feature_vector` is partially written and therefore must not be used to + // feed the VAD RNN. + bool CheckSilenceComputeFeatures( + rtc::ArrayView<const float, kFrameSize10ms24kHz> samples, + rtc::ArrayView<float, kFeatureVectorSize> feature_vector); + + private: + const bool use_high_pass_filter_; + // TODO(bugs.webrtc.org/7494): Remove HPF depending on how AGC2 is used in APM + // and on whether an HPF is already used as pre-processing step in APM. + BiQuadFilter hpf_; + SequenceBuffer<float, kBufSize24kHz, kFrameSize10ms24kHz, kFrameSize20ms24kHz> + pitch_buf_24kHz_; + rtc::ArrayView<const float, kBufSize24kHz> pitch_buf_24kHz_view_; + std::vector<float> lp_residual_; + rtc::ArrayView<float, kBufSize24kHz> lp_residual_view_; + PitchEstimator pitch_estimator_; + rtc::ArrayView<const float, kFrameSize20ms24kHz> reference_frame_view_; + SpectralFeaturesExtractor spectral_features_extractor_; + int pitch_period_48kHz_; +}; + +} // namespace rnn_vad +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_FEATURES_EXTRACTION_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/features_extraction_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/features_extraction_unittest.cc new file mode 100644 index 0000000000..96f956adfe --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/features_extraction_unittest.cc @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/rnn_vad/features_extraction.h" + +#include <cmath> +#include <vector> + +#include "modules/audio_processing/agc2/cpu_features.h" +#include "rtc_base/numerics/safe_compare.h" +#include "rtc_base/numerics/safe_conversions.h" +// TODO(bugs.webrtc.org/8948): Add when the issue is fixed. +// #include "test/fpe_observer.h" +#include "test/gtest.h" + +namespace webrtc { +namespace rnn_vad { +namespace { + +constexpr int ceil(int n, int m) { + return (n + m - 1) / m; +} + +// Number of 10 ms frames required to fill a pitch buffer having size +// `kBufSize24kHz`. 
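// With kBufSize24kHz = 864 and kFrameSize10ms24kHz = 240, this evaluates to
// ceil(864 / 240) = 4 frames, i.e. kNumTestDataSize = 960 samples of test
// data.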
+constexpr int kNumTestDataFrames = ceil(kBufSize24kHz, kFrameSize10ms24kHz); +// Number of samples for the test data. +constexpr int kNumTestDataSize = kNumTestDataFrames * kFrameSize10ms24kHz; + +// Verifies that the pitch in Hz is in the detectable range. +bool PitchIsValid(float pitch_hz) { + const int pitch_period = static_cast<float>(kSampleRate24kHz) / pitch_hz; + return kInitialMinPitch24kHz <= pitch_period && + pitch_period <= kMaxPitch24kHz; +} + +void CreatePureTone(float amplitude, float freq_hz, rtc::ArrayView<float> dst) { + for (int i = 0; rtc::SafeLt(i, dst.size()); ++i) { + dst[i] = amplitude * std::sin(2.f * kPi * freq_hz * i / kSampleRate24kHz); + } +} + +// Feeds `features_extractor` with `samples` splitting it in 10 ms frames. +// For every frame, the output is written into `feature_vector`. Returns true +// if silence is detected in the last frame. +bool FeedTestData(FeaturesExtractor& features_extractor, + rtc::ArrayView<const float> samples, + rtc::ArrayView<float, kFeatureVectorSize> feature_vector) { + // TODO(bugs.webrtc.org/8948): Add when the issue is fixed. + // FloatingPointExceptionObserver fpe_observer; + bool is_silence = true; + const int num_frames = samples.size() / kFrameSize10ms24kHz; + for (int i = 0; i < num_frames; ++i) { + is_silence = features_extractor.CheckSilenceComputeFeatures( + {samples.data() + i * kFrameSize10ms24kHz, kFrameSize10ms24kHz}, + feature_vector); + } + return is_silence; +} + +// Extracts the features for two pure tones and verifies that the pitch field +// values reflect the known tone frequencies. +TEST(RnnVadTest, FeatureExtractionLowHighPitch) { + constexpr float amplitude = 1000.f; + constexpr float low_pitch_hz = 150.f; + constexpr float high_pitch_hz = 250.f; + ASSERT_TRUE(PitchIsValid(low_pitch_hz)); + ASSERT_TRUE(PitchIsValid(high_pitch_hz)); + + const AvailableCpuFeatures cpu_features = GetAvailableCpuFeatures(); + FeaturesExtractor features_extractor(cpu_features); + std::vector<float> samples(kNumTestDataSize); + std::vector<float> feature_vector(kFeatureVectorSize); + ASSERT_EQ(kFeatureVectorSize, rtc::dchecked_cast<int>(feature_vector.size())); + rtc::ArrayView<float, kFeatureVectorSize> feature_vector_view( + feature_vector.data(), kFeatureVectorSize); + + // Extract the normalized scalar feature that is proportional to the estimated + // pitch period. + constexpr int pitch_feature_index = kFeatureVectorSize - 2; + // Low frequency tone - i.e., high period. + CreatePureTone(amplitude, low_pitch_hz, samples); + ASSERT_FALSE(FeedTestData(features_extractor, samples, feature_vector_view)); + float high_pitch_period = feature_vector_view[pitch_feature_index]; + // High frequency tone - i.e., low period. + features_extractor.Reset(); + CreatePureTone(amplitude, high_pitch_hz, samples); + ASSERT_FALSE(FeedTestData(features_extractor, samples, feature_vector_view)); + float low_pitch_period = feature_vector_view[pitch_feature_index]; + // Check. + EXPECT_LT(low_pitch_period, high_pitch_period); +} + +} // namespace +} // namespace rnn_vad +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/lp_residual.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/lp_residual.cc new file mode 100644 index 0000000000..484bfba459 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/lp_residual.cc @@ -0,0 +1,141 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/rnn_vad/lp_residual.h" + +#include <algorithm> +#include <array> +#include <cmath> +#include <numeric> + +#include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_compare.h" + +namespace webrtc { +namespace rnn_vad { +namespace { + +// Computes auto-correlation coefficients for `x` and writes them in +// `auto_corr`. The lag values are in {0, ..., max_lag - 1}, where max_lag +// equals the size of `auto_corr`. +void ComputeAutoCorrelation( + rtc::ArrayView<const float> x, + rtc::ArrayView<float, kNumLpcCoefficients> auto_corr) { + constexpr int max_lag = auto_corr.size(); + RTC_DCHECK_LT(max_lag, x.size()); + for (int lag = 0; lag < max_lag; ++lag) { + auto_corr[lag] = + std::inner_product(x.begin(), x.end() - lag, x.begin() + lag, 0.f); + } +} + +// Applies denoising to the auto-correlation coefficients. +void DenoiseAutoCorrelation( + rtc::ArrayView<float, kNumLpcCoefficients> auto_corr) { + // Assume -40 dB white noise floor. + auto_corr[0] *= 1.0001f; + // Hard-coded values obtained as + // [np.float32((0.008*0.008*i*i)) for i in range(1,5)]. + auto_corr[1] -= auto_corr[1] * 0.000064f; + auto_corr[2] -= auto_corr[2] * 0.000256f; + auto_corr[3] -= auto_corr[3] * 0.000576f; + auto_corr[4] -= auto_corr[4] * 0.001024f; + static_assert(kNumLpcCoefficients == 5, "Update `auto_corr`."); +} + +// Computes the initial inverse filter coefficients given the auto-correlation +// coefficients of an input frame. +void ComputeInitialInverseFilterCoefficients( + rtc::ArrayView<const float, kNumLpcCoefficients> auto_corr, + rtc::ArrayView<float, kNumLpcCoefficients - 1> lpc_coeffs) { + float error = auto_corr[0]; + for (int i = 0; i < kNumLpcCoefficients - 1; ++i) { + float reflection_coeff = 0.f; + for (int j = 0; j < i; ++j) { + reflection_coeff += lpc_coeffs[j] * auto_corr[i - j]; + } + reflection_coeff += auto_corr[i + 1]; + + // Avoid division by numbers close to zero. + constexpr float kMinErrorMagnitude = 1e-6f; + if (std::fabs(error) < kMinErrorMagnitude) { + error = std::copysign(kMinErrorMagnitude, error); + } + + reflection_coeff /= -error; + // Update LPC coefficients and total error. + lpc_coeffs[i] = reflection_coeff; + for (int j = 0; j < ((i + 1) >> 1); ++j) { + const float tmp1 = lpc_coeffs[j]; + const float tmp2 = lpc_coeffs[i - 1 - j]; + lpc_coeffs[j] = tmp1 + reflection_coeff * tmp2; + lpc_coeffs[i - 1 - j] = tmp2 + reflection_coeff * tmp1; + } + error -= reflection_coeff * reflection_coeff * error; + if (error < 0.001f * auto_corr[0]) { + break; + } + } +} + +} // namespace + +void ComputeAndPostProcessLpcCoefficients( + rtc::ArrayView<const float> x, + rtc::ArrayView<float, kNumLpcCoefficients> lpc_coeffs) { + std::array<float, kNumLpcCoefficients> auto_corr; + ComputeAutoCorrelation(x, auto_corr); + if (auto_corr[0] == 0.f) { // Empty frame. + std::fill(lpc_coeffs.begin(), lpc_coeffs.end(), 0); + return; + } + DenoiseAutoCorrelation(auto_corr); + std::array<float, kNumLpcCoefficients - 1> lpc_coeffs_pre{}; + ComputeInitialInverseFilterCoefficients(auto_corr, lpc_coeffs_pre); + // LPC coefficients post-processing. + // TODO(bugs.webrtc.org/9076): Consider removing these steps. 
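// One way to read the two post-processing steps that follow (a sketch of the
// math only, using the local names `lpc_coeffs_pre` and `kC`): scaling
// lpc_coeffs_pre[k] by 0.9^(k+1) is a conventional bandwidth expansion of the
// inverse filter, and the five values written to `lpc_coeffs` below are the
// z^-1..z^-5 coefficients of the polynomial product
//
//   (1 + kC*z^-1) * (1 + pre[0]*z^-1 + pre[1]*z^-2 + pre[2]*z^-3 +
//                    pre[3]*z^-4)
//
// where pre[] denotes the already-scaled `lpc_coeffs_pre` values.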
+ lpc_coeffs_pre[0] *= 0.9f; + lpc_coeffs_pre[1] *= 0.9f * 0.9f; + lpc_coeffs_pre[2] *= 0.9f * 0.9f * 0.9f; + lpc_coeffs_pre[3] *= 0.9f * 0.9f * 0.9f * 0.9f; + constexpr float kC = 0.8f; + lpc_coeffs[0] = lpc_coeffs_pre[0] + kC; + lpc_coeffs[1] = lpc_coeffs_pre[1] + kC * lpc_coeffs_pre[0]; + lpc_coeffs[2] = lpc_coeffs_pre[2] + kC * lpc_coeffs_pre[1]; + lpc_coeffs[3] = lpc_coeffs_pre[3] + kC * lpc_coeffs_pre[2]; + lpc_coeffs[4] = kC * lpc_coeffs_pre[3]; + static_assert(kNumLpcCoefficients == 5, "Update `lpc_coeffs(_pre)`."); +} + +void ComputeLpResidual( + rtc::ArrayView<const float, kNumLpcCoefficients> lpc_coeffs, + rtc::ArrayView<const float> x, + rtc::ArrayView<float> y) { + RTC_DCHECK_GT(x.size(), kNumLpcCoefficients); + RTC_DCHECK_EQ(x.size(), y.size()); + // The code below implements the following operation: + // y[i] = x[i] + dot_product({x[i], ..., x[i - kNumLpcCoefficients + 1]}, + // lpc_coeffs) + // Edge case: i < kNumLpcCoefficients. + y[0] = x[0]; + for (int i = 1; i < kNumLpcCoefficients; ++i) { + y[i] = + std::inner_product(x.crend() - i, x.crend(), lpc_coeffs.cbegin(), x[i]); + } + // Regular case. + auto last = x.crend(); + for (int i = kNumLpcCoefficients; rtc::SafeLt(i, y.size()); ++i, --last) { + y[i] = std::inner_product(last - kNumLpcCoefficients, last, + lpc_coeffs.cbegin(), x[i]); + } +} + +} // namespace rnn_vad +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/lp_residual.h b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/lp_residual.h new file mode 100644 index 0000000000..d04c536ec1 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/lp_residual.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_LP_RESIDUAL_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_LP_RESIDUAL_H_ + +#include <stddef.h> + +#include "api/array_view.h" + +namespace webrtc { +namespace rnn_vad { + +// Linear predictive coding (LPC) inverse filter length. +constexpr int kNumLpcCoefficients = 5; + +// Given a frame `x`, computes a post-processed version of LPC coefficients +// tailored for pitch estimation. +void ComputeAndPostProcessLpcCoefficients( + rtc::ArrayView<const float> x, + rtc::ArrayView<float, kNumLpcCoefficients> lpc_coeffs); + +// Computes the LP residual for the input frame `x` and the LPC coefficients +// `lpc_coeffs`. `y` and `x` can point to the same array for in-place +// computation. +void ComputeLpResidual( + rtc::ArrayView<const float, kNumLpcCoefficients> lpc_coeffs, + rtc::ArrayView<const float> x, + rtc::ArrayView<float> y); + +} // namespace rnn_vad +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_LP_RESIDUAL_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/lp_residual_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/lp_residual_unittest.cc new file mode 100644 index 0000000000..7b3a4a3f65 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/lp_residual_unittest.cc @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/rnn_vad/lp_residual.h" + +#include <algorithm> +#include <array> +#include <vector> + +#include "modules/audio_processing/agc2/rnn_vad/common.h" +#include "modules/audio_processing/agc2/rnn_vad/test_utils.h" +// TODO(bugs.webrtc.org/8948): Add when the issue is fixed. +// #include "test/fpe_observer.h" +#include "test/gtest.h" + +namespace webrtc { +namespace rnn_vad { +namespace { + +// Checks that the LP residual can be computed on an empty frame. +TEST(RnnVadTest, LpResidualOfEmptyFrame) { + // TODO(bugs.webrtc.org/8948): Add when the issue is fixed. + // FloatingPointExceptionObserver fpe_observer; + + // Input frame (empty, i.e., all samples set to 0). + std::array<float, kFrameSize10ms24kHz> empty_frame; + empty_frame.fill(0.f); + // Compute inverse filter coefficients. + std::array<float, kNumLpcCoefficients> lpc; + ComputeAndPostProcessLpcCoefficients(empty_frame, lpc); + // Compute LP residual. + std::array<float, kFrameSize10ms24kHz> lp_residual; + ComputeLpResidual(lpc, empty_frame, lp_residual); +} + +// Checks that the computed LP residual is bit-exact given test input data. +TEST(RnnVadTest, LpResidualPipelineBitExactness) { + // Input and expected output readers. + ChunksFileReader pitch_buffer_reader = CreatePitchBuffer24kHzReader(); + ChunksFileReader lp_pitch_reader = CreateLpResidualAndPitchInfoReader(); + + // Buffers. + std::vector<float> pitch_buffer_24kHz(kBufSize24kHz); + std::array<float, kNumLpcCoefficients> lpc; + std::vector<float> computed_lp_residual(kBufSize24kHz); + std::vector<float> expected_lp_residual(kBufSize24kHz); + + // Test length. + const int num_frames = + std::min(pitch_buffer_reader.num_chunks, 300); // Max 3 s. + ASSERT_GE(lp_pitch_reader.num_chunks, num_frames); + + // TODO(bugs.webrtc.org/8948): Add when the issue is fixed. + // FloatingPointExceptionObserver fpe_observer; + for (int i = 0; i < num_frames; ++i) { + SCOPED_TRACE(i); + // Read input. + ASSERT_TRUE(pitch_buffer_reader.reader->ReadChunk(pitch_buffer_24kHz)); + // Read expected output (ignore pitch gain and period). + ASSERT_TRUE(lp_pitch_reader.reader->ReadChunk(expected_lp_residual)); + lp_pitch_reader.reader->SeekForward(2); // Pitch period and strength. + // Check every 200 ms. + if (i % 20 == 0) { + ComputeAndPostProcessLpcCoefficients(pitch_buffer_24kHz, lpc); + ComputeLpResidual(lpc, pitch_buffer_24kHz, computed_lp_residual); + ExpectNearAbsolute(expected_lp_residual, computed_lp_residual, kFloatMin); + } + } +} + +} // namespace +} // namespace rnn_vad +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search.cc new file mode 100644 index 0000000000..419620fc0c --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search.cc @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/rnn_vad/pitch_search.h" + +#include <array> +#include <cstddef> + +#include "rtc_base/checks.h" + +namespace webrtc { +namespace rnn_vad { + +PitchEstimator::PitchEstimator(const AvailableCpuFeatures& cpu_features) + : cpu_features_(cpu_features), + y_energy_24kHz_(kRefineNumLags24kHz, 0.f), + pitch_buffer_12kHz_(kBufSize12kHz), + auto_correlation_12kHz_(kNumLags12kHz) {} + +PitchEstimator::~PitchEstimator() = default; + +int PitchEstimator::Estimate( + rtc::ArrayView<const float, kBufSize24kHz> pitch_buffer) { + rtc::ArrayView<float, kBufSize12kHz> pitch_buffer_12kHz_view( + pitch_buffer_12kHz_.data(), kBufSize12kHz); + RTC_DCHECK_EQ(pitch_buffer_12kHz_.size(), pitch_buffer_12kHz_view.size()); + rtc::ArrayView<float, kNumLags12kHz> auto_correlation_12kHz_view( + auto_correlation_12kHz_.data(), kNumLags12kHz); + RTC_DCHECK_EQ(auto_correlation_12kHz_.size(), + auto_correlation_12kHz_view.size()); + + // TODO(bugs.chromium.org/10480): Use `cpu_features_` to estimate pitch. + // Perform the initial pitch search at 12 kHz. + Decimate2x(pitch_buffer, pitch_buffer_12kHz_view); + auto_corr_calculator_.ComputeOnPitchBuffer(pitch_buffer_12kHz_view, + auto_correlation_12kHz_view); + CandidatePitchPeriods pitch_periods = ComputePitchPeriod12kHz( + pitch_buffer_12kHz_view, auto_correlation_12kHz_view, cpu_features_); + // The refinement is done using the pitch buffer that contains 24 kHz samples. + // Therefore, adapt the inverted lags in `pitch_candidates_inv_lags` from 12 + // to 24 kHz. + pitch_periods.best *= 2; + pitch_periods.second_best *= 2; + + // Refine the initial pitch period estimation from 12 kHz to 48 kHz. + // Pre-compute frame energies at 24 kHz. + rtc::ArrayView<float, kRefineNumLags24kHz> y_energy_24kHz_view( + y_energy_24kHz_.data(), kRefineNumLags24kHz); + RTC_DCHECK_EQ(y_energy_24kHz_.size(), y_energy_24kHz_view.size()); + ComputeSlidingFrameSquareEnergies24kHz(pitch_buffer, y_energy_24kHz_view, + cpu_features_); + // Estimation at 48 kHz. + const int pitch_lag_48kHz = ComputePitchPeriod48kHz( + pitch_buffer, y_energy_24kHz_view, pitch_periods, cpu_features_); + last_pitch_48kHz_ = ComputeExtendedPitchPeriod48kHz( + pitch_buffer, y_energy_24kHz_view, + /*initial_pitch_period_48kHz=*/kMaxPitch48kHz - pitch_lag_48kHz, + last_pitch_48kHz_, cpu_features_); + return last_pitch_48kHz_.period; +} + +} // namespace rnn_vad +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search.h b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search.h new file mode 100644 index 0000000000..42c448eb56 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_PITCH_SEARCH_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_PITCH_SEARCH_H_ + +#include <memory> +#include <vector> + +#include "api/array_view.h" +#include "modules/audio_processing/agc2/cpu_features.h" +#include "modules/audio_processing/agc2/rnn_vad/auto_correlation.h" +#include "modules/audio_processing/agc2/rnn_vad/common.h" +#include "modules/audio_processing/agc2/rnn_vad/pitch_search_internal.h" +#include "rtc_base/gtest_prod_util.h" + +namespace webrtc { +namespace rnn_vad { + +// Pitch estimator. +class PitchEstimator { + public: + explicit PitchEstimator(const AvailableCpuFeatures& cpu_features); + PitchEstimator(const PitchEstimator&) = delete; + PitchEstimator& operator=(const PitchEstimator&) = delete; + ~PitchEstimator(); + // Returns the estimated pitch period at 48 kHz. + int Estimate(rtc::ArrayView<const float, kBufSize24kHz> pitch_buffer); + + private: + FRIEND_TEST_ALL_PREFIXES(RnnVadTest, PitchSearchWithinTolerance); + float GetLastPitchStrengthForTesting() const { + return last_pitch_48kHz_.strength; + } + + const AvailableCpuFeatures cpu_features_; + PitchInfo last_pitch_48kHz_{}; + AutoCorrelationCalculator auto_corr_calculator_; + std::vector<float> y_energy_24kHz_; + std::vector<float> pitch_buffer_12kHz_; + std::vector<float> auto_correlation_12kHz_; +}; + +} // namespace rnn_vad +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_PITCH_SEARCH_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search_internal.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search_internal.cc new file mode 100644 index 0000000000..e8c912518d --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search_internal.cc @@ -0,0 +1,513 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/rnn_vad/pitch_search_internal.h" + +#include <stdlib.h> + +#include <algorithm> +#include <cmath> +#include <cstddef> +#include <numeric> + +#include "modules/audio_processing/agc2/rnn_vad/common.h" +#include "modules/audio_processing/agc2/rnn_vad/vector_math.h" +#include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_compare.h" +#include "rtc_base/numerics/safe_conversions.h" +#include "rtc_base/system/arch.h" + +namespace webrtc { +namespace rnn_vad { +namespace { + +float ComputeAutoCorrelation( + int inverted_lag, + rtc::ArrayView<const float, kBufSize24kHz> pitch_buffer, + const VectorMath& vector_math) { + RTC_DCHECK_LT(inverted_lag, kBufSize24kHz); + RTC_DCHECK_LT(inverted_lag, kRefineNumLags24kHz); + static_assert(kMaxPitch24kHz < kBufSize24kHz, ""); + return vector_math.DotProduct( + pitch_buffer.subview(/*offset=*/kMaxPitch24kHz), + pitch_buffer.subview(inverted_lag, kFrameSize20ms24kHz)); +} + +// Given an auto-correlation coefficient `curr_auto_correlation` and its +// neighboring values `prev_auto_correlation` and `next_auto_correlation` +// computes a pseudo-interpolation offset to be applied to the pitch period +// associated to `curr`. The output is a lag in {-1, 0, +1}. 
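// Worked example (illustrative values only): with
// (prev, curr, next) = (0.20, 0.50, 0.45),
//   next - prev = 0.25 > 0.7 * (curr - prev) = 0.21,
// so the function returns +1 (the true peak lies towards `next`);
// symmetrically, (0.45, 0.50, 0.20) returns -1, and (0.40, 0.50, 0.40)
// returns 0.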
+// TODO(bugs.webrtc.org/9076): Consider removing this method. +// `GetPitchPseudoInterpolationOffset()` it is relevant only if the spectral +// analysis works at a sample rate that is twice as that of the pitch buffer; +// In particular, it is not relevant for the estimated pitch period feature fed +// into the RNN. +int GetPitchPseudoInterpolationOffset(float prev_auto_correlation, + float curr_auto_correlation, + float next_auto_correlation) { + if ((next_auto_correlation - prev_auto_correlation) > + 0.7f * (curr_auto_correlation - prev_auto_correlation)) { + return 1; // `next_auto_correlation` is the largest auto-correlation + // coefficient. + } else if ((prev_auto_correlation - next_auto_correlation) > + 0.7f * (curr_auto_correlation - next_auto_correlation)) { + return -1; // `prev_auto_correlation` is the largest auto-correlation + // coefficient. + } + return 0; +} + +// Refines a pitch period `lag` encoded as lag with pseudo-interpolation. The +// output sample rate is twice as that of `lag`. +int PitchPseudoInterpolationLagPitchBuf( + int lag, + rtc::ArrayView<const float, kBufSize24kHz> pitch_buffer, + const VectorMath& vector_math) { + int offset = 0; + // Cannot apply pseudo-interpolation at the boundaries. + if (lag > 0 && lag < kMaxPitch24kHz) { + const int inverted_lag = kMaxPitch24kHz - lag; + offset = GetPitchPseudoInterpolationOffset( + ComputeAutoCorrelation(inverted_lag + 1, pitch_buffer, vector_math), + ComputeAutoCorrelation(inverted_lag, pitch_buffer, vector_math), + ComputeAutoCorrelation(inverted_lag - 1, pitch_buffer, vector_math)); + } + return 2 * lag + offset; +} + +// Integer multipliers used in ComputeExtendedPitchPeriod48kHz() when +// looking for sub-harmonics. +// The values have been chosen to serve the following algorithm. Given the +// initial pitch period T, we examine whether one of its harmonics is the true +// fundamental frequency. We consider T/k with k in {2, ..., 15}. For each of +// these harmonics, in addition to the pitch strength of itself, we choose one +// multiple of its pitch period, n*T/k, to validate it (by averaging their pitch +// strengths). The multiplier n is chosen so that n*T/k is used only one time +// over all k. When for example k = 4, we should also expect a peak at 3*T/4. +// When k = 8 instead we don't want to look at 2*T/8, since we have already +// checked T/4 before. Instead, we look at T*3/8. +// The array can be generate in Python as follows: +// from fractions import Fraction +// # Smallest positive integer not in X. +// def mex(X): +// for i in range(1, int(max(X)+2)): +// if i not in X: +// return i +// # Visited multiples of the period. +// S = {1} +// for n in range(2, 16): +// sn = mex({n * i for i in S} | {1}) +// S = S | {Fraction(1, n), Fraction(sn, n)} +// print(sn, end=', ') +constexpr std::array<int, 14> kSubHarmonicMultipliers = { + {3, 2, 3, 2, 5, 2, 3, 2, 3, 2, 5, 2, 3, 2}}; + +struct Range { + int min; + int max; +}; + +// Number of analyzed pitches to the left(right) of a pitch candidate. +constexpr int kPitchNeighborhoodRadius = 2; + +// Creates a pitch period interval centered in `inverted_lag` with hard-coded +// radius. Clipping is applied so that the interval is always valid for a 24 kHz +// pitch buffer. +Range CreateInvertedLagRange(int inverted_lag) { + return {std::max(inverted_lag - kPitchNeighborhoodRadius, 0), + std::min(inverted_lag + kPitchNeighborhoodRadius, + kInitialNumLags24kHz - 1)}; +} + +constexpr int kNumPitchCandidates = 2; // Best and second best. 
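// Putting the two constants above together: each of the kNumPitchCandidates
// candidates is refined over a neighborhood of
// 2 * kPitchNeighborhoodRadius + 1 = 5 inverted lags, e.g.
// CreateInvertedLagRange(140) spans [138, 142], so at most 2 * 5 = 10
// auto-correlation terms are evaluated during refinement (see
// kMaxPitchPeriods24kHz right below).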
+// Maximum number of analyzed pitch periods. +constexpr int kMaxPitchPeriods24kHz = + kNumPitchCandidates * (2 * kPitchNeighborhoodRadius + 1); + +// Collection of inverted lags. +class InvertedLagsIndex { + public: + InvertedLagsIndex() : num_entries_(0) {} + // Adds an inverted lag to the index. Cannot add more than + // `kMaxPitchPeriods24kHz` values. + void Append(int inverted_lag) { + RTC_DCHECK_LT(num_entries_, kMaxPitchPeriods24kHz); + inverted_lags_[num_entries_++] = inverted_lag; + } + const int* data() const { return inverted_lags_.data(); } + int size() const { return num_entries_; } + + private: + std::array<int, kMaxPitchPeriods24kHz> inverted_lags_; + int num_entries_; +}; + +// Computes the auto correlation coefficients for the inverted lags in the +// closed interval `inverted_lags`. Updates `inverted_lags_index` by appending +// the inverted lags for the computed auto correlation values. +void ComputeAutoCorrelation( + Range inverted_lags, + rtc::ArrayView<const float, kBufSize24kHz> pitch_buffer, + rtc::ArrayView<float, kInitialNumLags24kHz> auto_correlation, + InvertedLagsIndex& inverted_lags_index, + const VectorMath& vector_math) { + // Check valid range. + RTC_DCHECK_LE(inverted_lags.min, inverted_lags.max); + // Trick to avoid zero initialization of `auto_correlation`. + // Needed by the pseudo-interpolation. + if (inverted_lags.min > 0) { + auto_correlation[inverted_lags.min - 1] = 0.f; + } + if (inverted_lags.max < kInitialNumLags24kHz - 1) { + auto_correlation[inverted_lags.max + 1] = 0.f; + } + // Check valid `inverted_lag` indexes. + RTC_DCHECK_GE(inverted_lags.min, 0); + RTC_DCHECK_LT(inverted_lags.max, kInitialNumLags24kHz); + for (int inverted_lag = inverted_lags.min; inverted_lag <= inverted_lags.max; + ++inverted_lag) { + auto_correlation[inverted_lag] = + ComputeAutoCorrelation(inverted_lag, pitch_buffer, vector_math); + inverted_lags_index.Append(inverted_lag); + } +} + +// Searches the strongest pitch period at 24 kHz and returns its inverted lag at +// 48 kHz. +int ComputePitchPeriod48kHz( + rtc::ArrayView<const float, kBufSize24kHz> pitch_buffer, + rtc::ArrayView<const int> inverted_lags, + rtc::ArrayView<const float, kInitialNumLags24kHz> auto_correlation, + rtc::ArrayView<const float, kRefineNumLags24kHz> y_energy, + const VectorMath& vector_math) { + static_assert(kMaxPitch24kHz > kInitialNumLags24kHz, ""); + static_assert(kMaxPitch24kHz < kBufSize24kHz, ""); + int best_inverted_lag = 0; // Pitch period. + float best_numerator = -1.f; // Pitch strength numerator. + float best_denominator = 0.f; // Pitch strength denominator. + for (int inverted_lag : inverted_lags) { + // A pitch candidate must have positive correlation. + if (auto_correlation[inverted_lag] > 0.f) { + // Auto-correlation energy normalized by frame energy. + const float numerator = + auto_correlation[inverted_lag] * auto_correlation[inverted_lag]; + const float denominator = y_energy[inverted_lag]; + // Compare numerator/denominator ratios without using divisions. + if (numerator * best_denominator > best_numerator * denominator) { + best_inverted_lag = inverted_lag; + best_numerator = numerator; + best_denominator = denominator; + } + } + } + // Pseudo-interpolation to transform `best_inverted_lag` (24 kHz pitch) to a + // 48 kHz pitch period. + if (best_inverted_lag == 0 || best_inverted_lag >= kInitialNumLags24kHz - 1) { + // Cannot apply pseudo-interpolation at the boundaries. 
+ return best_inverted_lag * 2; + } + int offset = GetPitchPseudoInterpolationOffset( + auto_correlation[best_inverted_lag + 1], + auto_correlation[best_inverted_lag], + auto_correlation[best_inverted_lag - 1]); + // TODO(bugs.webrtc.org/9076): When retraining, check if `offset` below should + // be subtracted since `inverted_lag` is an inverted lag but offset is a lag. + return 2 * best_inverted_lag + offset; +} + +// Returns an alternative pitch period for `pitch_period` given a `multiplier` +// and a `divisor` of the period. +constexpr int GetAlternativePitchPeriod(int pitch_period, + int multiplier, + int divisor) { + RTC_DCHECK_GT(divisor, 0); + // Same as `round(multiplier * pitch_period / divisor)`. + return (2 * multiplier * pitch_period + divisor) / (2 * divisor); +} + +// Returns true if the alternative pitch period is stronger than the initial one +// given the last estimated pitch and the value of `period_divisor` used to +// compute the alternative pitch period via `GetAlternativePitchPeriod()`. +bool IsAlternativePitchStrongerThanInitial(PitchInfo last, + PitchInfo initial, + PitchInfo alternative, + int period_divisor) { + // Initial pitch period candidate thresholds for a sample rate of 24 kHz. + // Computed as [5*k*k for k in range(16)]. + constexpr std::array<int, 14> kInitialPitchPeriodThresholds = { + {20, 45, 80, 125, 180, 245, 320, 405, 500, 605, 720, 845, 980, 1125}}; + static_assert( + kInitialPitchPeriodThresholds.size() == kSubHarmonicMultipliers.size(), + ""); + RTC_DCHECK_GE(last.period, 0); + RTC_DCHECK_GE(initial.period, 0); + RTC_DCHECK_GE(alternative.period, 0); + RTC_DCHECK_GE(period_divisor, 2); + // Compute a term that lowers the threshold when `alternative.period` is close + // to the last estimated period `last.period` - i.e., pitch tracking. + float lower_threshold_term = 0.f; + if (std::abs(alternative.period - last.period) <= 1) { + // The candidate pitch period is within 1 sample from the last one. + // Make the candidate at `alternative.period` very easy to be accepted. + lower_threshold_term = last.strength; + } else if (std::abs(alternative.period - last.period) == 2 && + initial.period > + kInitialPitchPeriodThresholds[period_divisor - 2]) { + // The candidate pitch period is 2 samples far from the last one and the + // period `initial.period` (from which `alternative.period` has been + // derived) is greater than a threshold. Make `alternative.period` easy to + // be accepted. + lower_threshold_term = 0.5f * last.strength; + } + // Set the threshold based on the strength of the initial estimate + // `initial.period`. Also reduce the chance of false positives caused by a + // bias towards high frequencies (originating from short-term correlations). + float threshold = + std::max(0.3f, 0.7f * initial.strength - lower_threshold_term); + if (alternative.period < 3 * kMinPitch24kHz) { + // High frequency. + threshold = std::max(0.4f, 0.85f * initial.strength - lower_threshold_term); + } else if (alternative.period < 2 * kMinPitch24kHz) { + // Even higher frequency. + threshold = std::max(0.5f, 0.9f * initial.strength - lower_threshold_term); + } + return alternative.strength > threshold; +} + +} // namespace + +void Decimate2x(rtc::ArrayView<const float, kBufSize24kHz> src, + rtc::ArrayView<float, kBufSize12kHz> dst) { + // TODO(bugs.webrtc.org/9076): Consider adding anti-aliasing filter. 
+ static_assert(2 * kBufSize12kHz == kBufSize24kHz, ""); + for (int i = 0; i < kBufSize12kHz; ++i) { + dst[i] = src[2 * i]; + } +} + +void ComputeSlidingFrameSquareEnergies24kHz( + rtc::ArrayView<const float, kBufSize24kHz> pitch_buffer, + rtc::ArrayView<float, kRefineNumLags24kHz> y_energy, + AvailableCpuFeatures cpu_features) { + VectorMath vector_math(cpu_features); + static_assert(kFrameSize20ms24kHz < kBufSize24kHz, ""); + const auto frame_20ms_view = pitch_buffer.subview(0, kFrameSize20ms24kHz); + float yy = vector_math.DotProduct(frame_20ms_view, frame_20ms_view); + y_energy[0] = yy; + static_assert(kMaxPitch24kHz - 1 + kFrameSize20ms24kHz < kBufSize24kHz, ""); + static_assert(kMaxPitch24kHz < kRefineNumLags24kHz, ""); + for (int inverted_lag = 0; inverted_lag < kMaxPitch24kHz; ++inverted_lag) { + yy -= pitch_buffer[inverted_lag] * pitch_buffer[inverted_lag]; + yy += pitch_buffer[inverted_lag + kFrameSize20ms24kHz] * + pitch_buffer[inverted_lag + kFrameSize20ms24kHz]; + yy = std::max(1.f, yy); + y_energy[inverted_lag + 1] = yy; + } +} + +CandidatePitchPeriods ComputePitchPeriod12kHz( + rtc::ArrayView<const float, kBufSize12kHz> pitch_buffer, + rtc::ArrayView<const float, kNumLags12kHz> auto_correlation, + AvailableCpuFeatures cpu_features) { + static_assert(kMaxPitch12kHz > kNumLags12kHz, ""); + static_assert(kMaxPitch12kHz < kBufSize12kHz, ""); + + // Stores a pitch candidate period and strength information. + struct PitchCandidate { + // Pitch period encoded as inverted lag. + int period_inverted_lag = 0; + // Pitch strength encoded as a ratio. + float strength_numerator = -1.f; + float strength_denominator = 0.f; + // Compare the strength of two pitch candidates. + bool HasStrongerPitchThan(const PitchCandidate& b) const { + // Comparing the numerator/denominator ratios without using divisions. + return strength_numerator * b.strength_denominator > + b.strength_numerator * strength_denominator; + } + }; + + VectorMath vector_math(cpu_features); + static_assert(kFrameSize20ms12kHz + 1 < kBufSize12kHz, ""); + const auto frame_view = pitch_buffer.subview(0, kFrameSize20ms12kHz + 1); + float denominator = 1.f + vector_math.DotProduct(frame_view, frame_view); + // Search best and second best pitches by looking at the scaled + // auto-correlation. + PitchCandidate best; + PitchCandidate second_best; + second_best.period_inverted_lag = 1; + for (int inverted_lag = 0; inverted_lag < kNumLags12kHz; ++inverted_lag) { + // A pitch candidate must have positive correlation. + if (auto_correlation[inverted_lag] > 0.f) { + PitchCandidate candidate{ + inverted_lag, + auto_correlation[inverted_lag] * auto_correlation[inverted_lag], + denominator}; + if (candidate.HasStrongerPitchThan(second_best)) { + if (candidate.HasStrongerPitchThan(best)) { + second_best = best; + best = candidate; + } else { + second_best = candidate; + } + } + } + // Update `squared_energy_y` for the next inverted lag. 
+ const float y_old = pitch_buffer[inverted_lag]; + const float y_new = pitch_buffer[inverted_lag + kFrameSize20ms12kHz]; + denominator -= y_old * y_old; + denominator += y_new * y_new; + denominator = std::max(0.f, denominator); + } + return {best.period_inverted_lag, second_best.period_inverted_lag}; +} + +int ComputePitchPeriod48kHz( + rtc::ArrayView<const float, kBufSize24kHz> pitch_buffer, + rtc::ArrayView<const float, kRefineNumLags24kHz> y_energy, + CandidatePitchPeriods pitch_candidates, + AvailableCpuFeatures cpu_features) { + // Compute the auto-correlation terms only for neighbors of the two pitch + // candidates (best and second best). + std::array<float, kInitialNumLags24kHz> auto_correlation; + InvertedLagsIndex inverted_lags_index; + // Create two inverted lag ranges so that `r1` precedes `r2`. + const bool swap_candidates = + pitch_candidates.best > pitch_candidates.second_best; + const Range r1 = CreateInvertedLagRange( + swap_candidates ? pitch_candidates.second_best : pitch_candidates.best); + const Range r2 = CreateInvertedLagRange( + swap_candidates ? pitch_candidates.best : pitch_candidates.second_best); + // Check valid ranges. + RTC_DCHECK_LE(r1.min, r1.max); + RTC_DCHECK_LE(r2.min, r2.max); + // Check `r1` precedes `r2`. + RTC_DCHECK_LE(r1.min, r2.min); + RTC_DCHECK_LE(r1.max, r2.max); + VectorMath vector_math(cpu_features); + if (r1.max + 1 >= r2.min) { + // Overlapping or adjacent ranges. + ComputeAutoCorrelation({r1.min, r2.max}, pitch_buffer, auto_correlation, + inverted_lags_index, vector_math); + } else { + // Disjoint ranges. + ComputeAutoCorrelation(r1, pitch_buffer, auto_correlation, + inverted_lags_index, vector_math); + ComputeAutoCorrelation(r2, pitch_buffer, auto_correlation, + inverted_lags_index, vector_math); + } + return ComputePitchPeriod48kHz(pitch_buffer, inverted_lags_index, + auto_correlation, y_energy, vector_math); +} + +PitchInfo ComputeExtendedPitchPeriod48kHz( + rtc::ArrayView<const float, kBufSize24kHz> pitch_buffer, + rtc::ArrayView<const float, kRefineNumLags24kHz> y_energy, + int initial_pitch_period_48kHz, + PitchInfo last_pitch_48kHz, + AvailableCpuFeatures cpu_features) { + RTC_DCHECK_LE(kMinPitch48kHz, initial_pitch_period_48kHz); + RTC_DCHECK_LE(initial_pitch_period_48kHz, kMaxPitch48kHz); + + // Stores information for a refined pitch candidate. + struct RefinedPitchCandidate { + int period; + float strength; + // Additional strength data used for the final pitch estimation. + float xy; // Auto-correlation. + float y_energy; // Energy of the sliding frame `y`. + }; + + const float x_energy = y_energy[kMaxPitch24kHz]; + const auto pitch_strength = [x_energy](float xy, float y_energy) { + RTC_DCHECK_GE(x_energy * y_energy, 0.f); + return xy / std::sqrt(1.f + x_energy * y_energy); + }; + VectorMath vector_math(cpu_features); + + // Initialize the best pitch candidate with `initial_pitch_period_48kHz`. + RefinedPitchCandidate best_pitch; + best_pitch.period = + std::min(initial_pitch_period_48kHz / 2, kMaxPitch24kHz - 1); + best_pitch.xy = ComputeAutoCorrelation(kMaxPitch24kHz - best_pitch.period, + pitch_buffer, vector_math); + best_pitch.y_energy = y_energy[kMaxPitch24kHz - best_pitch.period]; + best_pitch.strength = pitch_strength(best_pitch.xy, best_pitch.y_energy); + // Keep a copy of the initial pitch candidate. + const PitchInfo initial_pitch{best_pitch.period, best_pitch.strength}; + // 24 kHz version of the last estimated pitch. 
+ const PitchInfo last_pitch{last_pitch_48kHz.period / 2, + last_pitch_48kHz.strength}; + + // Find `max_period_divisor` such that the result of + // `GetAlternativePitchPeriod(initial_pitch_period, 1, max_period_divisor)` + // equals `kMinPitch24kHz`. + const int max_period_divisor = + (2 * initial_pitch.period) / (2 * kMinPitch24kHz - 1); + for (int period_divisor = 2; period_divisor <= max_period_divisor; + ++period_divisor) { + PitchInfo alternative_pitch; + alternative_pitch.period = GetAlternativePitchPeriod( + initial_pitch.period, /*multiplier=*/1, period_divisor); + RTC_DCHECK_GE(alternative_pitch.period, kMinPitch24kHz); + // When looking at `alternative_pitch.period`, we also look at one of its + // sub-harmonics. `kSubHarmonicMultipliers` is used to know where to look. + // `period_divisor` == 2 is a special case since `dual_alternative_period` + // might be greater than the maximum pitch period. + int dual_alternative_period = GetAlternativePitchPeriod( + initial_pitch.period, kSubHarmonicMultipliers[period_divisor - 2], + period_divisor); + RTC_DCHECK_GT(dual_alternative_period, 0); + if (period_divisor == 2 && dual_alternative_period > kMaxPitch24kHz) { + dual_alternative_period = initial_pitch.period; + } + RTC_DCHECK_NE(alternative_pitch.period, dual_alternative_period) + << "The lower pitch period and the additional sub-harmonic must not " + "coincide."; + // Compute an auto-correlation score for the primary pitch candidate + // `alternative_pitch.period` by also looking at its possible sub-harmonic + // `dual_alternative_period`. + const float xy_primary_period = ComputeAutoCorrelation( + kMaxPitch24kHz - alternative_pitch.period, pitch_buffer, vector_math); + // TODO(webrtc:10480): Copy `xy_primary_period` if the secondary period is + // equal to the primary one. + const float xy_secondary_period = ComputeAutoCorrelation( + kMaxPitch24kHz - dual_alternative_period, pitch_buffer, vector_math); + const float xy = 0.5f * (xy_primary_period + xy_secondary_period); + const float yy = + 0.5f * (y_energy[kMaxPitch24kHz - alternative_pitch.period] + + y_energy[kMaxPitch24kHz - dual_alternative_period]); + alternative_pitch.strength = pitch_strength(xy, yy); + + // Maybe update best period. + if (IsAlternativePitchStrongerThanInitial( + last_pitch, initial_pitch, alternative_pitch, period_divisor)) { + best_pitch = {alternative_pitch.period, alternative_pitch.strength, xy, + yy}; + } + } + + // Final pitch strength and period. + best_pitch.xy = std::max(0.f, best_pitch.xy); + RTC_DCHECK_LE(0.f, best_pitch.y_energy); + float final_pitch_strength = + (best_pitch.y_energy <= best_pitch.xy) + ? 1.f + : best_pitch.xy / (best_pitch.y_energy + 1.f); + final_pitch_strength = std::min(best_pitch.strength, final_pitch_strength); + int final_pitch_period_48kHz = std::max( + kMinPitch48kHz, PitchPseudoInterpolationLagPitchBuf( + best_pitch.period, pitch_buffer, vector_math)); + + return {final_pitch_period_48kHz, final_pitch_strength}; +} + +} // namespace rnn_vad +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search_internal.h b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search_internal.h new file mode 100644 index 0000000000..aa2dd13745 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search_internal.h @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_PITCH_SEARCH_INTERNAL_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_PITCH_SEARCH_INTERNAL_H_ + +#include <stddef.h> + +#include <array> +#include <utility> + +#include "api/array_view.h" +#include "modules/audio_processing/agc2/cpu_features.h" +#include "modules/audio_processing/agc2/rnn_vad/common.h" + +namespace webrtc { +namespace rnn_vad { + +// Performs 2x decimation without any anti-aliasing filter. +void Decimate2x(rtc::ArrayView<const float, kBufSize24kHz> src, + rtc::ArrayView<float, kBufSize12kHz> dst); + +// Key concepts and keywords used below in this file. +// +// The pitch estimation relies on a pitch buffer, which is an array-like data +// structured designed as follows: +// +// |....A....|.....B.....| +// +// The part on the left, named `A` contains the oldest samples, whereas `B` +// contains the most recent ones. The size of `A` corresponds to the maximum +// pitch period, that of `B` to the analysis frame size (e.g., 16 ms and 20 ms +// respectively). +// +// Pitch estimation is essentially based on the analysis of two 20 ms frames +// extracted from the pitch buffer. One frame, called `x`, is kept fixed and +// corresponds to `B` - i.e., the most recent 20 ms. The other frame, called +// `y`, is extracted from different parts of the buffer instead. +// +// The offset between `x` and `y` corresponds to a specific pitch period. +// For instance, if `y` is positioned at the beginning of the pitch buffer, then +// the cross-correlation between `x` and `y` can be used as an indication of the +// strength for the maximum pitch. +// +// Such an offset can be encoded in two ways: +// - As a lag, which is the index in the pitch buffer for the first item in `y` +// - As an inverted lag, which is the number of samples from the beginning of +// `x` and the end of `y` +// +// |---->| lag +// |....A....|.....B.....| +// |<--| inverted lag +// |.....y.....| `y` 20 ms frame +// +// The inverted lag has the advantage of being directly proportional to the +// corresponding pitch period. + +// Computes the sum of squared samples for every sliding frame `y` in the pitch +// buffer. The indexes of `y_energy` are inverted lags. +void ComputeSlidingFrameSquareEnergies24kHz( + rtc::ArrayView<const float, kBufSize24kHz> pitch_buffer, + rtc::ArrayView<float, kRefineNumLags24kHz> y_energy, + AvailableCpuFeatures cpu_features); + +// Top-2 pitch period candidates. Unit: number of samples - i.e., inverted lags. +struct CandidatePitchPeriods { + int best; + int second_best; +}; + +// Computes the candidate pitch periods at 12 kHz given a view on the 12 kHz +// pitch buffer and the auto-correlation values (having inverted lags as +// indexes). +CandidatePitchPeriods ComputePitchPeriod12kHz( + rtc::ArrayView<const float, kBufSize12kHz> pitch_buffer, + rtc::ArrayView<const float, kNumLags12kHz> auto_correlation, + AvailableCpuFeatures cpu_features); + +// Computes the pitch period at 48 kHz given a view on the 24 kHz pitch buffer, +// the energies for the sliding frames `y` at 24 kHz and the pitch period +// candidates at 24 kHz (encoded as inverted lag). 
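// For example, with the constants from common.h (kMaxPitch24kHz = 384): an
// inverted lag of 0 selects the oldest 20 ms frame `y` and encodes the longest
// searchable period, 384 samples (62.5 Hz at 24 kHz), while an inverted lag of
// 354 encodes a 30-sample period (800 Hz). The conversion used throughout this
// module is period = kMaxPitch24kHz - inverted_lag (and, at 48 kHz,
// period = kMaxPitch48kHz - inverted_lag).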
+int ComputePitchPeriod48kHz( + rtc::ArrayView<const float, kBufSize24kHz> pitch_buffer, + rtc::ArrayView<const float, kRefineNumLags24kHz> y_energy, + CandidatePitchPeriods pitch_candidates_24kHz, + AvailableCpuFeatures cpu_features); + +struct PitchInfo { + int period; + float strength; +}; + +// Computes the pitch period at 48 kHz searching in an extended pitch range +// given a view on the 24 kHz pitch buffer, the energies for the sliding frames +// `y` at 24 kHz, the initial 48 kHz estimation (computed by +// `ComputePitchPeriod48kHz()`) and the last estimated pitch. +PitchInfo ComputeExtendedPitchPeriod48kHz( + rtc::ArrayView<const float, kBufSize24kHz> pitch_buffer, + rtc::ArrayView<const float, kRefineNumLags24kHz> y_energy, + int initial_pitch_period_48kHz, + PitchInfo last_pitch_48kHz, + AvailableCpuFeatures cpu_features); + +} // namespace rnn_vad +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_PITCH_SEARCH_INTERNAL_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search_internal_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search_internal_unittest.cc new file mode 100644 index 0000000000..2a6e68f157 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search_internal_unittest.cc @@ -0,0 +1,217 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/rnn_vad/pitch_search_internal.h" + +#include <array> +#include <string> +#include <tuple> + +#include "modules/audio_processing/agc2/rnn_vad/test_utils.h" +#include "rtc_base/strings/string_builder.h" +// TODO(bugs.webrtc.org/8948): Add when the issue is fixed. +// #include "test/fpe_observer.h" +#include "test/gtest.h" + +namespace webrtc { +namespace rnn_vad { +namespace { + +constexpr int kTestPitchPeriodsLow = 3 * kMinPitch48kHz / 2; +constexpr int kTestPitchPeriodsHigh = (3 * kMinPitch48kHz + kMaxPitch48kHz) / 2; + +constexpr float kTestPitchStrengthLow = 0.35f; +constexpr float kTestPitchStrengthHigh = 0.75f; + +template <class T> +std::string PrintTestIndexAndCpuFeatures( + const ::testing::TestParamInfo<T>& info) { + rtc::StringBuilder builder; + builder << info.index << "_" << info.param.cpu_features.ToString(); + return builder.str(); +} + +// Finds the relevant CPU features combinations to test. +std::vector<AvailableCpuFeatures> GetCpuFeaturesToTest() { + std::vector<AvailableCpuFeatures> v; + v.push_back(NoAvailableCpuFeatures()); + AvailableCpuFeatures available = GetAvailableCpuFeatures(); + if (available.avx2) { + v.push_back({/*sse2=*/false, /*avx2=*/true, /*neon=*/false}); + } + if (available.sse2) { + v.push_back({/*sse2=*/true, /*avx2=*/false, /*neon=*/false}); + } + return v; +} + +// Checks that the frame-wise sliding square energy function produces output +// within tolerance given test input data. +TEST(RnnVadTest, ComputeSlidingFrameSquareEnergies24kHzWithinTolerance) { + const AvailableCpuFeatures cpu_features = GetAvailableCpuFeatures(); + + PitchTestData test_data; + std::array<float, kRefineNumLags24kHz> computed_output; + // TODO(bugs.webrtc.org/8948): Add when the issue is fixed. 
+ // FloatingPointExceptionObserver fpe_observer; + ComputeSlidingFrameSquareEnergies24kHz(test_data.PitchBuffer24kHzView(), + computed_output, cpu_features); + auto square_energies_view = test_data.SquareEnergies24kHzView(); + ExpectNearAbsolute({square_energies_view.data(), square_energies_view.size()}, + computed_output, 1e-3f); +} + +// Checks that the estimated pitch period is bit-exact given test input data. +TEST(RnnVadTest, ComputePitchPeriod12kHzBitExactness) { + const AvailableCpuFeatures cpu_features = GetAvailableCpuFeatures(); + + PitchTestData test_data; + std::array<float, kBufSize12kHz> pitch_buf_decimated; + Decimate2x(test_data.PitchBuffer24kHzView(), pitch_buf_decimated); + CandidatePitchPeriods pitch_candidates; + // TODO(bugs.webrtc.org/8948): Add when the issue is fixed. + // FloatingPointExceptionObserver fpe_observer; + pitch_candidates = ComputePitchPeriod12kHz( + pitch_buf_decimated, test_data.AutoCorrelation12kHzView(), cpu_features); + EXPECT_EQ(pitch_candidates.best, 140); + EXPECT_EQ(pitch_candidates.second_best, 142); +} + +// Checks that the refined pitch period is bit-exact given test input data. +TEST(RnnVadTest, ComputePitchPeriod48kHzBitExactness) { + const AvailableCpuFeatures cpu_features = GetAvailableCpuFeatures(); + + PitchTestData test_data; + std::vector<float> y_energy(kRefineNumLags24kHz); + rtc::ArrayView<float, kRefineNumLags24kHz> y_energy_view(y_energy.data(), + kRefineNumLags24kHz); + ComputeSlidingFrameSquareEnergies24kHz(test_data.PitchBuffer24kHzView(), + y_energy_view, cpu_features); + // TODO(bugs.webrtc.org/8948): Add when the issue is fixed. + // FloatingPointExceptionObserver fpe_observer; + EXPECT_EQ( + ComputePitchPeriod48kHz(test_data.PitchBuffer24kHzView(), y_energy_view, + /*pitch_candidates=*/{280, 284}, cpu_features), + 560); + EXPECT_EQ( + ComputePitchPeriod48kHz(test_data.PitchBuffer24kHzView(), y_energy_view, + /*pitch_candidates=*/{260, 284}, cpu_features), + 568); +} + +struct PitchCandidatesParameters { + CandidatePitchPeriods pitch_candidates; + AvailableCpuFeatures cpu_features; +}; + +class PitchCandidatesParametrization + : public ::testing::TestWithParam<PitchCandidatesParameters> {}; + +// Checks that the result of `ComputePitchPeriod48kHz()` does not depend on the +// order of the input pitch candidates. 
+TEST_P(PitchCandidatesParametrization, + ComputePitchPeriod48kHzOrderDoesNotMatter) { + const PitchCandidatesParameters params = GetParam(); + const CandidatePitchPeriods swapped_pitch_candidates{ + params.pitch_candidates.second_best, params.pitch_candidates.best}; + + PitchTestData test_data; + std::vector<float> y_energy(kRefineNumLags24kHz); + rtc::ArrayView<float, kRefineNumLags24kHz> y_energy_view(y_energy.data(), + kRefineNumLags24kHz); + ComputeSlidingFrameSquareEnergies24kHz(test_data.PitchBuffer24kHzView(), + y_energy_view, params.cpu_features); + EXPECT_EQ( + ComputePitchPeriod48kHz(test_data.PitchBuffer24kHzView(), y_energy_view, + params.pitch_candidates, params.cpu_features), + ComputePitchPeriod48kHz(test_data.PitchBuffer24kHzView(), y_energy_view, + swapped_pitch_candidates, params.cpu_features)); +} + +std::vector<PitchCandidatesParameters> CreatePitchCandidatesParameters() { + std::vector<PitchCandidatesParameters> v; + for (AvailableCpuFeatures cpu_features : GetCpuFeaturesToTest()) { + v.push_back({{0, 2}, cpu_features}); + v.push_back({{260, 284}, cpu_features}); + v.push_back({{280, 284}, cpu_features}); + v.push_back( + {{kInitialNumLags24kHz - 2, kInitialNumLags24kHz - 1}, cpu_features}); + } + return v; +} + +INSTANTIATE_TEST_SUITE_P( + RnnVadTest, + PitchCandidatesParametrization, + ::testing::ValuesIn(CreatePitchCandidatesParameters()), + PrintTestIndexAndCpuFeatures<PitchCandidatesParameters>); + +struct ExtendedPitchPeriodSearchParameters { + int initial_pitch_period; + PitchInfo last_pitch; + PitchInfo expected_pitch; + AvailableCpuFeatures cpu_features; +}; + +class ExtendedPitchPeriodSearchParametrizaion + : public ::testing::TestWithParam<ExtendedPitchPeriodSearchParameters> {}; + +// Checks that the computed pitch period is bit-exact and that the computed +// pitch strength is within tolerance given test input data. +TEST_P(ExtendedPitchPeriodSearchParametrizaion, + PeriodBitExactnessGainWithinTolerance) { + const ExtendedPitchPeriodSearchParameters params = GetParam(); + + PitchTestData test_data; + std::vector<float> y_energy(kRefineNumLags24kHz); + rtc::ArrayView<float, kRefineNumLags24kHz> y_energy_view(y_energy.data(), + kRefineNumLags24kHz); + ComputeSlidingFrameSquareEnergies24kHz(test_data.PitchBuffer24kHzView(), + y_energy_view, params.cpu_features); + // TODO(bugs.webrtc.org/8948): Add when the issue is fixed. 
+ // FloatingPointExceptionObserver fpe_observer; + const auto computed_output = ComputeExtendedPitchPeriod48kHz( + test_data.PitchBuffer24kHzView(), y_energy_view, + params.initial_pitch_period, params.last_pitch, params.cpu_features); + EXPECT_EQ(params.expected_pitch.period, computed_output.period); + EXPECT_NEAR(params.expected_pitch.strength, computed_output.strength, 1e-6f); +} + +std::vector<ExtendedPitchPeriodSearchParameters> +CreateExtendedPitchPeriodSearchParameters() { + std::vector<ExtendedPitchPeriodSearchParameters> v; + for (AvailableCpuFeatures cpu_features : GetCpuFeaturesToTest()) { + for (int last_pitch_period : + {kTestPitchPeriodsLow, kTestPitchPeriodsHigh}) { + for (float last_pitch_strength : + {kTestPitchStrengthLow, kTestPitchStrengthHigh}) { + v.push_back({kTestPitchPeriodsLow, + {last_pitch_period, last_pitch_strength}, + {91, -0.0188608f}, + cpu_features}); + v.push_back({kTestPitchPeriodsHigh, + {last_pitch_period, last_pitch_strength}, + {475, -0.0904344f}, + cpu_features}); + } + } + } + return v; +} + +INSTANTIATE_TEST_SUITE_P( + RnnVadTest, + ExtendedPitchPeriodSearchParametrizaion, + ::testing::ValuesIn(CreateExtendedPitchPeriodSearchParameters()), + PrintTestIndexAndCpuFeatures<ExtendedPitchPeriodSearchParameters>); + +} // namespace +} // namespace rnn_vad +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search_unittest.cc new file mode 100644 index 0000000000..79b44b995c --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search_unittest.cc @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/rnn_vad/pitch_search.h" + +#include <algorithm> +#include <vector> + +#include "modules/audio_processing/agc2/cpu_features.h" +#include "modules/audio_processing/agc2/rnn_vad/pitch_search_internal.h" +#include "modules/audio_processing/agc2/rnn_vad/test_utils.h" +// TODO(bugs.webrtc.org/8948): Add when the issue is fixed. +// #include "test/fpe_observer.h" +#include "test/gtest.h" + +namespace webrtc { +namespace rnn_vad { + +// Checks that the computed pitch period is bit-exact and that the computed +// pitch gain is within tolerance given test input data. +TEST(RnnVadTest, PitchSearchWithinTolerance) { + ChunksFileReader reader = CreateLpResidualAndPitchInfoReader(); + const int num_frames = std::min(reader.num_chunks, 300); // Max 3 s. + std::vector<float> lp_residual(kBufSize24kHz); + float expected_pitch_period, expected_pitch_strength; + const AvailableCpuFeatures cpu_features = GetAvailableCpuFeatures(); + PitchEstimator pitch_estimator(cpu_features); + { + // TODO(bugs.webrtc.org/8948): Add when the issue is fixed. 
+ // FloatingPointExceptionObserver fpe_observer; + for (int i = 0; i < num_frames; ++i) { + SCOPED_TRACE(i); + ASSERT_TRUE(reader.reader->ReadChunk(lp_residual)); + ASSERT_TRUE(reader.reader->ReadValue(expected_pitch_period)); + ASSERT_TRUE(reader.reader->ReadValue(expected_pitch_strength)); + int pitch_period = + pitch_estimator.Estimate({lp_residual.data(), kBufSize24kHz}); + EXPECT_EQ(expected_pitch_period, pitch_period); + EXPECT_NEAR(expected_pitch_strength, + pitch_estimator.GetLastPitchStrengthForTesting(), 15e-6f); + } + } +} + +} // namespace rnn_vad +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/ring_buffer.h b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/ring_buffer.h new file mode 100644 index 0000000000..a6f7fdd1a6 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/ring_buffer.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_RING_BUFFER_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_RING_BUFFER_H_ + +#include <array> +#include <cstring> +#include <type_traits> + +#include "api/array_view.h" + +namespace webrtc { +namespace rnn_vad { + +// Ring buffer for N arrays of type T each one with size S. +template <typename T, int S, int N> +class RingBuffer { + static_assert(S > 0, ""); + static_assert(N > 0, ""); + static_assert(std::is_arithmetic<T>::value, + "Integral or floating point required."); + + public: + RingBuffer() : tail_(0) {} + RingBuffer(const RingBuffer&) = delete; + RingBuffer& operator=(const RingBuffer&) = delete; + ~RingBuffer() = default; + // Set the ring buffer values to zero. + void Reset() { buffer_.fill(0); } + // Replace the least recently pushed array in the buffer with `new_values`. + void Push(rtc::ArrayView<const T, S> new_values) { + std::memcpy(buffer_.data() + S * tail_, new_values.data(), S * sizeof(T)); + tail_ += 1; + if (tail_ == N) + tail_ = 0; + } + // Return an array view onto the array with a given delay. A view on the last + // and least recently push array is returned when `delay` is 0 and N - 1 + // respectively. + rtc::ArrayView<const T, S> GetArrayView(int delay) const { + RTC_DCHECK_LE(0, delay); + RTC_DCHECK_LT(delay, N); + int offset = tail_ - 1 - delay; + if (offset < 0) + offset += N; + return {buffer_.data() + S * offset, S}; + } + + private: + int tail_; // Index of the least recently pushed sub-array. + std::array<T, S * N> buffer_{}; +}; + +} // namespace rnn_vad +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_RING_BUFFER_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/ring_buffer_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/ring_buffer_unittest.cc new file mode 100644 index 0000000000..d11d4eac3e --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/ring_buffer_unittest.cc @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/rnn_vad/ring_buffer.h" + +#include "test/gtest.h" + +namespace webrtc { +namespace rnn_vad { +namespace { + +// Compare the elements of two given array views. +template <typename T, std::ptrdiff_t S> +void ExpectEq(rtc::ArrayView<const T, S> a, rtc::ArrayView<const T, S> b) { + for (int i = 0; i < S; ++i) { + SCOPED_TRACE(i); + EXPECT_EQ(a[i], b[i]); + } +} + +// Test push/read sequences. +template <typename T, int S, int N> +void TestRingBuffer() { + SCOPED_TRACE(N); + SCOPED_TRACE(S); + std::array<T, S> prev_pushed_array; + std::array<T, S> pushed_array; + rtc::ArrayView<const T, S> pushed_array_view(pushed_array.data(), S); + + // Init. + RingBuffer<T, S, N> ring_buf; + ring_buf.GetArrayView(0); + pushed_array.fill(0); + ring_buf.Push(pushed_array_view); + ExpectEq(pushed_array_view, ring_buf.GetArrayView(0)); + + // Push N times and check most recent and second most recent. + for (T v = 1; v <= static_cast<T>(N); ++v) { + SCOPED_TRACE(v); + prev_pushed_array = pushed_array; + pushed_array.fill(v); + ring_buf.Push(pushed_array_view); + ExpectEq(pushed_array_view, ring_buf.GetArrayView(0)); + if (N > 1) { + pushed_array.fill(v - 1); + ExpectEq(pushed_array_view, ring_buf.GetArrayView(1)); + } + } + + // Check buffer. + for (int delay = 2; delay < N; ++delay) { + SCOPED_TRACE(delay); + T expected_value = N - static_cast<T>(delay); + pushed_array.fill(expected_value); + ExpectEq(pushed_array_view, ring_buf.GetArrayView(delay)); + } +} + +// Check that for different delays, different views are returned. +TEST(RnnVadTest, RingBufferArrayViews) { + constexpr int s = 3; + constexpr int n = 4; + RingBuffer<int, s, n> ring_buf; + std::array<int, s> pushed_array; + pushed_array.fill(1); + for (int k = 0; k <= n; ++k) { // Push data n + 1 times. + SCOPED_TRACE(k); + // Check array views. + for (int i = 0; i < n; ++i) { + SCOPED_TRACE(i); + auto view_i = ring_buf.GetArrayView(i); + for (int j = i + 1; j < n; ++j) { + SCOPED_TRACE(j); + auto view_j = ring_buf.GetArrayView(j); + EXPECT_NE(view_i, view_j); + } + } + ring_buf.Push(pushed_array); + } +} + +TEST(RnnVadTest, RingBufferUnsigned) { + TestRingBuffer<uint8_t, 1, 1>(); + TestRingBuffer<uint8_t, 2, 5>(); + TestRingBuffer<uint8_t, 5, 2>(); + TestRingBuffer<uint8_t, 5, 5>(); +} + +TEST(RnnVadTest, RingBufferSigned) { + TestRingBuffer<int, 1, 1>(); + TestRingBuffer<int, 2, 5>(); + TestRingBuffer<int, 5, 2>(); + TestRingBuffer<int, 5, 5>(); +} + +TEST(RnnVadTest, RingBufferFloating) { + TestRingBuffer<float, 1, 1>(); + TestRingBuffer<float, 2, 5>(); + TestRingBuffer<float, 5, 2>(); + TestRingBuffer<float, 5, 5>(); +} + +} // namespace +} // namespace rnn_vad +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn.cc new file mode 100644 index 0000000000..475bef9775 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn.cc @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/rnn_vad/rnn.h" + +#include "rtc_base/checks.h" +#include "third_party/rnnoise/src/rnn_vad_weights.h" + +namespace webrtc { +namespace rnn_vad { +namespace { + +using ::rnnoise::kInputLayerInputSize; +static_assert(kFeatureVectorSize == kInputLayerInputSize, ""); +using ::rnnoise::kInputDenseBias; +using ::rnnoise::kInputDenseWeights; +using ::rnnoise::kInputLayerOutputSize; +static_assert(kInputLayerOutputSize <= kFullyConnectedLayerMaxUnits, ""); + +using ::rnnoise::kHiddenGruBias; +using ::rnnoise::kHiddenGruRecurrentWeights; +using ::rnnoise::kHiddenGruWeights; +using ::rnnoise::kHiddenLayerOutputSize; +static_assert(kHiddenLayerOutputSize <= kGruLayerMaxUnits, ""); + +using ::rnnoise::kOutputDenseBias; +using ::rnnoise::kOutputDenseWeights; +using ::rnnoise::kOutputLayerOutputSize; +static_assert(kOutputLayerOutputSize <= kFullyConnectedLayerMaxUnits, ""); + +} // namespace + +RnnVad::RnnVad(const AvailableCpuFeatures& cpu_features) + : input_(kInputLayerInputSize, + kInputLayerOutputSize, + kInputDenseBias, + kInputDenseWeights, + ActivationFunction::kTansigApproximated, + cpu_features, + /*layer_name=*/"FC1"), + hidden_(kInputLayerOutputSize, + kHiddenLayerOutputSize, + kHiddenGruBias, + kHiddenGruWeights, + kHiddenGruRecurrentWeights, + cpu_features, + /*layer_name=*/"GRU1"), + output_(kHiddenLayerOutputSize, + kOutputLayerOutputSize, + kOutputDenseBias, + kOutputDenseWeights, + ActivationFunction::kSigmoidApproximated, + // The output layer is just 24x1. The unoptimized code is faster. + NoAvailableCpuFeatures(), + /*layer_name=*/"FC2") { + // Input-output chaining size checks. + RTC_DCHECK_EQ(input_.size(), hidden_.input_size()) + << "The input and the hidden layers sizes do not match."; + RTC_DCHECK_EQ(hidden_.size(), output_.input_size()) + << "The hidden and the output layers sizes do not match."; +} + +RnnVad::~RnnVad() = default; + +void RnnVad::Reset() { + hidden_.Reset(); +} + +float RnnVad::ComputeVadProbability( + rtc::ArrayView<const float, kFeatureVectorSize> feature_vector, + bool is_silence) { + if (is_silence) { + Reset(); + return 0.f; + } + input_.ComputeOutput(feature_vector); + hidden_.ComputeOutput(input_); + output_.ComputeOutput(hidden_); + RTC_DCHECK_EQ(output_.size(), 1); + return output_.data()[0]; +} + +} // namespace rnn_vad +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn.h b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn.h new file mode 100644 index 0000000000..3148f1b3ff --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_RNN_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_RNN_H_ + +#include <stddef.h> +#include <sys/types.h> + +#include <array> +#include <vector> + +#include "api/array_view.h" +#include "modules/audio_processing/agc2/cpu_features.h" +#include "modules/audio_processing/agc2/rnn_vad/common.h" +#include "modules/audio_processing/agc2/rnn_vad/rnn_fc.h" +#include "modules/audio_processing/agc2/rnn_vad/rnn_gru.h" + +namespace webrtc { +namespace rnn_vad { + +// Recurrent network with hard-coded architecture and weights for voice activity +// detection. +class RnnVad { + public: + explicit RnnVad(const AvailableCpuFeatures& cpu_features); + RnnVad(const RnnVad&) = delete; + RnnVad& operator=(const RnnVad&) = delete; + ~RnnVad(); + void Reset(); + // Observes `feature_vector` and `is_silence`, updates the RNN and returns the + // current voice probability. + float ComputeVadProbability( + rtc::ArrayView<const float, kFeatureVectorSize> feature_vector, + bool is_silence); + + private: + FullyConnectedLayer input_; + GatedRecurrentLayer hidden_; + FullyConnectedLayer output_; +}; + +} // namespace rnn_vad +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_RNN_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_fc.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_fc.cc new file mode 100644 index 0000000000..91501fb6e3 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_fc.cc @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <algorithm> +#include <numeric> + +#include "modules/audio_processing/agc2/rnn_vad/rnn_fc.h" +#include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_conversions.h" +#include "third_party/rnnoise/src/rnn_activations.h" +#include "third_party/rnnoise/src/rnn_vad_weights.h" + +namespace webrtc { +namespace rnn_vad { +namespace { + +std::vector<float> GetScaledParams(rtc::ArrayView<const int8_t> params) { + std::vector<float> scaled_params(params.size()); + std::transform(params.begin(), params.end(), scaled_params.begin(), + [](int8_t x) -> float { + return ::rnnoise::kWeightsScale * static_cast<float>(x); + }); + return scaled_params; +} + +// TODO(bugs.chromium.org/10480): Hard-code optimized layout and remove this +// function to improve setup time. +// Casts and scales `weights` and re-arranges the layout. +std::vector<float> PreprocessWeights(rtc::ArrayView<const int8_t> weights, + int output_size) { + if (output_size == 1) { + return GetScaledParams(weights); + } + // Transpose, scale and cast. 
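+ // The source tensor is row-major with shape [input_size][output_size]
+ // (int8); the destination is [output_size][input_size] (float), so that the
+ // weights of each output unit are contiguous for the per-unit dot products
+ // computed in ComputeOutput().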
+ const int input_size = rtc::CheckedDivExact( + rtc::dchecked_cast<int>(weights.size()), output_size); + std::vector<float> w(weights.size()); + for (int o = 0; o < output_size; ++o) { + for (int i = 0; i < input_size; ++i) { + w[o * input_size + i] = rnnoise::kWeightsScale * + static_cast<float>(weights[i * output_size + o]); + } + } + return w; +} + +rtc::FunctionView<float(float)> GetActivationFunction( + ActivationFunction activation_function) { + switch (activation_function) { + case ActivationFunction::kTansigApproximated: + return ::rnnoise::TansigApproximated; + case ActivationFunction::kSigmoidApproximated: + return ::rnnoise::SigmoidApproximated; + } +} + +} // namespace + +FullyConnectedLayer::FullyConnectedLayer( + const int input_size, + const int output_size, + const rtc::ArrayView<const int8_t> bias, + const rtc::ArrayView<const int8_t> weights, + ActivationFunction activation_function, + const AvailableCpuFeatures& cpu_features, + absl::string_view layer_name) + : input_size_(input_size), + output_size_(output_size), + bias_(GetScaledParams(bias)), + weights_(PreprocessWeights(weights, output_size)), + vector_math_(cpu_features), + activation_function_(GetActivationFunction(activation_function)) { + RTC_DCHECK_LE(output_size_, kFullyConnectedLayerMaxUnits) + << "Insufficient FC layer over-allocation (" << layer_name << ")."; + RTC_DCHECK_EQ(output_size_, bias_.size()) + << "Mismatching output size and bias terms array size (" << layer_name + << ")."; + RTC_DCHECK_EQ(input_size_ * output_size_, weights_.size()) + << "Mismatching input-output size and weight coefficients array size (" + << layer_name << ")."; +} + +FullyConnectedLayer::~FullyConnectedLayer() = default; + +void FullyConnectedLayer::ComputeOutput(rtc::ArrayView<const float> input) { + RTC_DCHECK_EQ(input.size(), input_size_); + rtc::ArrayView<const float> weights(weights_); + for (int o = 0; o < output_size_; ++o) { + output_[o] = activation_function_( + bias_[o] + vector_math_.DotProduct( + input, weights.subview(o * input_size_, input_size_))); + } +} + +} // namespace rnn_vad +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_fc.h b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_fc.h new file mode 100644 index 0000000000..d23957a6f2 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_fc.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_RNN_FC_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_RNN_FC_H_ + +#include <array> +#include <vector> + +#include "absl/strings/string_view.h" +#include "api/array_view.h" +#include "api/function_view.h" +#include "modules/audio_processing/agc2/cpu_features.h" +#include "modules/audio_processing/agc2/rnn_vad/vector_math.h" + +namespace webrtc { +namespace rnn_vad { + +// Activation function for a neural network cell. +enum class ActivationFunction { kTansigApproximated, kSigmoidApproximated }; + +// Maximum number of units for an FC layer. 
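+// The value is used to statically size the layer output buffer; the ctor
+// checks that `output_size` does not exceed it.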
+constexpr int kFullyConnectedLayerMaxUnits = 24; + +// Fully-connected layer with a custom activation function which owns the output +// buffer. +class FullyConnectedLayer { + public: + // Ctor. `output_size` cannot be greater than `kFullyConnectedLayerMaxUnits`. + FullyConnectedLayer(int input_size, + int output_size, + rtc::ArrayView<const int8_t> bias, + rtc::ArrayView<const int8_t> weights, + ActivationFunction activation_function, + const AvailableCpuFeatures& cpu_features, + absl::string_view layer_name); + FullyConnectedLayer(const FullyConnectedLayer&) = delete; + FullyConnectedLayer& operator=(const FullyConnectedLayer&) = delete; + ~FullyConnectedLayer(); + + // Returns the size of the input vector. + int input_size() const { return input_size_; } + // Returns the pointer to the first element of the output buffer. + const float* data() const { return output_.data(); } + // Returns the size of the output buffer. + int size() const { return output_size_; } + + // Computes the fully-connected layer output. + void ComputeOutput(rtc::ArrayView<const float> input); + + private: + const int input_size_; + const int output_size_; + const std::vector<float> bias_; + const std::vector<float> weights_; + const VectorMath vector_math_; + rtc::FunctionView<float(float)> activation_function_; + // Over-allocated array with size equal to `output_size_`. + std::array<float, kFullyConnectedLayerMaxUnits> output_; +}; + +} // namespace rnn_vad +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_RNN_FC_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_fc_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_fc_unittest.cc new file mode 100644 index 0000000000..ff9bb18bc2 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_fc_unittest.cc @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/rnn_vad/rnn_fc.h" + +#include <array> +#include <vector> + +#include "api/array_view.h" +#include "modules/audio_processing/agc2/cpu_features.h" +#include "modules/audio_processing/agc2/rnn_vad/test_utils.h" +#include "modules/audio_processing/test/performance_timer.h" +#include "rtc_base/logging.h" +#include "rtc_base/system/arch.h" +#include "test/gtest.h" +#include "third_party/rnnoise/src/rnn_vad_weights.h" + +namespace webrtc { +namespace rnn_vad { +namespace { + +using ::rnnoise::kInputDenseBias; +using ::rnnoise::kInputDenseWeights; +using ::rnnoise::kInputLayerInputSize; +using ::rnnoise::kInputLayerOutputSize; + +// Fully connected layer test data. 
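+// The 42 input values below match the input layer size and the 24 expected
+// outputs match its output size (`kInputLayerInputSize` and
+// `kInputLayerOutputSize`).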
+constexpr std::array<float, 42> kFullyConnectedInputVector = { + -1.00131f, -0.627069f, -7.81097f, 7.86285f, -2.87145f, 3.32365f, + -0.653161f, 0.529839f, -0.425307f, 0.25583f, 0.235094f, 0.230527f, + -0.144687f, 0.182785f, 0.57102f, 0.125039f, 0.479482f, -0.0255439f, + -0.0073141f, -0.147346f, -0.217106f, -0.0846906f, -8.34943f, 3.09065f, + 1.42628f, -0.85235f, -0.220207f, -0.811163f, 2.09032f, -2.01425f, + -0.690268f, -0.925327f, -0.541354f, 0.58455f, -0.606726f, -0.0372358f, + 0.565991f, 0.435854f, 0.420812f, 0.162198f, -2.13f, 10.0089f}; +constexpr std::array<float, 24> kFullyConnectedExpectedOutput = { + -0.623293f, -0.988299f, 0.999378f, 0.967168f, 0.103087f, -0.978545f, + -0.856347f, 0.346675f, 1.f, -0.717442f, -0.544176f, 0.960363f, + 0.983443f, 0.999991f, -0.824335f, 0.984742f, 0.990208f, 0.938179f, + 0.875092f, 0.999846f, 0.997707f, -0.999382f, 0.973153f, -0.966605f}; + +class RnnFcParametrization + : public ::testing::TestWithParam<AvailableCpuFeatures> {}; + +// Checks that the output of a fully connected layer is within tolerance given +// test input data. +TEST_P(RnnFcParametrization, CheckFullyConnectedLayerOutput) { + FullyConnectedLayer fc(kInputLayerInputSize, kInputLayerOutputSize, + kInputDenseBias, kInputDenseWeights, + ActivationFunction::kTansigApproximated, + /*cpu_features=*/GetParam(), + /*layer_name=*/"FC"); + fc.ComputeOutput(kFullyConnectedInputVector); + ExpectNearAbsolute(kFullyConnectedExpectedOutput, fc, 1e-5f); +} + +TEST_P(RnnFcParametrization, DISABLED_BenchmarkFullyConnectedLayer) { + const AvailableCpuFeatures cpu_features = GetParam(); + FullyConnectedLayer fc(kInputLayerInputSize, kInputLayerOutputSize, + kInputDenseBias, kInputDenseWeights, + ActivationFunction::kTansigApproximated, cpu_features, + /*layer_name=*/"FC"); + + constexpr int kNumTests = 10000; + ::webrtc::test::PerformanceTimer perf_timer(kNumTests); + for (int k = 0; k < kNumTests; ++k) { + perf_timer.StartTimer(); + fc.ComputeOutput(kFullyConnectedInputVector); + perf_timer.StopTimer(); + } + RTC_LOG(LS_INFO) << "CPU features: " << cpu_features.ToString() << " | " + << (perf_timer.GetDurationAverage() / 1000) << " +/- " + << (perf_timer.GetDurationStandardDeviation() / 1000) + << " ms"; +} + +// Finds the relevant CPU features combinations to test. +std::vector<AvailableCpuFeatures> GetCpuFeaturesToTest() { + std::vector<AvailableCpuFeatures> v; + v.push_back(NoAvailableCpuFeatures()); + AvailableCpuFeatures available = GetAvailableCpuFeatures(); + if (available.sse2) { + v.push_back({/*sse2=*/true, /*avx2=*/false, /*neon=*/false}); + } + if (available.avx2) { + v.push_back({/*sse2=*/false, /*avx2=*/true, /*neon=*/false}); + } + if (available.neon) { + v.push_back({/*sse2=*/false, /*avx2=*/false, /*neon=*/true}); + } + return v; +} + +INSTANTIATE_TEST_SUITE_P( + RnnVadTest, + RnnFcParametrization, + ::testing::ValuesIn(GetCpuFeaturesToTest()), + [](const ::testing::TestParamInfo<AvailableCpuFeatures>& info) { + return info.param.ToString(); + }); + +} // namespace +} // namespace rnn_vad +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_gru.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_gru.cc new file mode 100644 index 0000000000..ef37410caa --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_gru.cc @@ -0,0 +1,198 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/rnn_vad/rnn_gru.h" + +#include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_conversions.h" +#include "third_party/rnnoise/src/rnn_activations.h" +#include "third_party/rnnoise/src/rnn_vad_weights.h" + +namespace webrtc { +namespace rnn_vad { +namespace { + +constexpr int kNumGruGates = 3; // Update, reset, output. + +std::vector<float> PreprocessGruTensor(rtc::ArrayView<const int8_t> tensor_src, + int output_size) { + // Transpose, cast and scale. + // `n` is the size of the first dimension of the 3-dim tensor `weights`. + const int n = rtc::CheckedDivExact(rtc::dchecked_cast<int>(tensor_src.size()), + output_size * kNumGruGates); + const int stride_src = kNumGruGates * output_size; + const int stride_dst = n * output_size; + std::vector<float> tensor_dst(tensor_src.size()); + for (int g = 0; g < kNumGruGates; ++g) { + for (int o = 0; o < output_size; ++o) { + for (int i = 0; i < n; ++i) { + tensor_dst[g * stride_dst + o * n + i] = + ::rnnoise::kWeightsScale * + static_cast<float>( + tensor_src[i * stride_src + g * output_size + o]); + } + } + } + return tensor_dst; +} + +// Computes the output for the update or the reset gate. +// Operation: `g = sigmoid(W^T∙i + R^T∙s + b)` where +// - `g`: output gate vector +// - `W`: weights matrix +// - `i`: input vector +// - `R`: recurrent weights matrix +// - `s`: state gate vector +// - `b`: bias vector +void ComputeUpdateResetGate(int input_size, + int output_size, + const VectorMath& vector_math, + rtc::ArrayView<const float> input, + rtc::ArrayView<const float> state, + rtc::ArrayView<const float> bias, + rtc::ArrayView<const float> weights, + rtc::ArrayView<const float> recurrent_weights, + rtc::ArrayView<float> gate) { + RTC_DCHECK_EQ(input.size(), input_size); + RTC_DCHECK_EQ(state.size(), output_size); + RTC_DCHECK_EQ(bias.size(), output_size); + RTC_DCHECK_EQ(weights.size(), input_size * output_size); + RTC_DCHECK_EQ(recurrent_weights.size(), output_size * output_size); + RTC_DCHECK_GE(gate.size(), output_size); // `gate` is over-allocated. + for (int o = 0; o < output_size; ++o) { + float x = bias[o]; + x += vector_math.DotProduct(input, + weights.subview(o * input_size, input_size)); + x += vector_math.DotProduct( + state, recurrent_weights.subview(o * output_size, output_size)); + gate[o] = ::rnnoise::SigmoidApproximated(x); + } +} + +// Computes the output for the state gate. 
+// Operation: `s' = u .* s + (1 - u) .* ReLU(W^T∙i + R^T∙(s .* r) + b)` where +// - `s'`: output state gate vector +// - `s`: previous state gate vector +// - `u`: update gate vector +// - `W`: weights matrix +// - `i`: input vector +// - `R`: recurrent weights matrix +// - `r`: reset gate vector +// - `b`: bias vector +// - `.*` element-wise product +void ComputeStateGate(int input_size, + int output_size, + const VectorMath& vector_math, + rtc::ArrayView<const float> input, + rtc::ArrayView<const float> update, + rtc::ArrayView<const float> reset, + rtc::ArrayView<const float> bias, + rtc::ArrayView<const float> weights, + rtc::ArrayView<const float> recurrent_weights, + rtc::ArrayView<float> state) { + RTC_DCHECK_EQ(input.size(), input_size); + RTC_DCHECK_GE(update.size(), output_size); // `update` is over-allocated. + RTC_DCHECK_GE(reset.size(), output_size); // `reset` is over-allocated. + RTC_DCHECK_EQ(bias.size(), output_size); + RTC_DCHECK_EQ(weights.size(), input_size * output_size); + RTC_DCHECK_EQ(recurrent_weights.size(), output_size * output_size); + RTC_DCHECK_EQ(state.size(), output_size); + std::array<float, kGruLayerMaxUnits> reset_x_state; + for (int o = 0; o < output_size; ++o) { + reset_x_state[o] = state[o] * reset[o]; + } + for (int o = 0; o < output_size; ++o) { + float x = bias[o]; + x += vector_math.DotProduct(input, + weights.subview(o * input_size, input_size)); + x += vector_math.DotProduct( + {reset_x_state.data(), static_cast<size_t>(output_size)}, + recurrent_weights.subview(o * output_size, output_size)); + state[o] = update[o] * state[o] + (1.f - update[o]) * std::max(0.f, x); + } +} + +} // namespace + +GatedRecurrentLayer::GatedRecurrentLayer( + const int input_size, + const int output_size, + const rtc::ArrayView<const int8_t> bias, + const rtc::ArrayView<const int8_t> weights, + const rtc::ArrayView<const int8_t> recurrent_weights, + const AvailableCpuFeatures& cpu_features, + absl::string_view layer_name) + : input_size_(input_size), + output_size_(output_size), + bias_(PreprocessGruTensor(bias, output_size)), + weights_(PreprocessGruTensor(weights, output_size)), + recurrent_weights_(PreprocessGruTensor(recurrent_weights, output_size)), + vector_math_(cpu_features) { + RTC_DCHECK_LE(output_size_, kGruLayerMaxUnits) + << "Insufficient GRU layer over-allocation (" << layer_name << ")."; + RTC_DCHECK_EQ(kNumGruGates * output_size_, bias_.size()) + << "Mismatching output size and bias terms array size (" << layer_name + << ")."; + RTC_DCHECK_EQ(kNumGruGates * input_size_ * output_size_, weights_.size()) + << "Mismatching input-output size and weight coefficients array size (" + << layer_name << ")."; + RTC_DCHECK_EQ(kNumGruGates * output_size_ * output_size_, + recurrent_weights_.size()) + << "Mismatching input-output size and recurrent weight coefficients array" + " size (" + << layer_name << ")."; + Reset(); +} + +GatedRecurrentLayer::~GatedRecurrentLayer() = default; + +void GatedRecurrentLayer::Reset() { + state_.fill(0.f); +} + +void GatedRecurrentLayer::ComputeOutput(rtc::ArrayView<const float> input) { + RTC_DCHECK_EQ(input.size(), input_size_); + + // The tensors below are organized as a sequence of flattened tensors for the + // `update`, `reset` and `state` gates. + rtc::ArrayView<const float> bias(bias_); + rtc::ArrayView<const float> weights(weights_); + rtc::ArrayView<const float> recurrent_weights(recurrent_weights_); + // Strides to access to the flattened tensors for a specific gate. 
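+ // `PreprocessGruTensor()` lays out one contiguous block per gate, in the
+ // order update, reset, state; block `g` therefore starts at `g * stride`.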
+ const int stride_weights = input_size_ * output_size_; + const int stride_recurrent_weights = output_size_ * output_size_; + + rtc::ArrayView<float> state(state_.data(), output_size_); + + // Update gate. + std::array<float, kGruLayerMaxUnits> update; + ComputeUpdateResetGate( + input_size_, output_size_, vector_math_, input, state, + bias.subview(0, output_size_), weights.subview(0, stride_weights), + recurrent_weights.subview(0, stride_recurrent_weights), update); + // Reset gate. + std::array<float, kGruLayerMaxUnits> reset; + ComputeUpdateResetGate(input_size_, output_size_, vector_math_, input, state, + bias.subview(output_size_, output_size_), + weights.subview(stride_weights, stride_weights), + recurrent_weights.subview(stride_recurrent_weights, + stride_recurrent_weights), + reset); + // State gate. + ComputeStateGate(input_size_, output_size_, vector_math_, input, update, + reset, bias.subview(2 * output_size_, output_size_), + weights.subview(2 * stride_weights, stride_weights), + recurrent_weights.subview(2 * stride_recurrent_weights, + stride_recurrent_weights), + state); +} + +} // namespace rnn_vad +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_gru.h b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_gru.h new file mode 100644 index 0000000000..3407dfcdf1 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_gru.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_RNN_GRU_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_RNN_GRU_H_ + +#include <array> +#include <vector> + +#include "absl/strings/string_view.h" +#include "api/array_view.h" +#include "modules/audio_processing/agc2/cpu_features.h" +#include "modules/audio_processing/agc2/rnn_vad/vector_math.h" + +namespace webrtc { +namespace rnn_vad { + +// Maximum number of units for a GRU layer. +constexpr int kGruLayerMaxUnits = 24; + +// Recurrent layer with gated recurrent units (GRUs) with sigmoid and ReLU as +// activation functions for the update/reset and output gates respectively. +class GatedRecurrentLayer { + public: + // Ctor. `output_size` cannot be greater than `kGruLayerMaxUnits`. + GatedRecurrentLayer(int input_size, + int output_size, + rtc::ArrayView<const int8_t> bias, + rtc::ArrayView<const int8_t> weights, + rtc::ArrayView<const int8_t> recurrent_weights, + const AvailableCpuFeatures& cpu_features, + absl::string_view layer_name); + GatedRecurrentLayer(const GatedRecurrentLayer&) = delete; + GatedRecurrentLayer& operator=(const GatedRecurrentLayer&) = delete; + ~GatedRecurrentLayer(); + + // Returns the size of the input vector. + int input_size() const { return input_size_; } + // Returns the pointer to the first element of the output buffer. + const float* data() const { return state_.data(); } + // Returns the size of the output buffer. + int size() const { return output_size_; } + + // Resets the GRU state. + void Reset(); + // Computes the recurrent layer output and updates the status. 
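+ // The output coincides with the updated recurrent state and can be read
+ // back through data() and size().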
+ void ComputeOutput(rtc::ArrayView<const float> input); + + private: + const int input_size_; + const int output_size_; + const std::vector<float> bias_; + const std::vector<float> weights_; + const std::vector<float> recurrent_weights_; + const VectorMath vector_math_; + // Over-allocated array with size equal to `output_size_`. + std::array<float, kGruLayerMaxUnits> state_; +}; + +} // namespace rnn_vad +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_RNN_GRU_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_gru_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_gru_unittest.cc new file mode 100644 index 0000000000..88ae72803a --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_gru_unittest.cc @@ -0,0 +1,186 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/rnn_vad/rnn_gru.h" + +#include <array> +#include <memory> +#include <vector> + +#include "api/array_view.h" +#include "modules/audio_processing/agc2/rnn_vad/test_utils.h" +#include "modules/audio_processing/test/performance_timer.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" +#include "test/gtest.h" +#include "third_party/rnnoise/src/rnn_vad_weights.h" + +namespace webrtc { +namespace rnn_vad { +namespace { + +void TestGatedRecurrentLayer( + GatedRecurrentLayer& gru, + rtc::ArrayView<const float> input_sequence, + rtc::ArrayView<const float> expected_output_sequence) { + const int input_sequence_length = rtc::CheckedDivExact( + rtc::dchecked_cast<int>(input_sequence.size()), gru.input_size()); + const int output_sequence_length = rtc::CheckedDivExact( + rtc::dchecked_cast<int>(expected_output_sequence.size()), gru.size()); + ASSERT_EQ(input_sequence_length, output_sequence_length) + << "The test data length is invalid."; + // Feed the GRU layer and check the output at every step. + gru.Reset(); + for (int i = 0; i < input_sequence_length; ++i) { + SCOPED_TRACE(i); + gru.ComputeOutput( + input_sequence.subview(i * gru.input_size(), gru.input_size())); + const auto expected_output = + expected_output_sequence.subview(i * gru.size(), gru.size()); + ExpectNearAbsolute(expected_output, gru, 3e-6f); + } +} + +// Gated recurrent units layer test data. +constexpr int kGruInputSize = 5; +constexpr int kGruOutputSize = 4; +constexpr std::array<int8_t, 12> kGruBias = {96, -99, -81, -114, 49, 119, + -118, 68, -76, 91, 121, 125}; +constexpr std::array<int8_t, 60> kGruWeights = { + // Input 0. + 124, 9, 1, 116, // Update. + -66, -21, -118, -110, // Reset. + 104, 75, -23, -51, // Output. + // Input 1. + -72, -111, 47, 93, // Update. + 77, -98, 41, -8, // Reset. + 40, -23, -43, -107, // Output. + // Input 2. + 9, -73, 30, -32, // Update. + -2, 64, -26, 91, // Reset. + -48, -24, -28, -104, // Output. + // Input 3. + 74, -46, 116, 15, // Update. + 32, 52, -126, -38, // Reset. + -121, 12, -16, 110, // Output. + // Input 4. + -95, 66, -103, -35, // Update. + -38, 3, -126, -61, // Reset. + 28, 98, -117, -43 // Output. +}; +constexpr std::array<int8_t, 48> kGruRecurrentWeights = { + // Output 0. 
+ -3, 87, 50, 51, // Update. + -22, 27, -39, 62, // Reset. + 31, -83, -52, -48, // Output. + // Output 1. + -6, 83, -19, 104, // Update. + 105, 48, 23, 68, // Reset. + 23, 40, 7, -120, // Output. + // Output 2. + 64, -62, 117, 85, // Update. + 51, -43, 54, -105, // Reset. + 120, 56, -128, -107, // Output. + // Output 3. + 39, 50, -17, -47, // Update. + -117, 14, 108, 12, // Reset. + -7, -72, 103, -87, // Output. +}; +constexpr std::array<float, 20> kGruInputSequence = { + 0.89395463f, 0.93224651f, 0.55788344f, 0.32341808f, 0.93355054f, + 0.13475326f, 0.97370994f, 0.14253306f, 0.93710381f, 0.76093364f, + 0.65780413f, 0.41657975f, 0.49403164f, 0.46843281f, 0.75138855f, + 0.24517593f, 0.47657707f, 0.57064998f, 0.435184f, 0.19319285f}; +constexpr std::array<float, 16> kGruExpectedOutputSequence = { + 0.0239123f, 0.5773077f, 0.f, 0.f, + 0.01282811f, 0.64330572f, 0.f, 0.04863098f, + 0.00781069f, 0.75267816f, 0.f, 0.02579715f, + 0.00471378f, 0.59162533f, 0.11087593f, 0.01334511f}; + +class RnnGruParametrization + : public ::testing::TestWithParam<AvailableCpuFeatures> {}; + +// Checks that the output of a GRU layer is within tolerance given test input +// data. +TEST_P(RnnGruParametrization, CheckGatedRecurrentLayer) { + GatedRecurrentLayer gru(kGruInputSize, kGruOutputSize, kGruBias, kGruWeights, + kGruRecurrentWeights, + /*cpu_features=*/GetParam(), + /*layer_name=*/"GRU"); + TestGatedRecurrentLayer(gru, kGruInputSequence, kGruExpectedOutputSequence); +} + +TEST_P(RnnGruParametrization, DISABLED_BenchmarkGatedRecurrentLayer) { + // Prefetch test data. + std::unique_ptr<FileReader> reader = CreateGruInputReader(); + std::vector<float> gru_input_sequence(reader->size()); + reader->ReadChunk(gru_input_sequence); + + using ::rnnoise::kHiddenGruBias; + using ::rnnoise::kHiddenGruRecurrentWeights; + using ::rnnoise::kHiddenGruWeights; + using ::rnnoise::kHiddenLayerOutputSize; + using ::rnnoise::kInputLayerOutputSize; + + GatedRecurrentLayer gru(kInputLayerOutputSize, kHiddenLayerOutputSize, + kHiddenGruBias, kHiddenGruWeights, + kHiddenGruRecurrentWeights, + /*cpu_features=*/GetParam(), + /*layer_name=*/"GRU"); + + rtc::ArrayView<const float> input_sequence(gru_input_sequence); + ASSERT_EQ(input_sequence.size() % kInputLayerOutputSize, + static_cast<size_t>(0)); + const int input_sequence_length = + input_sequence.size() / kInputLayerOutputSize; + + constexpr int kNumTests = 100; + ::webrtc::test::PerformanceTimer perf_timer(kNumTests); + for (int k = 0; k < kNumTests; ++k) { + perf_timer.StartTimer(); + for (int i = 0; i < input_sequence_length; ++i) { + gru.ComputeOutput( + input_sequence.subview(i * gru.input_size(), gru.input_size())); + } + perf_timer.StopTimer(); + } + RTC_LOG(LS_INFO) << (perf_timer.GetDurationAverage() / 1000) << " +/- " + << (perf_timer.GetDurationStandardDeviation() / 1000) + << " ms"; +} + +// Finds the relevant CPU features combinations to test. 
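+// The returned list always contains the plain C++ implementation plus one
+// entry per SIMD variant (SSE2, AVX2, NEON) supported by the host.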
+std::vector<AvailableCpuFeatures> GetCpuFeaturesToTest() { + std::vector<AvailableCpuFeatures> v; + v.push_back(NoAvailableCpuFeatures()); + AvailableCpuFeatures available = GetAvailableCpuFeatures(); + if (available.sse2) { + v.push_back({/*sse2=*/true, /*avx2=*/false, /*neon=*/false}); + } + if (available.avx2) { + v.push_back({/*sse2=*/false, /*avx2=*/true, /*neon=*/false}); + } + if (available.neon) { + v.push_back({/*sse2=*/false, /*avx2=*/false, /*neon=*/true}); + } + return v; +} + +INSTANTIATE_TEST_SUITE_P( + RnnVadTest, + RnnGruParametrization, + ::testing::ValuesIn(GetCpuFeaturesToTest()), + [](const ::testing::TestParamInfo<AvailableCpuFeatures>& info) { + return info.param.ToString(); + }); + +} // namespace +} // namespace rnn_vad +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_unittest.cc new file mode 100644 index 0000000000..4c5409a14e --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_unittest.cc @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/rnn_vad/rnn.h" + +#include "api/array_view.h" +#include "modules/audio_processing/agc2/cpu_features.h" +#include "modules/audio_processing/agc2/rnn_vad/common.h" +#include "test/gtest.h" + +namespace webrtc { +namespace rnn_vad { +namespace { + +constexpr std::array<float, kFeatureVectorSize> kFeatures = { + -1.00131f, -0.627069f, -7.81097f, 7.86285f, -2.87145f, 3.32365f, + -0.653161f, 0.529839f, -0.425307f, 0.25583f, 0.235094f, 0.230527f, + -0.144687f, 0.182785f, 0.57102f, 0.125039f, 0.479482f, -0.0255439f, + -0.0073141f, -0.147346f, -0.217106f, -0.0846906f, -8.34943f, 3.09065f, + 1.42628f, -0.85235f, -0.220207f, -0.811163f, 2.09032f, -2.01425f, + -0.690268f, -0.925327f, -0.541354f, 0.58455f, -0.606726f, -0.0372358f, + 0.565991f, 0.435854f, 0.420812f, 0.162198f, -2.13f, 10.0089f}; + +void WarmUpRnnVad(RnnVad& rnn_vad) { + for (int i = 0; i < 10; ++i) { + rnn_vad.ComputeVadProbability(kFeatures, /*is_silence=*/false); + } +} + +// Checks that the speech probability is zero with silence. +TEST(RnnVadTest, CheckZeroProbabilityWithSilence) { + RnnVad rnn_vad(GetAvailableCpuFeatures()); + WarmUpRnnVad(rnn_vad); + EXPECT_EQ(rnn_vad.ComputeVadProbability(kFeatures, /*is_silence=*/true), 0.f); +} + +// Checks that the same output is produced after reset given the same input +// sequence. +TEST(RnnVadTest, CheckRnnVadReset) { + RnnVad rnn_vad(GetAvailableCpuFeatures()); + WarmUpRnnVad(rnn_vad); + float pre = rnn_vad.ComputeVadProbability(kFeatures, /*is_silence=*/false); + rnn_vad.Reset(); + WarmUpRnnVad(rnn_vad); + float post = rnn_vad.ComputeVadProbability(kFeatures, /*is_silence=*/false); + EXPECT_EQ(pre, post); +} + +// Checks that the same output is produced after silence is observed given the +// same input sequence. 
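+// Observing silence resets the internal GRU state (see
+// RnnVad::ComputeVadProbability), so the VAD restarts from its initial state.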
+TEST(RnnVadTest, CheckRnnVadSilence) { + RnnVad rnn_vad(GetAvailableCpuFeatures()); + WarmUpRnnVad(rnn_vad); + float pre = rnn_vad.ComputeVadProbability(kFeatures, /*is_silence=*/false); + rnn_vad.ComputeVadProbability(kFeatures, /*is_silence=*/true); + WarmUpRnnVad(rnn_vad); + float post = rnn_vad.ComputeVadProbability(kFeatures, /*is_silence=*/false); + EXPECT_EQ(pre, post); +} + +} // namespace +} // namespace rnn_vad +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_auto_correlation_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_auto_correlation_gn/moz.build new file mode 100644 index 0000000000..1e156ad957 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_auto_correlation_gn/moz.build @@ -0,0 +1,212 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/auto_correlation.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + 
DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +Library("rnn_vad_auto_correlation_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_common_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_common_gn/moz.build new file mode 100644 index 0000000000..3bb95f7979 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_common_gn/moz.build @@ -0,0 +1,204 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + 
DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +Library("rnn_vad_common_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_gn/moz.build new file mode 100644 index 0000000000..26fa033b16 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_gn/moz.build @@ -0,0 +1,213 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/features_extraction.cc", + "/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + 
DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +Library("rnn_vad_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_layers_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_layers_gn/moz.build new file mode 100644 index 0000000000..9bac4ab5e9 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_layers_gn/moz.build @@ -0,0 +1,213 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_fc.cc", + "/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_gru.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True 
+ DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +Library("rnn_vad_layers_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_lp_residual_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_lp_residual_gn/moz.build new file mode 100644 index 0000000000..19e87379fa --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_lp_residual_gn/moz.build @@ -0,0 +1,201 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/lp_residual.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if 
CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +Library("rnn_vad_lp_residual_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_pitch_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_pitch_gn/moz.build new file mode 100644 index 0000000000..4d64842117 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_pitch_gn/moz.build @@ -0,0 +1,213 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search.cc", + "/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/pitch_search_internal.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + 
DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +Library("rnn_vad_pitch_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_ring_buffer_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_ring_buffer_gn/moz.build new file mode 100644 index 0000000000..cc26a37594 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_ring_buffer_gn/moz.build @@ -0,0 +1,193 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = 
True + +if CONFIG["CPU_ARCH"] == "arm": + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +Library("rnn_vad_ring_buffer_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_sequence_buffer_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_sequence_buffer_gn/moz.build new file mode 100644 index 0000000000..874bdcaab7 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_sequence_buffer_gn/moz.build @@ -0,0 +1,193 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = 
True + +if CONFIG["CPU_ARCH"] == "arm": + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +Library("rnn_vad_sequence_buffer_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_spectral_features_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_spectral_features_gn/moz.build new file mode 100644 index 0000000000..1cdba1b497 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_spectral_features_gn/moz.build @@ -0,0 +1,213 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features.cc", + "/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features_internal.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = 
True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +Library("rnn_vad_spectral_features_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_symmetric_matrix_buffer_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_symmetric_matrix_buffer_gn/moz.build new file mode 100644 index 0000000000..e92fb28f27 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_symmetric_matrix_buffer_gn/moz.build @@ -0,0 +1,193 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = 
True + +if CONFIG["CPU_ARCH"] == "arm": + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +Library("rnn_vad_symmetric_matrix_buffer_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_tool.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_tool.cc new file mode 100644 index 0000000000..a0e1242eb4 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_tool.cc @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <array> +#include <string> +#include <vector> + +#include "absl/flags/flag.h" +#include "absl/flags/parse.h" +#include "common_audio/resampler/push_sinc_resampler.h" +#include "common_audio/wav_file.h" +#include "modules/audio_processing/agc2/cpu_features.h" +#include "modules/audio_processing/agc2/rnn_vad/common.h" +#include "modules/audio_processing/agc2/rnn_vad/features_extraction.h" +#include "modules/audio_processing/agc2/rnn_vad/rnn.h" +#include "rtc_base/logging.h" +#include "rtc_base/numerics/safe_compare.h" + +ABSL_FLAG(std::string, i, "", "Path to the input wav file"); +ABSL_FLAG(std::string, f, "", "Path to the output features file"); +ABSL_FLAG(std::string, o, "", "Path to the output VAD probabilities file"); + +namespace webrtc { +namespace rnn_vad { +namespace test { + +int main(int argc, char* argv[]) { + absl::ParseCommandLine(argc, argv); + rtc::LogMessage::LogToDebug(rtc::LS_INFO); + + // Open wav input file and check properties. 
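+  // Illustrative invocation (file names are placeholders only):
+  //   rnn_vad_tool --i=speech.wav --o=vad_probs.dat [--f=features.dat]
+  // The input path is taken from the -i flag declared above.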
+  const std::string input_wav_file = absl::GetFlag(FLAGS_i);
+  WavReader wav_reader(input_wav_file);
+  if (wav_reader.num_channels() != 1) {
+    RTC_LOG(LS_ERROR) << "Only mono wav files are supported";
+    return 1;
+  }
+  if (wav_reader.sample_rate() % 100 != 0) {
+    RTC_LOG(LS_ERROR) << "The sample rate must allow 10 ms frames.";
+    return 1;
+  }
+  RTC_LOG(LS_INFO) << "Input sample rate: " << wav_reader.sample_rate();
+
+  // Init output files.
+  const std::string output_vad_probs_file = absl::GetFlag(FLAGS_o);
+  FILE* vad_probs_file = fopen(output_vad_probs_file.c_str(), "wb");
+  FILE* features_file = nullptr;
+  const std::string output_feature_file = absl::GetFlag(FLAGS_f);
+  if (!output_feature_file.empty()) {
+    features_file = fopen(output_feature_file.c_str(), "wb");
+  }
+
+  // Initialize.
+  const int frame_size_10ms =
+      rtc::CheckedDivExact(wav_reader.sample_rate(), 100);
+  std::vector<float> samples_10ms;
+  samples_10ms.resize(frame_size_10ms);
+  std::array<float, kFrameSize10ms24kHz> samples_10ms_24kHz;
+  PushSincResampler resampler(frame_size_10ms, kFrameSize10ms24kHz);
+  const AvailableCpuFeatures cpu_features = GetAvailableCpuFeatures();
+  FeaturesExtractor features_extractor(cpu_features);
+  std::array<float, kFeatureVectorSize> feature_vector;
+  RnnVad rnn_vad(cpu_features);
+
+  // Compute VAD probabilities.
+  while (true) {
+    // Read frame at the input sample rate.
+    const size_t read_samples =
+        wav_reader.ReadSamples(frame_size_10ms, samples_10ms.data());
+    if (rtc::SafeLt(read_samples, frame_size_10ms)) {
+      break;  // EOF.
+    }
+    // Resample input.
+    resampler.Resample(samples_10ms.data(), samples_10ms.size(),
+                       samples_10ms_24kHz.data(), samples_10ms_24kHz.size());
+
+    // Extract features and feed the RNN.
+    bool is_silence = features_extractor.CheckSilenceComputeFeatures(
+        samples_10ms_24kHz, feature_vector);
+    float vad_probability =
+        rnn_vad.ComputeVadProbability(feature_vector, is_silence);
+    // Write voice probability.
+    RTC_DCHECK_GE(vad_probability, 0.f);
+    RTC_DCHECK_GE(1.f, vad_probability);
+    fwrite(&vad_probability, sizeof(float), 1, vad_probs_file);
+    // Write features.
+    if (features_file) {
+      const float float_is_silence = is_silence ? 1.f : 0.f;
+      fwrite(&float_is_silence, sizeof(float), 1, features_file);
+      if (is_silence) {
+        // Do not write uninitialized values.
+        feature_vector.fill(0.f);
+      }
+      fwrite(feature_vector.data(), sizeof(float), kFeatureVectorSize,
+             features_file);
+    }
+  }
+
+  // Close output file(s).
+  fclose(vad_probs_file);
+  RTC_LOG(LS_INFO) << "VAD probabilities written to " << output_vad_probs_file;
+  if (features_file) {
+    fclose(features_file);
+    RTC_LOG(LS_INFO) << "features written to " << output_feature_file;
+  }
+
+  return 0;
+}
+
+}  // namespace test
+}  // namespace rnn_vad
+}  // namespace webrtc
+
+int main(int argc, char* argv[]) {
+  return webrtc::rnn_vad::test::main(argc, argv);
+}
diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_unittest.cc
new file mode 100644
index 0000000000..f33cd14a8a
--- /dev/null
+++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/rnn_vad_unittest.cc
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <array> +#include <memory> +#include <string> +#include <vector> + +#include "common_audio/resampler/push_sinc_resampler.h" +#include "modules/audio_processing/agc2/cpu_features.h" +#include "modules/audio_processing/agc2/rnn_vad/features_extraction.h" +#include "modules/audio_processing/agc2/rnn_vad/rnn.h" +#include "modules/audio_processing/agc2/rnn_vad/test_utils.h" +#include "modules/audio_processing/test/performance_timer.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" +#include "test/gtest.h" +#include "third_party/rnnoise/src/rnn_activations.h" +#include "third_party/rnnoise/src/rnn_vad_weights.h" + +namespace webrtc { +namespace rnn_vad { +namespace { + +constexpr int kFrameSize10ms48kHz = 480; + +void DumpPerfStats(int num_samples, + int sample_rate, + double average_us, + double standard_deviation) { + float audio_track_length_ms = + 1e3f * static_cast<float>(num_samples) / static_cast<float>(sample_rate); + float average_ms = static_cast<float>(average_us) / 1e3f; + float speed = audio_track_length_ms / average_ms; + RTC_LOG(LS_INFO) << "track duration (ms): " << audio_track_length_ms; + RTC_LOG(LS_INFO) << "average processing time (ms): " << average_ms << " +/- " + << (standard_deviation / 1e3); + RTC_LOG(LS_INFO) << "speed: " << speed << "x"; +} + +// When the RNN VAD model is updated and the expected output changes, set the +// constant below to true in order to write new expected output binary files. +constexpr bool kWriteComputedOutputToFile = false; + +// Avoids that one forgets to set `kWriteComputedOutputToFile` back to false +// when the expected output files are re-exported. +TEST(RnnVadTest, CheckWriteComputedOutputIsFalse) { + ASSERT_FALSE(kWriteComputedOutputToFile) + << "Cannot land if kWriteComputedOutput is true."; +} + +class RnnVadProbabilityParametrization + : public ::testing::TestWithParam<AvailableCpuFeatures> {}; + +// Checks that the computed VAD probability for a test input sequence sampled at +// 48 kHz is within tolerance. +TEST_P(RnnVadProbabilityParametrization, RnnVadProbabilityWithinTolerance) { + // Init resampler, feature extractor and RNN. + PushSincResampler decimator(kFrameSize10ms48kHz, kFrameSize10ms24kHz); + const AvailableCpuFeatures cpu_features = GetParam(); + FeaturesExtractor features_extractor(cpu_features); + RnnVad rnn_vad(cpu_features); + + // Init input samples and expected output readers. + std::unique_ptr<FileReader> samples_reader = CreatePcmSamplesReader(); + std::unique_ptr<FileReader> expected_vad_prob_reader = CreateVadProbsReader(); + + // Input length. The last incomplete frame is ignored. + const int num_frames = samples_reader->size() / kFrameSize10ms48kHz; + + // Init buffers. + std::vector<float> samples_48k(kFrameSize10ms48kHz); + std::vector<float> samples_24k(kFrameSize10ms24kHz); + std::vector<float> feature_vector(kFeatureVectorSize); + std::vector<float> computed_vad_prob(num_frames); + std::vector<float> expected_vad_prob(num_frames); + + // Read expected output. + ASSERT_TRUE(expected_vad_prob_reader->ReadChunk(expected_vad_prob)); + + // Compute VAD probabilities on the downsampled input. 
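+  // The loop below checks each computed probability against the expected
+  // value with a per-frame tolerance of 1e-3 and also accumulates the
+  // absolute errors so that the average error over the whole sequence can be
+  // verified to stay below 1e-4.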
+  float cumulative_error = 0.f;
+  for (int i = 0; i < num_frames; ++i) {
+    ASSERT_TRUE(samples_reader->ReadChunk(samples_48k));
+    decimator.Resample(samples_48k.data(), samples_48k.size(),
+                       samples_24k.data(), samples_24k.size());
+    bool is_silence = features_extractor.CheckSilenceComputeFeatures(
+        {samples_24k.data(), kFrameSize10ms24kHz},
+        {feature_vector.data(), kFeatureVectorSize});
+    computed_vad_prob[i] = rnn_vad.ComputeVadProbability(
+        {feature_vector.data(), kFeatureVectorSize}, is_silence);
+    EXPECT_NEAR(computed_vad_prob[i], expected_vad_prob[i], 1e-3f);
+    cumulative_error += std::abs(computed_vad_prob[i] - expected_vad_prob[i]);
+  }
+  // Check average error.
+  EXPECT_LT(cumulative_error / num_frames, 1e-4f);
+
+  if (kWriteComputedOutputToFile) {
+    FileWriter vad_prob_writer("new_vad_prob.dat");
+    vad_prob_writer.WriteChunk(computed_vad_prob);
+  }
+}
+
+// Performance test for the RNN VAD (pre-fetching and downsampling are
+// excluded). Keep disabled and only enable locally to measure performance as
+// follows:
+// - on desktop: run this unit test adding "--logs";
+// - on android: run this unit test adding "--logcat-output-file".
+TEST_P(RnnVadProbabilityParametrization, DISABLED_RnnVadPerformance) {
+  // PCM samples reader and buffers.
+  std::unique_ptr<FileReader> samples_reader = CreatePcmSamplesReader();
+  // The last incomplete frame is ignored.
+  const int num_frames = samples_reader->size() / kFrameSize10ms48kHz;
+  std::array<float, kFrameSize10ms48kHz> samples;
+  // Pre-fetch and decimate samples.
+  PushSincResampler decimator(kFrameSize10ms48kHz, kFrameSize10ms24kHz);
+  std::vector<float> prefetched_decimated_samples;
+  prefetched_decimated_samples.resize(num_frames * kFrameSize10ms24kHz);
+  for (int i = 0; i < num_frames; ++i) {
+    ASSERT_TRUE(samples_reader->ReadChunk(samples));
+    decimator.Resample(samples.data(), samples.size(),
+                       &prefetched_decimated_samples[i * kFrameSize10ms24kHz],
+                       kFrameSize10ms24kHz);
+  }
+  // Initialize.
+  const AvailableCpuFeatures cpu_features = GetParam();
+  FeaturesExtractor features_extractor(cpu_features);
+  std::array<float, kFeatureVectorSize> feature_vector;
+  RnnVad rnn_vad(cpu_features);
+  constexpr int number_of_tests = 100;
+  ::webrtc::test::PerformanceTimer perf_timer(number_of_tests);
+  for (int k = 0; k < number_of_tests; ++k) {
+    features_extractor.Reset();
+    rnn_vad.Reset();
+    // Process frames.
+    perf_timer.StartTimer();
+    for (int i = 0; i < num_frames; ++i) {
+      bool is_silence = features_extractor.CheckSilenceComputeFeatures(
+          {&prefetched_decimated_samples[i * kFrameSize10ms24kHz],
+           kFrameSize10ms24kHz},
+          feature_vector);
+      rnn_vad.ComputeVadProbability(feature_vector, is_silence);
+    }
+    perf_timer.StopTimer();
+  }
+  DumpPerfStats(num_frames * kFrameSize10ms24kHz, kSampleRate24kHz,
+                perf_timer.GetDurationAverage(),
+                perf_timer.GetDurationStandardDeviation());
+}
+
+// Finds the relevant CPU features combinations to test. 
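+// The combinations are: no SIMD optimizations, SSE2 plus AVX2, SSE2 only and
+// NEON, where each optimized variant is added only if the corresponding
+// instruction sets are reported as available on the host machine.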
+std::vector<AvailableCpuFeatures> GetCpuFeaturesToTest() { + std::vector<AvailableCpuFeatures> v; + v.push_back(NoAvailableCpuFeatures()); + AvailableCpuFeatures available = GetAvailableCpuFeatures(); + if (available.avx2 && available.sse2) { + v.push_back({/*sse2=*/true, /*avx2=*/true, /*neon=*/false}); + } + if (available.sse2) { + v.push_back({/*sse2=*/true, /*avx2=*/false, /*neon=*/false}); + } + if (available.neon) { + v.push_back({/*sse2=*/false, /*avx2=*/false, /*neon=*/true}); + } + return v; +} + +INSTANTIATE_TEST_SUITE_P( + RnnVadTest, + RnnVadProbabilityParametrization, + ::testing::ValuesIn(GetCpuFeaturesToTest()), + [](const ::testing::TestParamInfo<AvailableCpuFeatures>& info) { + return info.param.ToString(); + }); + +} // namespace +} // namespace rnn_vad +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/sequence_buffer.h b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/sequence_buffer.h new file mode 100644 index 0000000000..a7402788c8 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/sequence_buffer.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_SEQUENCE_BUFFER_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_SEQUENCE_BUFFER_H_ + +#include <algorithm> +#include <cstring> +#include <type_traits> +#include <vector> + +#include "api/array_view.h" +#include "rtc_base/checks.h" + +namespace webrtc { +namespace rnn_vad { + +// Linear buffer implementation to (i) push fixed size chunks of sequential data +// and (ii) view contiguous parts of the buffer. The buffer and the pushed +// chunks have size S and N respectively. For instance, when S = 2N the first +// half of the sequence buffer is replaced with its second half, and the new N +// values are written at the end of the buffer. +// The class also provides a view on the most recent M values, where 0 < M <= S +// and by default M = N. +template <typename T, int S, int N, int M = N> +class SequenceBuffer { + static_assert(N <= S, + "The new chunk size cannot be larger than the sequence buffer " + "size."); + static_assert(std::is_arithmetic<T>::value, + "Integral or floating point required."); + + public: + SequenceBuffer() : buffer_(S) { + RTC_DCHECK_EQ(S, buffer_.size()); + Reset(); + } + SequenceBuffer(const SequenceBuffer&) = delete; + SequenceBuffer& operator=(const SequenceBuffer&) = delete; + ~SequenceBuffer() = default; + int size() const { return S; } + int chunks_size() const { return N; } + // Sets the sequence buffer values to zero. + void Reset() { std::fill(buffer_.begin(), buffer_.end(), 0); } + // Returns a view on the whole buffer. + rtc::ArrayView<const T, S> GetBufferView() const { + return {buffer_.data(), S}; + } + // Returns a view on the M most recent values of the buffer. + rtc::ArrayView<const T, M> GetMostRecentValuesView() const { + static_assert(M <= S, + "The number of most recent values cannot be larger than the " + "sequence buffer size."); + return {buffer_.data() + S - M, M}; + } + // Shifts left the buffer by N items and add new N items at the end. 
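+  // For example, with S = 4 and N = 2, pushing {4, 5} into a buffer holding
+  // {0, 1, 2, 3} first moves {2, 3} to the front and then writes the new
+  // values at the end, yielding {2, 3, 4, 5}. Minimal usage sketch (sizes are
+  // chosen for illustration only):
+  //   SequenceBuffer<float, 24, 8> buf;
+  //   std::array<float, 8> chunk{};
+  //   buf.Push(chunk);  // The 8 oldest values are dropped.
+  //   rtc::ArrayView<const float, 8> recent = buf.GetMostRecentValuesView();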
+ void Push(rtc::ArrayView<const T, N> new_values) { + // Make space for the new values. + if (S > N) + std::memmove(buffer_.data(), buffer_.data() + N, (S - N) * sizeof(T)); + // Copy the new values at the end of the buffer. + std::memcpy(buffer_.data() + S - N, new_values.data(), N * sizeof(T)); + } + + private: + std::vector<T> buffer_; +}; + +} // namespace rnn_vad +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_SEQUENCE_BUFFER_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/sequence_buffer_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/sequence_buffer_unittest.cc new file mode 100644 index 0000000000..af005833c1 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/sequence_buffer_unittest.cc @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/rnn_vad/sequence_buffer.h" + +#include <algorithm> +#include <array> + +#include "test/gtest.h" + +namespace webrtc { +namespace rnn_vad { +namespace { + +template <typename T, int S, int N> +void TestSequenceBufferPushOp() { + SCOPED_TRACE(S); + SCOPED_TRACE(N); + SequenceBuffer<T, S, N> seq_buf; + auto seq_buf_view = seq_buf.GetBufferView(); + std::array<T, N> chunk; + + // Check that a chunk is fully gone after ceil(S / N) push ops. + chunk.fill(1); + seq_buf.Push(chunk); + chunk.fill(0); + constexpr int required_push_ops = (S % N) ? S / N + 1 : S / N; + for (int i = 0; i < required_push_ops - 1; ++i) { + SCOPED_TRACE(i); + seq_buf.Push(chunk); + // Still in the buffer. + const auto* m = std::max_element(seq_buf_view.begin(), seq_buf_view.end()); + EXPECT_EQ(1, *m); + } + // Gone after another push. + seq_buf.Push(chunk); + const auto* m = std::max_element(seq_buf_view.begin(), seq_buf_view.end()); + EXPECT_EQ(0, *m); + + // Check that the last item moves left by N positions after a push op. + if (S > N) { + // Fill in with non-zero values. + for (int i = 0; i < N; ++i) + chunk[i] = static_cast<T>(i + 1); + seq_buf.Push(chunk); + // With the next Push(), `last` will be moved left by N positions. + const T last = chunk[N - 1]; + for (int i = 0; i < N; ++i) + chunk[i] = static_cast<T>(last + i + 1); + seq_buf.Push(chunk); + EXPECT_EQ(last, seq_buf_view[S - N - 1]); + } +} + +TEST(RnnVadTest, SequenceBufferGetters) { + constexpr int buffer_size = 8; + constexpr int chunk_size = 8; + SequenceBuffer<int, buffer_size, chunk_size> seq_buf; + EXPECT_EQ(buffer_size, seq_buf.size()); + EXPECT_EQ(chunk_size, seq_buf.chunks_size()); + // Test view. + auto seq_buf_view = seq_buf.GetBufferView(); + EXPECT_EQ(0, seq_buf_view[0]); + EXPECT_EQ(0, seq_buf_view[seq_buf_view.size() - 1]); + constexpr std::array<int, chunk_size> chunk = {10, 20, 30, 40, + 50, 60, 70, 80}; + seq_buf.Push(chunk); + EXPECT_EQ(10, *seq_buf_view.begin()); + EXPECT_EQ(80, *(seq_buf_view.end() - 1)); +} + +TEST(RnnVadTest, SequenceBufferPushOpsUnsigned) { + TestSequenceBufferPushOp<uint8_t, 32, 8>(); // Chunk size: 25%. + TestSequenceBufferPushOp<uint8_t, 32, 16>(); // Chunk size: 50%. + TestSequenceBufferPushOp<uint8_t, 32, 32>(); // Chunk size: 100%. 
+ TestSequenceBufferPushOp<uint8_t, 23, 7>(); // Non-integer ratio. +} + +TEST(RnnVadTest, SequenceBufferPushOpsSigned) { + TestSequenceBufferPushOp<int, 32, 8>(); // Chunk size: 25%. + TestSequenceBufferPushOp<int, 32, 16>(); // Chunk size: 50%. + TestSequenceBufferPushOp<int, 32, 32>(); // Chunk size: 100%. + TestSequenceBufferPushOp<int, 23, 7>(); // Non-integer ratio. +} + +TEST(RnnVadTest, SequenceBufferPushOpsFloating) { + TestSequenceBufferPushOp<float, 32, 8>(); // Chunk size: 25%. + TestSequenceBufferPushOp<float, 32, 16>(); // Chunk size: 50%. + TestSequenceBufferPushOp<float, 32, 32>(); // Chunk size: 100%. + TestSequenceBufferPushOp<float, 23, 7>(); // Non-integer ratio. +} + +} // namespace +} // namespace rnn_vad +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features.cc new file mode 100644 index 0000000000..96086babb6 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features.cc @@ -0,0 +1,214 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/rnn_vad/spectral_features.h" + +#include <algorithm> +#include <cmath> +#include <limits> +#include <numeric> + +#include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_compare.h" + +namespace webrtc { +namespace rnn_vad { +namespace { + +constexpr float kSilenceThreshold = 0.04f; + +// Computes the new cepstral difference stats and pushes them into the passed +// symmetric matrix buffer. +void UpdateCepstralDifferenceStats( + rtc::ArrayView<const float, kNumBands> new_cepstral_coeffs, + const RingBuffer<float, kNumBands, kCepstralCoeffsHistorySize>& ring_buf, + SymmetricMatrixBuffer<float, kCepstralCoeffsHistorySize>* sym_matrix_buf) { + RTC_DCHECK(sym_matrix_buf); + // Compute the new cepstral distance stats. + std::array<float, kCepstralCoeffsHistorySize - 1> distances; + for (int i = 0; i < kCepstralCoeffsHistorySize - 1; ++i) { + const int delay = i + 1; + auto old_cepstral_coeffs = ring_buf.GetArrayView(delay); + distances[i] = 0.f; + for (int k = 0; k < kNumBands; ++k) { + const float c = new_cepstral_coeffs[k] - old_cepstral_coeffs[k]; + distances[i] += c * c; + } + } + // Push the new spectral distance stats into the symmetric matrix buffer. + sym_matrix_buf->Push(distances); +} + +// Computes the first half of the Vorbis window. +std::array<float, kFrameSize20ms24kHz / 2> ComputeScaledHalfVorbisWindow( + float scaling = 1.f) { + constexpr int kHalfSize = kFrameSize20ms24kHz / 2; + std::array<float, kHalfSize> half_window{}; + for (int i = 0; i < kHalfSize; ++i) { + half_window[i] = + scaling * + std::sin(0.5 * kPi * std::sin(0.5 * kPi * (i + 0.5) / kHalfSize) * + std::sin(0.5 * kPi * (i + 0.5) / kHalfSize)); + } + return half_window; +} + +// Computes the forward FFT on a 20 ms frame to which a given window function is +// applied. The Fourier coefficient corresponding to the Nyquist frequency is +// set to zero (it is never used and this allows to simplify the code). 
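+// The window applied here is the Vorbis window produced by
+// ComputeScaledHalfVorbisWindow() above, i.e.
+// sin(pi/2 * sin^2(pi * (i + 0.5) / kFrameSize20ms24kHz)) times the scaling
+// factor; since the window is symmetric, only its first half is stored and
+// the second half is obtained by mirroring the index (see the i/j loop).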
+void ComputeWindowedForwardFft( + rtc::ArrayView<const float, kFrameSize20ms24kHz> frame, + const std::array<float, kFrameSize20ms24kHz / 2>& half_window, + Pffft::FloatBuffer* fft_input_buffer, + Pffft::FloatBuffer* fft_output_buffer, + Pffft* fft) { + RTC_DCHECK_EQ(frame.size(), 2 * half_window.size()); + // Apply windowing. + auto in = fft_input_buffer->GetView(); + for (int i = 0, j = kFrameSize20ms24kHz - 1; + rtc::SafeLt(i, half_window.size()); ++i, --j) { + in[i] = frame[i] * half_window[i]; + in[j] = frame[j] * half_window[i]; + } + fft->ForwardTransform(*fft_input_buffer, fft_output_buffer, /*ordered=*/true); + // Set the Nyquist frequency coefficient to zero. + auto out = fft_output_buffer->GetView(); + out[1] = 0.f; +} + +} // namespace + +SpectralFeaturesExtractor::SpectralFeaturesExtractor() + : half_window_(ComputeScaledHalfVorbisWindow( + 1.f / static_cast<float>(kFrameSize20ms24kHz))), + fft_(kFrameSize20ms24kHz, Pffft::FftType::kReal), + fft_buffer_(fft_.CreateBuffer()), + reference_frame_fft_(fft_.CreateBuffer()), + lagged_frame_fft_(fft_.CreateBuffer()), + dct_table_(ComputeDctTable()) {} + +SpectralFeaturesExtractor::~SpectralFeaturesExtractor() = default; + +void SpectralFeaturesExtractor::Reset() { + cepstral_coeffs_ring_buf_.Reset(); + cepstral_diffs_buf_.Reset(); +} + +bool SpectralFeaturesExtractor::CheckSilenceComputeFeatures( + rtc::ArrayView<const float, kFrameSize20ms24kHz> reference_frame, + rtc::ArrayView<const float, kFrameSize20ms24kHz> lagged_frame, + rtc::ArrayView<float, kNumBands - kNumLowerBands> higher_bands_cepstrum, + rtc::ArrayView<float, kNumLowerBands> average, + rtc::ArrayView<float, kNumLowerBands> first_derivative, + rtc::ArrayView<float, kNumLowerBands> second_derivative, + rtc::ArrayView<float, kNumLowerBands> bands_cross_corr, + float* variability) { + // Compute the Opus band energies for the reference frame. + ComputeWindowedForwardFft(reference_frame, half_window_, fft_buffer_.get(), + reference_frame_fft_.get(), &fft_); + spectral_correlator_.ComputeAutoCorrelation( + reference_frame_fft_->GetConstView(), reference_frame_bands_energy_); + // Check if the reference frame has silence. + const float tot_energy = + std::accumulate(reference_frame_bands_energy_.begin(), + reference_frame_bands_energy_.end(), 0.f); + if (tot_energy < kSilenceThreshold) { + return true; + } + // Compute the Opus band energies for the lagged frame. + ComputeWindowedForwardFft(lagged_frame, half_window_, fft_buffer_.get(), + lagged_frame_fft_.get(), &fft_); + spectral_correlator_.ComputeAutoCorrelation(lagged_frame_fft_->GetConstView(), + lagged_frame_bands_energy_); + // Log of the band energies for the reference frame. + std::array<float, kNumBands> log_bands_energy; + ComputeSmoothedLogMagnitudeSpectrum(reference_frame_bands_energy_, + log_bands_energy); + // Reference frame cepstrum. + std::array<float, kNumBands> cepstrum; + ComputeDct(log_bands_energy, dct_table_, cepstrum); + // Ad-hoc correction terms for the first two cepstral coefficients. + cepstrum[0] -= 12.f; + cepstrum[1] -= 4.f; + // Update the ring buffer and the cepstral difference stats. + cepstral_coeffs_ring_buf_.Push(cepstrum); + UpdateCepstralDifferenceStats(cepstrum, cepstral_coeffs_ring_buf_, + &cepstral_diffs_buf_); + // Write the higher bands cepstral coefficients. + RTC_DCHECK_EQ(cepstrum.size() - kNumLowerBands, higher_bands_cepstrum.size()); + std::copy(cepstrum.begin() + kNumLowerBands, cepstrum.end(), + higher_bands_cepstrum.begin()); + // Compute and write remaining features. 
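+  // Namely: the average and the first and second derivatives of the
+  // lower-band cepstral coefficients over the last three frames, the
+  // normalized cepstral correlation between the reference and the lagged
+  // frame, and the cepstral variability score.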
+ ComputeAvgAndDerivatives(average, first_derivative, second_derivative); + ComputeNormalizedCepstralCorrelation(bands_cross_corr); + RTC_DCHECK(variability); + *variability = ComputeVariability(); + return false; +} + +void SpectralFeaturesExtractor::ComputeAvgAndDerivatives( + rtc::ArrayView<float, kNumLowerBands> average, + rtc::ArrayView<float, kNumLowerBands> first_derivative, + rtc::ArrayView<float, kNumLowerBands> second_derivative) const { + auto curr = cepstral_coeffs_ring_buf_.GetArrayView(0); + auto prev1 = cepstral_coeffs_ring_buf_.GetArrayView(1); + auto prev2 = cepstral_coeffs_ring_buf_.GetArrayView(2); + RTC_DCHECK_EQ(average.size(), first_derivative.size()); + RTC_DCHECK_EQ(first_derivative.size(), second_derivative.size()); + RTC_DCHECK_LE(average.size(), curr.size()); + for (int i = 0; rtc::SafeLt(i, average.size()); ++i) { + // Average, kernel: [1, 1, 1]. + average[i] = curr[i] + prev1[i] + prev2[i]; + // First derivative, kernel: [1, 0, - 1]. + first_derivative[i] = curr[i] - prev2[i]; + // Second derivative, Laplacian kernel: [1, -2, 1]. + second_derivative[i] = curr[i] - 2 * prev1[i] + prev2[i]; + } +} + +void SpectralFeaturesExtractor::ComputeNormalizedCepstralCorrelation( + rtc::ArrayView<float, kNumLowerBands> bands_cross_corr) { + spectral_correlator_.ComputeCrossCorrelation( + reference_frame_fft_->GetConstView(), lagged_frame_fft_->GetConstView(), + bands_cross_corr_); + // Normalize. + for (int i = 0; rtc::SafeLt(i, bands_cross_corr_.size()); ++i) { + bands_cross_corr_[i] = + bands_cross_corr_[i] / + std::sqrt(0.001f + reference_frame_bands_energy_[i] * + lagged_frame_bands_energy_[i]); + } + // Cepstrum. + ComputeDct(bands_cross_corr_, dct_table_, bands_cross_corr); + // Ad-hoc correction terms for the first two cepstral coefficients. + bands_cross_corr[0] -= 1.3f; + bands_cross_corr[1] -= 0.9f; +} + +float SpectralFeaturesExtractor::ComputeVariability() const { + // Compute cepstral variability score. + float variability = 0.f; + for (int delay1 = 0; delay1 < kCepstralCoeffsHistorySize; ++delay1) { + float min_dist = std::numeric_limits<float>::max(); + for (int delay2 = 0; delay2 < kCepstralCoeffsHistorySize; ++delay2) { + if (delay1 == delay2) // The distance would be 0. + continue; + min_dist = + std::min(min_dist, cepstral_diffs_buf_.GetValue(delay1, delay2)); + } + variability += min_dist; + } + // Normalize (based on training set stats). + // TODO(bugs.webrtc.org/10480): Isolate normalization from feature extraction. + return variability / kCepstralCoeffsHistorySize - 2.1f; +} + +} // namespace rnn_vad +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features.h b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features.h new file mode 100644 index 0000000000..d327ef8e01 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_SPECTRAL_FEATURES_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_SPECTRAL_FEATURES_H_ + +#include <array> +#include <cstddef> +#include <memory> +#include <vector> + +#include "api/array_view.h" +#include "modules/audio_processing/agc2/rnn_vad/common.h" +#include "modules/audio_processing/agc2/rnn_vad/ring_buffer.h" +#include "modules/audio_processing/agc2/rnn_vad/spectral_features_internal.h" +#include "modules/audio_processing/agc2/rnn_vad/symmetric_matrix_buffer.h" +#include "modules/audio_processing/utility/pffft_wrapper.h" + +namespace webrtc { +namespace rnn_vad { + +// Class to compute spectral features. +class SpectralFeaturesExtractor { + public: + SpectralFeaturesExtractor(); + SpectralFeaturesExtractor(const SpectralFeaturesExtractor&) = delete; + SpectralFeaturesExtractor& operator=(const SpectralFeaturesExtractor&) = + delete; + ~SpectralFeaturesExtractor(); + // Resets the internal state of the feature extractor. + void Reset(); + // Analyzes a pair of reference and lagged frames from the pitch buffer, + // detects silence and computes features. If silence is detected, the output + // is neither computed nor written. + bool CheckSilenceComputeFeatures( + rtc::ArrayView<const float, kFrameSize20ms24kHz> reference_frame, + rtc::ArrayView<const float, kFrameSize20ms24kHz> lagged_frame, + rtc::ArrayView<float, kNumBands - kNumLowerBands> higher_bands_cepstrum, + rtc::ArrayView<float, kNumLowerBands> average, + rtc::ArrayView<float, kNumLowerBands> first_derivative, + rtc::ArrayView<float, kNumLowerBands> second_derivative, + rtc::ArrayView<float, kNumLowerBands> bands_cross_corr, + float* variability); + + private: + void ComputeAvgAndDerivatives( + rtc::ArrayView<float, kNumLowerBands> average, + rtc::ArrayView<float, kNumLowerBands> first_derivative, + rtc::ArrayView<float, kNumLowerBands> second_derivative) const; + void ComputeNormalizedCepstralCorrelation( + rtc::ArrayView<float, kNumLowerBands> bands_cross_corr); + float ComputeVariability() const; + + const std::array<float, kFrameSize20ms24kHz / 2> half_window_; + Pffft fft_; + std::unique_ptr<Pffft::FloatBuffer> fft_buffer_; + std::unique_ptr<Pffft::FloatBuffer> reference_frame_fft_; + std::unique_ptr<Pffft::FloatBuffer> lagged_frame_fft_; + SpectralCorrelator spectral_correlator_; + std::array<float, kOpusBands24kHz> reference_frame_bands_energy_; + std::array<float, kOpusBands24kHz> lagged_frame_bands_energy_; + std::array<float, kOpusBands24kHz> bands_cross_corr_; + const std::array<float, kNumBands * kNumBands> dct_table_; + RingBuffer<float, kNumBands, kCepstralCoeffsHistorySize> + cepstral_coeffs_ring_buf_; + SymmetricMatrixBuffer<float, kCepstralCoeffsHistorySize> cepstral_diffs_buf_; +}; + +} // namespace rnn_vad +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_SPECTRAL_FEATURES_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features_internal.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features_internal.cc new file mode 100644 index 0000000000..a10b0f7ec9 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features_internal.cc @@ -0,0 +1,188 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/rnn_vad/spectral_features_internal.h" + +#include <algorithm> +#include <cmath> +#include <cstddef> + +#include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_compare.h" + +namespace webrtc { +namespace rnn_vad { +namespace { + +// Weights for each FFT coefficient for each Opus band (Nyquist frequency +// excluded). The size of each band is specified in +// `kOpusScaleNumBins24kHz20ms`. +constexpr std::array<float, kFrameSize20ms24kHz / 2> kOpusBandWeights24kHz20ms = + {{ + 0.f, 0.25f, 0.5f, 0.75f, // Band 0 + 0.f, 0.25f, 0.5f, 0.75f, // Band 1 + 0.f, 0.25f, 0.5f, 0.75f, // Band 2 + 0.f, 0.25f, 0.5f, 0.75f, // Band 3 + 0.f, 0.25f, 0.5f, 0.75f, // Band 4 + 0.f, 0.25f, 0.5f, 0.75f, // Band 5 + 0.f, 0.25f, 0.5f, 0.75f, // Band 6 + 0.f, 0.25f, 0.5f, 0.75f, // Band 7 + 0.f, 0.125f, 0.25f, 0.375f, 0.5f, + 0.625f, 0.75f, 0.875f, // Band 8 + 0.f, 0.125f, 0.25f, 0.375f, 0.5f, + 0.625f, 0.75f, 0.875f, // Band 9 + 0.f, 0.125f, 0.25f, 0.375f, 0.5f, + 0.625f, 0.75f, 0.875f, // Band 10 + 0.f, 0.125f, 0.25f, 0.375f, 0.5f, + 0.625f, 0.75f, 0.875f, // Band 11 + 0.f, 0.0625f, 0.125f, 0.1875f, 0.25f, + 0.3125f, 0.375f, 0.4375f, 0.5f, 0.5625f, + 0.625f, 0.6875f, 0.75f, 0.8125f, 0.875f, + 0.9375f, // Band 12 + 0.f, 0.0625f, 0.125f, 0.1875f, 0.25f, + 0.3125f, 0.375f, 0.4375f, 0.5f, 0.5625f, + 0.625f, 0.6875f, 0.75f, 0.8125f, 0.875f, + 0.9375f, // Band 13 + 0.f, 0.0625f, 0.125f, 0.1875f, 0.25f, + 0.3125f, 0.375f, 0.4375f, 0.5f, 0.5625f, + 0.625f, 0.6875f, 0.75f, 0.8125f, 0.875f, + 0.9375f, // Band 14 + 0.f, 0.0416667f, 0.0833333f, 0.125f, 0.166667f, + 0.208333f, 0.25f, 0.291667f, 0.333333f, 0.375f, + 0.416667f, 0.458333f, 0.5f, 0.541667f, 0.583333f, + 0.625f, 0.666667f, 0.708333f, 0.75f, 0.791667f, + 0.833333f, 0.875f, 0.916667f, 0.958333f, // Band 15 + 0.f, 0.0416667f, 0.0833333f, 0.125f, 0.166667f, + 0.208333f, 0.25f, 0.291667f, 0.333333f, 0.375f, + 0.416667f, 0.458333f, 0.5f, 0.541667f, 0.583333f, + 0.625f, 0.666667f, 0.708333f, 0.75f, 0.791667f, + 0.833333f, 0.875f, 0.916667f, 0.958333f, // Band 16 + 0.f, 0.03125f, 0.0625f, 0.09375f, 0.125f, + 0.15625f, 0.1875f, 0.21875f, 0.25f, 0.28125f, + 0.3125f, 0.34375f, 0.375f, 0.40625f, 0.4375f, + 0.46875f, 0.5f, 0.53125f, 0.5625f, 0.59375f, + 0.625f, 0.65625f, 0.6875f, 0.71875f, 0.75f, + 0.78125f, 0.8125f, 0.84375f, 0.875f, 0.90625f, + 0.9375f, 0.96875f, // Band 17 + 0.f, 0.0208333f, 0.0416667f, 0.0625f, 0.0833333f, + 0.104167f, 0.125f, 0.145833f, 0.166667f, 0.1875f, + 0.208333f, 0.229167f, 0.25f, 0.270833f, 0.291667f, + 0.3125f, 0.333333f, 0.354167f, 0.375f, 0.395833f, + 0.416667f, 0.4375f, 0.458333f, 0.479167f, 0.5f, + 0.520833f, 0.541667f, 0.5625f, 0.583333f, 0.604167f, + 0.625f, 0.645833f, 0.666667f, 0.6875f, 0.708333f, + 0.729167f, 0.75f, 0.770833f, 0.791667f, 0.8125f, + 0.833333f, 0.854167f, 0.875f, 0.895833f, 0.916667f, + 0.9375f, 0.958333f, 0.979167f // Band 18 + }}; + +} // namespace + +SpectralCorrelator::SpectralCorrelator() + : weights_(kOpusBandWeights24kHz20ms.begin(), + kOpusBandWeights24kHz20ms.end()) {} + +SpectralCorrelator::~SpectralCorrelator() = default; + +void SpectralCorrelator::ComputeAutoCorrelation( + rtc::ArrayView<const float> x, + rtc::ArrayView<float, kOpusBands24kHz> auto_corr) const { + ComputeCrossCorrelation(x, x, auto_corr); +} + +void SpectralCorrelator::ComputeCrossCorrelation( + 
rtc::ArrayView<const float> x, + rtc::ArrayView<const float> y, + rtc::ArrayView<float, kOpusBands24kHz> cross_corr) const { + RTC_DCHECK_EQ(x.size(), kFrameSize20ms24kHz); + RTC_DCHECK_EQ(x.size(), y.size()); + RTC_DCHECK_EQ(x[1], 0.f) << "The Nyquist coefficient must be zeroed."; + RTC_DCHECK_EQ(y[1], 0.f) << "The Nyquist coefficient must be zeroed."; + constexpr auto kOpusScaleNumBins24kHz20ms = GetOpusScaleNumBins24kHz20ms(); + int k = 0; // Next Fourier coefficient index. + cross_corr[0] = 0.f; + for (int i = 0; i < kOpusBands24kHz - 1; ++i) { + cross_corr[i + 1] = 0.f; + for (int j = 0; j < kOpusScaleNumBins24kHz20ms[i]; ++j) { // Band size. + const float v = x[2 * k] * y[2 * k] + x[2 * k + 1] * y[2 * k + 1]; + const float tmp = weights_[k] * v; + cross_corr[i] += v - tmp; + cross_corr[i + 1] += tmp; + k++; + } + } + cross_corr[0] *= 2.f; // The first band only gets half contribution. + RTC_DCHECK_EQ(k, kFrameSize20ms24kHz / 2); // Nyquist coefficient never used. +} + +void ComputeSmoothedLogMagnitudeSpectrum( + rtc::ArrayView<const float> bands_energy, + rtc::ArrayView<float, kNumBands> log_bands_energy) { + RTC_DCHECK_LE(bands_energy.size(), kNumBands); + constexpr float kOneByHundred = 1e-2f; + constexpr float kLogOneByHundred = -2.f; + // Init. + float log_max = kLogOneByHundred; + float follow = kLogOneByHundred; + const auto smooth = [&log_max, &follow](float x) { + x = std::max(log_max - 7.f, std::max(follow - 1.5f, x)); + log_max = std::max(log_max, x); + follow = std::max(follow - 1.5f, x); + return x; + }; + // Smoothing over the bands for which the band energy is defined. + for (int i = 0; rtc::SafeLt(i, bands_energy.size()); ++i) { + log_bands_energy[i] = smooth(std::log10(kOneByHundred + bands_energy[i])); + } + // Smoothing over the remaining bands (zero energy). + for (int i = bands_energy.size(); i < kNumBands; ++i) { + log_bands_energy[i] = smooth(kLogOneByHundred); + } +} + +std::array<float, kNumBands * kNumBands> ComputeDctTable() { + std::array<float, kNumBands * kNumBands> dct_table; + const double k = std::sqrt(0.5); + for (int i = 0; i < kNumBands; ++i) { + for (int j = 0; j < kNumBands; ++j) + dct_table[i * kNumBands + j] = std::cos((i + 0.5) * j * kPi / kNumBands); + dct_table[i * kNumBands] *= k; + } + return dct_table; +} + +void ComputeDct(rtc::ArrayView<const float> in, + rtc::ArrayView<const float, kNumBands * kNumBands> dct_table, + rtc::ArrayView<float> out) { + // DCT scaling factor - i.e., sqrt(2 / kNumBands). + constexpr float kDctScalingFactor = 0.301511345f; + constexpr float kDctScalingFactorError = + kDctScalingFactor * kDctScalingFactor - + 2.f / static_cast<float>(kNumBands); + static_assert( + (kDctScalingFactorError >= 0.f && kDctScalingFactorError < 1e-1f) || + (kDctScalingFactorError < 0.f && kDctScalingFactorError > -1e-1f), + "kNumBands changed and kDctScalingFactor has not been updated."); + RTC_DCHECK_NE(in.data(), out.data()) << "In-place DCT is not supported."; + RTC_DCHECK_LE(in.size(), kNumBands); + RTC_DCHECK_LE(1, out.size()); + RTC_DCHECK_LE(out.size(), in.size()); + for (int i = 0; rtc::SafeLt(i, out.size()); ++i) { + out[i] = 0.f; + for (int j = 0; rtc::SafeLt(j, in.size()); ++j) { + out[i] += in[j] * dct_table[j * kNumBands + i]; + } + // TODO(bugs.webrtc.org/10480): Scaling factor in the DCT table. 
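+    // This yields the orthonormal DCT-II coefficient
+    //   out[i] = sqrt(2 / kNumBands) * sum_j in[j] * cos((j + 0.5) * i * pi / kNumBands),
+    // where out[0] is additionally scaled by sqrt(0.5) through the table
+    // built in ComputeDctTable().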
+ out[i] *= kDctScalingFactor; + } +} + +} // namespace rnn_vad +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features_internal.h b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features_internal.h new file mode 100644 index 0000000000..f4b293a567 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features_internal.h @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_SPECTRAL_FEATURES_INTERNAL_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_SPECTRAL_FEATURES_INTERNAL_H_ + +#include <stddef.h> + +#include <array> +#include <vector> + +#include "api/array_view.h" +#include "modules/audio_processing/agc2/rnn_vad/common.h" + +namespace webrtc { +namespace rnn_vad { + +// At a sample rate of 24 kHz, the last 3 Opus bands are beyond the Nyquist +// frequency. However, band #19 gets the contributions from band #18 because +// of the symmetric triangular filter with peak response at 12 kHz. +constexpr int kOpusBands24kHz = 20; +static_assert(kOpusBands24kHz < kNumBands, + "The number of bands at 24 kHz must be less than those defined " + "in the Opus scale at 48 kHz."); + +// Number of FFT frequency bins covered by each band in the Opus scale at a +// sample rate of 24 kHz for 20 ms frames. +// Declared here for unit testing. +constexpr std::array<int, kOpusBands24kHz - 1> GetOpusScaleNumBins24kHz20ms() { + return {4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 16, 16, 16, 24, 24, 32, 48}; +} + +// TODO(bugs.webrtc.org/10480): Move to a separate file. +// Class to compute band-wise spectral features in the Opus perceptual scale +// for 20 ms frames sampled at 24 kHz. The analysis methods apply triangular +// filters with peak response at the each band boundary. +class SpectralCorrelator { + public: + // Ctor. + SpectralCorrelator(); + SpectralCorrelator(const SpectralCorrelator&) = delete; + SpectralCorrelator& operator=(const SpectralCorrelator&) = delete; + ~SpectralCorrelator(); + + // Computes the band-wise spectral auto-correlations. + // `x` must: + // - have size equal to `kFrameSize20ms24kHz`; + // - be encoded as vectors of interleaved real-complex FFT coefficients + // where x[1] = y[1] = 0 (the Nyquist frequency coefficient is omitted). + void ComputeAutoCorrelation( + rtc::ArrayView<const float> x, + rtc::ArrayView<float, kOpusBands24kHz> auto_corr) const; + + // Computes the band-wise spectral cross-correlations. + // `x` and `y` must: + // - have size equal to `kFrameSize20ms24kHz`; + // - be encoded as vectors of interleaved real-complex FFT coefficients where + // x[1] = y[1] = 0 (the Nyquist frequency coefficient is omitted). + void ComputeCrossCorrelation( + rtc::ArrayView<const float> x, + rtc::ArrayView<const float> y, + rtc::ArrayView<float, kOpusBands24kHz> cross_corr) const; + + private: + const std::vector<float> weights_; // Weights for each Fourier coefficient. +}; + +// TODO(bugs.webrtc.org/10480): Move to anonymous namespace in +// spectral_features.cc. 
Given a vector of Opus-bands energy coefficients, +// computes the log magnitude spectrum applying smoothing both over time and +// over frequency. Declared here for unit testing. +void ComputeSmoothedLogMagnitudeSpectrum( + rtc::ArrayView<const float> bands_energy, + rtc::ArrayView<float, kNumBands> log_bands_energy); + +// TODO(bugs.webrtc.org/10480): Move to anonymous namespace in +// spectral_features.cc. Creates a DCT table for arrays having size equal to +// `kNumBands`. Declared here for unit testing. +std::array<float, kNumBands * kNumBands> ComputeDctTable(); + +// TODO(bugs.webrtc.org/10480): Move to anonymous namespace in +// spectral_features.cc. Computes DCT for `in` given a pre-computed DCT table. +// In-place computation is not allowed and `out` can be smaller than `in` in +// order to only compute the first DCT coefficients. Declared here for unit +// testing. +void ComputeDct(rtc::ArrayView<const float> in, + rtc::ArrayView<const float, kNumBands * kNumBands> dct_table, + rtc::ArrayView<float> out); + +} // namespace rnn_vad +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_SPECTRAL_FEATURES_INTERNAL_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features_internal_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features_internal_unittest.cc new file mode 100644 index 0000000000..ece4eb5024 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features_internal_unittest.cc @@ -0,0 +1,160 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/rnn_vad/spectral_features_internal.h" + +#include <algorithm> +#include <array> +#include <complex> +#include <numeric> +#include <vector> + +#include "api/array_view.h" +#include "modules/audio_processing/agc2/rnn_vad/test_utils.h" +#include "modules/audio_processing/utility/pffft_wrapper.h" +#include "rtc_base/numerics/safe_compare.h" +// TODO(bugs.webrtc.org/8948): Add when the issue is fixed. +// #include "test/fpe_observer.h" +#include "test/gtest.h" + +namespace webrtc { +namespace rnn_vad { +namespace { + +// Generates the values for the array named `kOpusBandWeights24kHz20ms` in the +// anonymous namespace of the .cc file, which is the array of FFT coefficient +// weights for the Opus scale triangular filters. +std::vector<float> ComputeTriangularFiltersWeights() { + constexpr auto kOpusScaleNumBins24kHz20ms = GetOpusScaleNumBins24kHz20ms(); + const auto& v = kOpusScaleNumBins24kHz20ms; // Alias. + const int num_weights = std::accumulate(kOpusScaleNumBins24kHz20ms.begin(), + kOpusScaleNumBins24kHz20ms.end(), 0); + std::vector<float> weights(num_weights); + int next_fft_coeff_index = 0; + for (int band = 0; rtc::SafeLt(band, v.size()); ++band) { + const int band_size = v[band]; + for (int j = 0; rtc::SafeLt(j, band_size); ++j) { + weights[next_fft_coeff_index + j] = static_cast<float>(j) / band_size; + } + next_fft_coeff_index += band_size; + } + return weights; +} + +// Checks that the values returned by GetOpusScaleNumBins24kHz20ms() match the +// Opus scale frequency boundaries. 
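+// Each boundary in Hz maps to the FFT bin index
+// boundary_hz * kFrameSize20ms24kHz / kSampleRate24kHz, i.e. boundary_hz / 50
+// for 20 ms frames (480 samples) at 24 kHz, and each band size is the
+// difference between consecutive bin boundaries (e.g. 200 Hz -> bin 4, so the
+// first band spans 4 bins).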
+TEST(RnnVadTest, TestOpusScaleBoundaries) { + constexpr int kBandFrequencyBoundariesHz[kNumBands - 1] = { + 200, 400, 600, 800, 1000, 1200, 1400, 1600, 2000, 2400, 2800, + 3200, 4000, 4800, 5600, 6800, 8000, 9600, 12000, 15600, 20000}; + constexpr auto kOpusScaleNumBins24kHz20ms = GetOpusScaleNumBins24kHz20ms(); + int prev = 0; + for (int i = 0; rtc::SafeLt(i, kOpusScaleNumBins24kHz20ms.size()); ++i) { + int boundary = + kBandFrequencyBoundariesHz[i] * kFrameSize20ms24kHz / kSampleRate24kHz; + EXPECT_EQ(kOpusScaleNumBins24kHz20ms[i], boundary - prev); + prev = boundary; + } +} + +// Checks that the computed triangular filters weights for the Opus scale are +// monotonic withing each Opus band. This test should only be enabled when +// ComputeTriangularFiltersWeights() is changed and `kOpusBandWeights24kHz20ms` +// is updated accordingly. +TEST(RnnVadTest, DISABLED_TestOpusScaleWeights) { + auto weights = ComputeTriangularFiltersWeights(); + int i = 0; + for (int band_size : GetOpusScaleNumBins24kHz20ms()) { + SCOPED_TRACE(band_size); + rtc::ArrayView<float> band_weights(weights.data() + i, band_size); + float prev = -1.f; + for (float weight : band_weights) { + EXPECT_LT(prev, weight); + prev = weight; + } + i += band_size; + } +} + +// Checks that the computed band-wise auto-correlation is non-negative for a +// simple input vector of FFT coefficients. +TEST(RnnVadTest, SpectralCorrelatorValidOutput) { + // Input: vector of (1, 1j) values. + Pffft fft(kFrameSize20ms24kHz, Pffft::FftType::kReal); + auto in = fft.CreateBuffer(); + std::array<float, kOpusBands24kHz> out; + auto in_view = in->GetView(); + std::fill(in_view.begin(), in_view.end(), 1.f); + in_view[1] = 0.f; // Nyquist frequency. + // Compute and check output. + SpectralCorrelator e; + e.ComputeAutoCorrelation(in_view, out); + for (int i = 0; i < kOpusBands24kHz; ++i) { + SCOPED_TRACE(i); + EXPECT_GT(out[i], 0.f); + } +} + +// Checks that the computed smoothed log magnitude spectrum is within tolerance +// given hard-coded test input data. +TEST(RnnVadTest, ComputeSmoothedLogMagnitudeSpectrumWithinTolerance) { + constexpr std::array<float, kNumBands> input = { + {86.060539245605f, 275.668334960938f, 43.406528472900f, 6.541896820068f, + 17.964015960693f, 8.090919494629f, 1.261920094490f, 1.212702631950f, + 1.619154453278f, 0.508935272694f, 0.346316039562f, 0.237035423517f, + 0.172424271703f, 0.271657168865f, 0.126088857651f, 0.139967113733f, + 0.207200810313f, 0.155893072486f, 0.091090843081f, 0.033391401172f, + 0.013879744336f, 0.011973354965f}}; + constexpr std::array<float, kNumBands> expected_output = { + {1.934854507446f, 2.440402746201f, 1.637655138969f, 0.816367030144f, + 1.254645109177f, 0.908534288406f, 0.104459829628f, 0.087320849299f, + 0.211962252855f, -0.284886807203f, -0.448164641857f, -0.607240796089f, + -0.738917350769f, -0.550279200077f, -0.866177439690f, -0.824003994465f, + -0.663138568401f, -0.780171751976f, -0.995288193226f, -1.362596273422f, + -1.621970295906f, -1.658103585243f}}; + std::array<float, kNumBands> computed_output; + { + // TODO(bugs.webrtc.org/8948): Add when the issue is fixed. + // FloatingPointExceptionObserver fpe_observer; + ComputeSmoothedLogMagnitudeSpectrum(input, computed_output); + ExpectNearAbsolute(expected_output, computed_output, 1e-5f); + } +} + +// Checks that the computed DCT is within tolerance given hard-coded test input +// data. 
+TEST(RnnVadTest, ComputeDctWithinTolerance) { + constexpr std::array<float, kNumBands> input = { + {0.232155621052f, 0.678957760334f, 0.220818966627f, -0.077363930643f, + -0.559227049351f, 0.432545185089f, 0.353900641203f, 0.398993015289f, + 0.409774333239f, 0.454977899790f, 0.300520688295f, -0.010286616161f, + 0.272525429726f, 0.098067551851f, 0.083649002016f, 0.046226885170f, + -0.033228103071f, 0.144773483276f, -0.117661058903f, -0.005628800020f, + -0.009547689930f, -0.045382082462f}}; + constexpr std::array<float, kNumBands> expected_output = { + {0.697072803974f, 0.442710995674f, -0.293156713247f, -0.060711503029f, + 0.292050391436f, 0.489301353693f, 0.402255415916f, 0.134404733777f, + -0.086305990815f, -0.199605688453f, -0.234511867166f, -0.413774639368f, + -0.388507157564f, -0.032798115164f, 0.044605545700f, 0.112466648221f, + -0.050096966326f, 0.045971218497f, -0.029815061018f, -0.410366982222f, + -0.209233760834f, -0.128037497401f}}; + auto dct_table = ComputeDctTable(); + std::array<float, kNumBands> computed_output; + { + // TODO(bugs.webrtc.org/8948): Add when the issue is fixed. + // FloatingPointExceptionObserver fpe_observer; + ComputeDct(input, dct_table, computed_output); + ExpectNearAbsolute(expected_output, computed_output, 1e-5f); + } +} + +} // namespace +} // namespace rnn_vad +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features_unittest.cc new file mode 100644 index 0000000000..324d694957 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/spectral_features_unittest.cc @@ -0,0 +1,160 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/rnn_vad/spectral_features.h" + +#include <algorithm> + +#include "modules/audio_processing/agc2/rnn_vad/test_utils.h" +#include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_compare.h" +// TODO(bugs.webrtc.org/8948): Add when the issue is fixed. +// #include "test/fpe_observer.h" +#include "test/gtest.h" + +namespace webrtc { +namespace rnn_vad { +namespace { + +constexpr int kTestFeatureVectorSize = kNumBands + 3 * kNumLowerBands + 1; + +// Writes non-zero sample values. 
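+// The samples form a ramp that repeats every 100 values, which keeps the
+// input cheap to generate and, unlike the all-zero input, is not expected to
+// be classified as silence in the tests below.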
+void WriteTestData(rtc::ArrayView<float> samples) { + for (int i = 0; rtc::SafeLt(i, samples.size()); ++i) { + samples[i] = i % 100; + } +} + +rtc::ArrayView<float, kNumBands - kNumLowerBands> GetHigherBandsSpectrum( + std::array<float, kTestFeatureVectorSize>* feature_vector) { + return {feature_vector->data() + kNumLowerBands, kNumBands - kNumLowerBands}; +} + +rtc::ArrayView<float, kNumLowerBands> GetAverage( + std::array<float, kTestFeatureVectorSize>* feature_vector) { + return {feature_vector->data(), kNumLowerBands}; +} + +rtc::ArrayView<float, kNumLowerBands> GetFirstDerivative( + std::array<float, kTestFeatureVectorSize>* feature_vector) { + return {feature_vector->data() + kNumBands, kNumLowerBands}; +} + +rtc::ArrayView<float, kNumLowerBands> GetSecondDerivative( + std::array<float, kTestFeatureVectorSize>* feature_vector) { + return {feature_vector->data() + kNumBands + kNumLowerBands, kNumLowerBands}; +} + +rtc::ArrayView<float, kNumLowerBands> GetCepstralCrossCorrelation( + std::array<float, kTestFeatureVectorSize>* feature_vector) { + return {feature_vector->data() + kNumBands + 2 * kNumLowerBands, + kNumLowerBands}; +} + +float* GetCepstralVariability( + std::array<float, kTestFeatureVectorSize>* feature_vector) { + return feature_vector->data() + kNumBands + 3 * kNumLowerBands; +} + +constexpr float kInitialFeatureVal = -9999.f; + +// Checks that silence is detected when the input signal is 0 and that the +// feature vector is written only if the input signal is not tagged as silence. +TEST(RnnVadTest, SpectralFeaturesWithAndWithoutSilence) { + // Initialize. + SpectralFeaturesExtractor sfe; + std::array<float, kFrameSize20ms24kHz> samples; + rtc::ArrayView<float, kFrameSize20ms24kHz> samples_view(samples); + bool is_silence; + std::array<float, kTestFeatureVectorSize> feature_vector; + + // Write an initial value in the feature vector to detect changes. + std::fill(feature_vector.begin(), feature_vector.end(), kInitialFeatureVal); + + // TODO(bugs.webrtc.org/8948): Add when the issue is fixed. + // FloatingPointExceptionObserver fpe_observer; + + // With silence. + std::fill(samples.begin(), samples.end(), 0.f); + is_silence = sfe.CheckSilenceComputeFeatures( + samples_view, samples_view, GetHigherBandsSpectrum(&feature_vector), + GetAverage(&feature_vector), GetFirstDerivative(&feature_vector), + GetSecondDerivative(&feature_vector), + GetCepstralCrossCorrelation(&feature_vector), + GetCepstralVariability(&feature_vector)); + // Silence is expected, the output won't be overwritten. + EXPECT_TRUE(is_silence); + EXPECT_TRUE(std::all_of(feature_vector.begin(), feature_vector.end(), + [](float x) { return x == kInitialFeatureVal; })); + + // With no silence. + WriteTestData(samples); + is_silence = sfe.CheckSilenceComputeFeatures( + samples_view, samples_view, GetHigherBandsSpectrum(&feature_vector), + GetAverage(&feature_vector), GetFirstDerivative(&feature_vector), + GetSecondDerivative(&feature_vector), + GetCepstralCrossCorrelation(&feature_vector), + GetCepstralVariability(&feature_vector)); + // Silence is not expected, the output will be overwritten. + EXPECT_FALSE(is_silence); + EXPECT_FALSE(std::all_of(feature_vector.begin(), feature_vector.end(), + [](float x) { return x == kInitialFeatureVal; })); +} + +// Feeds a constant input signal and checks that: +// - the cepstral coefficients average does not change; +// - the derivatives are zero; +// - the cepstral variability score does not change. 
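+// This is expected because identical frames produce identical cepstral
+// coefficients, so the [1, 0, -1] and [1, -2, 1] derivative kernels used in
+// ComputeAvgAndDerivatives() cancel out, while the [1, 1, 1] averaging kernel
+// keeps returning the same sum.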
+TEST(RnnVadTest, CepstralFeaturesConstantAverageZeroDerivative) { + // Initialize. + SpectralFeaturesExtractor sfe; + std::array<float, kFrameSize20ms24kHz> samples; + rtc::ArrayView<float, kFrameSize20ms24kHz> samples_view(samples); + WriteTestData(samples); + + // Fill the spectral features with test data. + std::array<float, kTestFeatureVectorSize> feature_vector; + for (int i = 0; i < kCepstralCoeffsHistorySize; ++i) { + sfe.CheckSilenceComputeFeatures( + samples_view, samples_view, GetHigherBandsSpectrum(&feature_vector), + GetAverage(&feature_vector), GetFirstDerivative(&feature_vector), + GetSecondDerivative(&feature_vector), + GetCepstralCrossCorrelation(&feature_vector), + GetCepstralVariability(&feature_vector)); + } + + // Feed the test data one last time but using a different output vector. + std::array<float, kTestFeatureVectorSize> feature_vector_last; + sfe.CheckSilenceComputeFeatures( + samples_view, samples_view, GetHigherBandsSpectrum(&feature_vector_last), + GetAverage(&feature_vector_last), + GetFirstDerivative(&feature_vector_last), + GetSecondDerivative(&feature_vector_last), + GetCepstralCrossCorrelation(&feature_vector_last), + GetCepstralVariability(&feature_vector_last)); + + // Average is unchanged. + ExpectEqualFloatArray({feature_vector.data(), kNumLowerBands}, + {feature_vector_last.data(), kNumLowerBands}); + // First and second derivatives are zero. + constexpr std::array<float, kNumLowerBands> zeros{}; + ExpectEqualFloatArray( + {feature_vector_last.data() + kNumBands, kNumLowerBands}, zeros); + ExpectEqualFloatArray( + {feature_vector_last.data() + kNumBands + kNumLowerBands, kNumLowerBands}, + zeros); + // Variability is unchanged. + EXPECT_FLOAT_EQ(feature_vector[kNumBands + 3 * kNumLowerBands], + feature_vector_last[kNumBands + 3 * kNumLowerBands]); +} + +} // namespace +} // namespace rnn_vad +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/symmetric_matrix_buffer.h b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/symmetric_matrix_buffer.h new file mode 100644 index 0000000000..d186479551 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/symmetric_matrix_buffer.h @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_SYMMETRIC_MATRIX_BUFFER_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_SYMMETRIC_MATRIX_BUFFER_H_ + +#include <algorithm> +#include <array> +#include <cstring> +#include <utility> + +#include "api/array_view.h" +#include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_compare.h" + +namespace webrtc { +namespace rnn_vad { + +// Data structure to buffer the results of pair-wise comparisons between items +// stored in a ring buffer. Every time that the oldest item is replaced in the +// ring buffer, the new one is compared to the remaining items in the ring +// buffer. The results of such comparisons need to be buffered and automatically +// removed when one of the two corresponding items that have been compared is +// removed from the ring buffer. 
It is assumed that the comparison is symmetric +// and that comparing an item with itself is not needed. +template <typename T, int S> +class SymmetricMatrixBuffer { + static_assert(S > 2, ""); + + public: + SymmetricMatrixBuffer() = default; + SymmetricMatrixBuffer(const SymmetricMatrixBuffer&) = delete; + SymmetricMatrixBuffer& operator=(const SymmetricMatrixBuffer&) = delete; + ~SymmetricMatrixBuffer() = default; + // Sets the buffer values to zero. + void Reset() { + static_assert(std::is_arithmetic<T>::value, + "Integral or floating point required."); + buf_.fill(0); + } + // Pushes the results from the comparison between the most recent item and + // those that are still in the ring buffer. The first element in `values` must + // correspond to the comparison between the most recent item and the second + // most recent one in the ring buffer, whereas the last element in `values` + // must correspond to the comparison between the most recent item and the + // oldest one in the ring buffer. + void Push(rtc::ArrayView<T, S - 1> values) { + // Move the lower-right sub-matrix of size (S-2) x (S-2) one row up and one + // column left. + std::memmove(buf_.data(), buf_.data() + S, (buf_.size() - S) * sizeof(T)); + // Copy new values in the last column in the right order. + for (int i = 0; rtc::SafeLt(i, values.size()); ++i) { + const int index = (S - 1 - i) * (S - 1) - 1; + RTC_DCHECK_GE(index, 0); + RTC_DCHECK_LT(index, buf_.size()); + buf_[index] = values[i]; + } + } + // Reads the value that corresponds to comparison of two items in the ring + // buffer having delay `delay1` and `delay2`. The two arguments must not be + // equal and both must be in {0, ..., S - 1}. + T GetValue(int delay1, int delay2) const { + int row = S - 1 - delay1; + int col = S - 1 - delay2; + RTC_DCHECK_NE(row, col) << "The diagonal cannot be accessed."; + if (row > col) + std::swap(row, col); // Swap to access the upper-right triangular part. + RTC_DCHECK_LE(0, row); + RTC_DCHECK_LT(row, S - 1) << "Not enforcing row < col and row != col."; + RTC_DCHECK_LE(1, col) << "Not enforcing row < col and row != col."; + RTC_DCHECK_LT(col, S); + const int index = row * (S - 1) + (col - 1); + RTC_DCHECK_LE(0, index); + RTC_DCHECK_LT(index, buf_.size()); + return buf_[index]; + } + + private: + // Encode an upper-right triangular matrix (excluding its diagonal) using a + // square matrix. This allows to move the data in Push() with one single + // operation. + std::array<T, (S - 1) * (S - 1)> buf_{}; +}; + +} // namespace rnn_vad +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_SYMMETRIC_MATRIX_BUFFER_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/symmetric_matrix_buffer_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/symmetric_matrix_buffer_unittest.cc new file mode 100644 index 0000000000..1509ca5ac1 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/symmetric_matrix_buffer_unittest.cc @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "modules/audio_processing/agc2/rnn_vad/symmetric_matrix_buffer.h"
+
+#include "modules/audio_processing/agc2/rnn_vad/ring_buffer.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace rnn_vad {
+namespace {
+
+template <typename T, int S>
+void CheckSymmetry(const SymmetricMatrixBuffer<T, S>* sym_matrix_buf) {
+  for (int row = 0; row < S - 1; ++row)
+    for (int col = row + 1; col < S; ++col)
+      EXPECT_EQ(sym_matrix_buf->GetValue(row, col),
+                sym_matrix_buf->GetValue(col, row));
+}
+
+using PairType = std::pair<int, int>;
+
+// Checks whether the symmetric matrix buffer contains a pair in which one of
+// the two values equals the given one.
+template <int S>
+bool CheckPairsWithValueExist(
+    const SymmetricMatrixBuffer<PairType, S>* sym_matrix_buf,
+    const int value) {
+  for (int row = 0; row < S - 1; ++row) {
+    for (int col = row + 1; col < S; ++col) {
+      auto p = sym_matrix_buf->GetValue(row, col);
+      if (p.first == value || p.second == value)
+        return true;
+    }
+  }
+  return false;
+}
+
+// Test that shows how to combine RingBuffer and SymmetricMatrixBuffer to
+// efficiently compute pair-wise scores. This test verifies that the evolution
+// of a SymmetricMatrixBuffer instance follows that of RingBuffer.
+TEST(RnnVadTest, SymmetricMatrixBufferUseCase) {
+  // Instantiate a ring buffer which will be fed with a series of integer
+  // values.
+  constexpr int kRingBufSize = 10;
+  RingBuffer<int, 1, kRingBufSize> ring_buf;
+  // Instantiate a symmetric matrix buffer for the ring buffer above. It
+  // stores pairs of integers with which this test can easily check that the
+  // evolution of RingBuffer and SymmetricMatrixBuffer match.
+  SymmetricMatrixBuffer<PairType, kRingBufSize> sym_matrix_buf;
+  for (int t = 1; t <= 100; ++t) {  // Evolution steps.
+    SCOPED_TRACE(t);
+    const int t_removed = ring_buf.GetArrayView(kRingBufSize - 1)[0];
+    ring_buf.Push({&t, 1});
+    // The head of the ring buffer is `t`.
+    ASSERT_EQ(t, ring_buf.GetArrayView(0)[0]);
+    // Create the comparisons between `t` and the older elements in the ring
+    // buffer.
+    std::array<PairType, kRingBufSize - 1> new_comparisons;
+    for (int i = 0; i < kRingBufSize - 1; ++i) {
+      // Start comparing `t` to the second newest element in the ring buffer.
+      const int delay = i + 1;
+      const auto t_prev = ring_buf.GetArrayView(delay)[0];
+      ASSERT_EQ(std::max(0, t - delay), t_prev);
+      // Compare the last element `t` with `t_prev`.
+      new_comparisons[i].first = t_prev;
+      new_comparisons[i].second = t;
+    }
+    // Push the new comparisons into the symmetric matrix buffer.
+    sym_matrix_buf.Push({new_comparisons.data(), new_comparisons.size()});
+    // Tests.
+    CheckSymmetry(&sym_matrix_buf);
+    // Check that the pairs resulting from the content in the ring buffer are
+    // in the right position.
+    for (int delay1 = 0; delay1 < kRingBufSize - 1; ++delay1) {
+      for (int delay2 = delay1 + 1; delay2 < kRingBufSize; ++delay2) {
+        const auto t1 = ring_buf.GetArrayView(delay1)[0];
+        const auto t2 = ring_buf.GetArrayView(delay2)[0];
+        ASSERT_LE(t2, t1);
+        const auto p = sym_matrix_buf.GetValue(delay1, delay2);
+        EXPECT_EQ(p.first, t2);
+        EXPECT_EQ(p.second, t1);
+      }
+    }
+    // Check that every older element in the ring buffer still has a
+    // corresponding pair in the symmetric matrix buffer.
+ for (int delay = 1; delay < kRingBufSize; ++delay) { + const auto t_prev = ring_buf.GetArrayView(delay)[0]; + EXPECT_TRUE(CheckPairsWithValueExist(&sym_matrix_buf, t_prev)); + } + // Check that the element removed from the ring buffer has no corresponding + // pairs in the symmetric matrix buffer. + if (t > kRingBufSize - 1) { + EXPECT_FALSE(CheckPairsWithValueExist(&sym_matrix_buf, t_removed)); + } + } +} + +} // namespace +} // namespace rnn_vad +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/test_utils.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/test_utils.cc new file mode 100644 index 0000000000..857a9f2706 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/test_utils.cc @@ -0,0 +1,143 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/rnn_vad/test_utils.h" + +#include <algorithm> +#include <fstream> +#include <memory> +#include <string> +#include <type_traits> +#include <vector> + +#include "absl/strings/string_view.h" +#include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_compare.h" +#include "test/gtest.h" +#include "test/testsupport/file_utils.h" + +namespace webrtc { +namespace rnn_vad { +namespace { + +// File reader for binary files that contain a sequence of values with +// arithmetic type `T`. The values of type `T` that are read are cast to float. 
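The FloatFileReader template below implements the FileReader interface declared in test_utils.h. As a rough usage sketch (illustrative only, not part of the upstream files; it assumes the samples resource is read in 10 ms chunks of 240 values at 24 kHz):

#include <vector>

#include "modules/audio_processing/agc2/rnn_vad/test_utils.h"

namespace {

// Reads the whole PCM samples resource, 240 float values at a time, and
// returns the number of complete chunks that were read.
int CountPcmChunks() {
  auto reader = webrtc::rnn_vad::CreatePcmSamplesReader();
  std::vector<float> chunk(240);  // Assumed chunk size: 10 ms at 24 kHz.
  int num_chunks = 0;
  while (reader->ReadChunk(chunk)) {
    ++num_chunks;
  }
  return num_chunks;
}

}  // namespace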
+template <typename T> +class FloatFileReader : public FileReader { + public: + static_assert(std::is_arithmetic<T>::value, ""); + explicit FloatFileReader(absl::string_view filename) + : is_(std::string(filename), std::ios::binary | std::ios::ate), + size_(is_.tellg() / sizeof(T)) { + RTC_CHECK(is_); + SeekBeginning(); + } + FloatFileReader(const FloatFileReader&) = delete; + FloatFileReader& operator=(const FloatFileReader&) = delete; + ~FloatFileReader() = default; + + int size() const override { return size_; } + bool ReadChunk(rtc::ArrayView<float> dst) override { + const std::streamsize bytes_to_read = dst.size() * sizeof(T); + if (std::is_same<T, float>::value) { + is_.read(reinterpret_cast<char*>(dst.data()), bytes_to_read); + } else { + buffer_.resize(dst.size()); + is_.read(reinterpret_cast<char*>(buffer_.data()), bytes_to_read); + std::transform(buffer_.begin(), buffer_.end(), dst.begin(), + [](const T& v) -> float { return static_cast<float>(v); }); + } + return is_.gcount() == bytes_to_read; + } + bool ReadValue(float& dst) override { return ReadChunk({&dst, 1}); } + void SeekForward(int hop) override { is_.seekg(hop * sizeof(T), is_.cur); } + void SeekBeginning() override { is_.seekg(0, is_.beg); } + + private: + std::ifstream is_; + const int size_; + std::vector<T> buffer_; +}; + +} // namespace + +using webrtc::test::ResourcePath; + +void ExpectEqualFloatArray(rtc::ArrayView<const float> expected, + rtc::ArrayView<const float> computed) { + ASSERT_EQ(expected.size(), computed.size()); + for (int i = 0; rtc::SafeLt(i, expected.size()); ++i) { + SCOPED_TRACE(i); + EXPECT_FLOAT_EQ(expected[i], computed[i]); + } +} + +void ExpectNearAbsolute(rtc::ArrayView<const float> expected, + rtc::ArrayView<const float> computed, + float tolerance) { + ASSERT_EQ(expected.size(), computed.size()); + for (int i = 0; rtc::SafeLt(i, expected.size()); ++i) { + SCOPED_TRACE(i); + EXPECT_NEAR(expected[i], computed[i], tolerance); + } +} + +std::unique_ptr<FileReader> CreatePcmSamplesReader() { + return std::make_unique<FloatFileReader<int16_t>>( + /*filename=*/test::ResourcePath("audio_processing/agc2/rnn_vad/samples", + "pcm")); +} + +ChunksFileReader CreatePitchBuffer24kHzReader() { + auto reader = std::make_unique<FloatFileReader<float>>( + /*filename=*/test::ResourcePath( + "audio_processing/agc2/rnn_vad/pitch_buf_24k", "dat")); + const int num_chunks = rtc::CheckedDivExact(reader->size(), kBufSize24kHz); + return {/*chunk_size=*/kBufSize24kHz, num_chunks, std::move(reader)}; +} + +ChunksFileReader CreateLpResidualAndPitchInfoReader() { + constexpr int kPitchInfoSize = 2; // Pitch period and strength. 
+ constexpr int kChunkSize = kBufSize24kHz + kPitchInfoSize; + auto reader = std::make_unique<FloatFileReader<float>>( + /*filename=*/test::ResourcePath( + "audio_processing/agc2/rnn_vad/pitch_lp_res", "dat")); + const int num_chunks = rtc::CheckedDivExact(reader->size(), kChunkSize); + return {kChunkSize, num_chunks, std::move(reader)}; +} + +std::unique_ptr<FileReader> CreateGruInputReader() { + return std::make_unique<FloatFileReader<float>>( + /*filename=*/test::ResourcePath("audio_processing/agc2/rnn_vad/gru_in", + "dat")); +} + +std::unique_ptr<FileReader> CreateVadProbsReader() { + return std::make_unique<FloatFileReader<float>>( + /*filename=*/test::ResourcePath("audio_processing/agc2/rnn_vad/vad_prob", + "dat")); +} + +PitchTestData::PitchTestData() { + FloatFileReader<float> reader( + /*filename=*/ResourcePath( + "audio_processing/agc2/rnn_vad/pitch_search_int", "dat")); + reader.ReadChunk(pitch_buffer_24k_); + reader.ReadChunk(square_energies_24k_); + reader.ReadChunk(auto_correlation_12k_); + // Reverse the order of the squared energy values. + // Required after the WebRTC CL 191703 which switched to forward computation. + std::reverse(square_energies_24k_.begin(), square_energies_24k_.end()); +} + +PitchTestData::~PitchTestData() = default; + +} // namespace rnn_vad +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/test_utils.h b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/test_utils.h new file mode 100644 index 0000000000..e64b7b7ecd --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/test_utils.h @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_TEST_UTILS_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_TEST_UTILS_H_ + +#include <array> +#include <fstream> +#include <memory> +#include <string> + +#include "absl/strings/string_view.h" +#include "api/array_view.h" +#include "modules/audio_processing/agc2/rnn_vad/common.h" +#include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_compare.h" + +namespace webrtc { +namespace rnn_vad { + +constexpr float kFloatMin = std::numeric_limits<float>::min(); + +// Fails for every pair from two equally sized rtc::ArrayView<float> views such +// that the values in the pair do not match. +void ExpectEqualFloatArray(rtc::ArrayView<const float> expected, + rtc::ArrayView<const float> computed); + +// Fails for every pair from two equally sized rtc::ArrayView<float> views such +// that their absolute error is above a given threshold. +void ExpectNearAbsolute(rtc::ArrayView<const float> expected, + rtc::ArrayView<const float> computed, + float tolerance); + +// File reader interface. +class FileReader { + public: + virtual ~FileReader() = default; + // Number of values in the file. + virtual int size() const = 0; + // Reads `dst.size()` float values into `dst`, advances the internal file + // position according to the number of read bytes and returns true if the + // values are correctly read. 
If the number of remaining bytes in the file is + // not sufficient to read `dst.size()` float values, `dst` is partially + // modified and false is returned. + virtual bool ReadChunk(rtc::ArrayView<float> dst) = 0; + // Reads a single float value, advances the internal file position according + // to the number of read bytes and returns true if the value is correctly + // read. If the number of remaining bytes in the file is not sufficient to + // read one float, `dst` is not modified and false is returned. + virtual bool ReadValue(float& dst) = 0; + // Advances the internal file position by `hop` float values. + virtual void SeekForward(int hop) = 0; + // Resets the internal file position to BOF. + virtual void SeekBeginning() = 0; +}; + +// File reader for files that contain `num_chunks` chunks with size equal to +// `chunk_size`. +struct ChunksFileReader { + const int chunk_size; + const int num_chunks; + std::unique_ptr<FileReader> reader; +}; + +// Creates a reader for the PCM S16 samples file. +std::unique_ptr<FileReader> CreatePcmSamplesReader(); + +// Creates a reader for the 24 kHz pitch buffer test data. +ChunksFileReader CreatePitchBuffer24kHzReader(); + +// Creates a reader for the LP residual and pitch information test data. +ChunksFileReader CreateLpResidualAndPitchInfoReader(); + +// Creates a reader for the sequence of GRU input vectors. +std::unique_ptr<FileReader> CreateGruInputReader(); + +// Creates a reader for the VAD probabilities test data. +std::unique_ptr<FileReader> CreateVadProbsReader(); + +// Class to retrieve a test pitch buffer content and the expected output for the +// analysis steps. +class PitchTestData { + public: + PitchTestData(); + ~PitchTestData(); + rtc::ArrayView<const float, kBufSize24kHz> PitchBuffer24kHzView() const { + return pitch_buffer_24k_; + } + rtc::ArrayView<const float, kRefineNumLags24kHz> SquareEnergies24kHzView() + const { + return square_energies_24k_; + } + rtc::ArrayView<const float, kNumLags12kHz> AutoCorrelation12kHzView() const { + return auto_correlation_12k_; + } + + private: + std::array<float, kBufSize24kHz> pitch_buffer_24k_; + std::array<float, kRefineNumLags24kHz> square_energies_24k_; + std::array<float, kNumLags12kHz> auto_correlation_12k_; +}; + +// Writer for binary files. +class FileWriter { + public: + explicit FileWriter(absl::string_view file_path) + : os_(std::string(file_path), std::ios::binary) {} + FileWriter(const FileWriter&) = delete; + FileWriter& operator=(const FileWriter&) = delete; + ~FileWriter() = default; + void WriteChunk(rtc::ArrayView<const float> value) { + const std::streamsize bytes_to_write = value.size() * sizeof(float); + os_.write(reinterpret_cast<const char*>(value.data()), bytes_to_write); + } + + private: + std::ofstream os_; +}; + +} // namespace rnn_vad +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_TEST_UTILS_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/vector_math.h b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/vector_math.h new file mode 100644 index 0000000000..47f681196a --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/vector_math.h @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_VECTOR_MATH_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_VECTOR_MATH_H_ + +// Defines WEBRTC_ARCH_X86_FAMILY, used below. +#include "rtc_base/system/arch.h" + +#if defined(WEBRTC_HAS_NEON) +#include <arm_neon.h> +#endif +#if defined(WEBRTC_ARCH_X86_FAMILY) +#include <emmintrin.h> +#endif + +#include <numeric> + +#include "api/array_view.h" +#include "modules/audio_processing/agc2/cpu_features.h" +#include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_conversions.h" +#include "rtc_base/system/arch.h" + +namespace webrtc { +namespace rnn_vad { + +// Provides optimizations for mathematical operations having vectors as +// operand(s). +class VectorMath { + public: + explicit VectorMath(AvailableCpuFeatures cpu_features) + : cpu_features_(cpu_features) {} + + // Computes the dot product between two equally sized vectors. + float DotProduct(rtc::ArrayView<const float> x, + rtc::ArrayView<const float> y) const { + RTC_DCHECK_EQ(x.size(), y.size()); +#if defined(WEBRTC_ARCH_X86_FAMILY) + if (cpu_features_.avx2) { + return DotProductAvx2(x, y); + } else if (cpu_features_.sse2) { + __m128 accumulator = _mm_setzero_ps(); + constexpr int kBlockSizeLog2 = 2; + constexpr int kBlockSize = 1 << kBlockSizeLog2; + const int incomplete_block_index = (x.size() >> kBlockSizeLog2) + << kBlockSizeLog2; + for (int i = 0; i < incomplete_block_index; i += kBlockSize) { + RTC_DCHECK_LE(i + kBlockSize, x.size()); + const __m128 x_i = _mm_loadu_ps(&x[i]); + const __m128 y_i = _mm_loadu_ps(&y[i]); + // Multiply-add. + const __m128 z_j = _mm_mul_ps(x_i, y_i); + accumulator = _mm_add_ps(accumulator, z_j); + } + // Reduce `accumulator` by addition. + __m128 high = _mm_movehl_ps(accumulator, accumulator); + accumulator = _mm_add_ps(accumulator, high); + high = _mm_shuffle_ps(accumulator, accumulator, 1); + accumulator = _mm_add_ps(accumulator, high); + float dot_product = _mm_cvtss_f32(accumulator); + // Add the result for the last block if incomplete. + for (int i = incomplete_block_index; + i < rtc::dchecked_cast<int>(x.size()); ++i) { + dot_product += x[i] * y[i]; + } + return dot_product; + } +#elif defined(WEBRTC_HAS_NEON) && defined(WEBRTC_ARCH_ARM64) + if (cpu_features_.neon) { + float32x4_t accumulator = vdupq_n_f32(0.f); + constexpr int kBlockSizeLog2 = 2; + constexpr int kBlockSize = 1 << kBlockSizeLog2; + const int incomplete_block_index = (x.size() >> kBlockSizeLog2) + << kBlockSizeLog2; + for (int i = 0; i < incomplete_block_index; i += kBlockSize) { + RTC_DCHECK_LE(i + kBlockSize, x.size()); + const float32x4_t x_i = vld1q_f32(&x[i]); + const float32x4_t y_i = vld1q_f32(&y[i]); + accumulator = vfmaq_f32(accumulator, x_i, y_i); + } + // Reduce `accumulator` by addition. + const float32x2_t tmp = + vpadd_f32(vget_low_f32(accumulator), vget_high_f32(accumulator)); + float dot_product = vget_lane_f32(vpadd_f32(tmp, vrev64_f32(tmp)), 0); + // Add the result for the last block if incomplete. 
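      // (Editorial note, not part of the upstream file.) `incomplete_block_index`
      // is x.size() rounded down to a multiple of kBlockSize (4 here): e.g. for
      // x.size() == 19, (19 >> 2) << 2 == 16, so the vectorized loop above covers
      // indices [0, 16) and the scalar tail loop below adds indices 16..18.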
+ for (int i = incomplete_block_index; + i < rtc::dchecked_cast<int>(x.size()); ++i) { + dot_product += x[i] * y[i]; + } + return dot_product; + } +#endif + return std::inner_product(x.begin(), x.end(), y.begin(), 0.f); + } + + private: + float DotProductAvx2(rtc::ArrayView<const float> x, + rtc::ArrayView<const float> y) const; + + const AvailableCpuFeatures cpu_features_; +}; + +} // namespace rnn_vad +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_VECTOR_MATH_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/vector_math_avx2.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/vector_math_avx2.cc new file mode 100644 index 0000000000..e4d246d9ab --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/vector_math_avx2.cc @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/rnn_vad/vector_math.h" + +#include <immintrin.h> + +#include "api/array_view.h" +#include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_conversions.h" + +namespace webrtc { +namespace rnn_vad { + +float VectorMath::DotProductAvx2(rtc::ArrayView<const float> x, + rtc::ArrayView<const float> y) const { + RTC_DCHECK(cpu_features_.avx2); + RTC_DCHECK_EQ(x.size(), y.size()); + __m256 accumulator = _mm256_setzero_ps(); + constexpr int kBlockSizeLog2 = 3; + constexpr int kBlockSize = 1 << kBlockSizeLog2; + const int incomplete_block_index = (x.size() >> kBlockSizeLog2) + << kBlockSizeLog2; + for (int i = 0; i < incomplete_block_index; i += kBlockSize) { + RTC_DCHECK_LE(i + kBlockSize, x.size()); + const __m256 x_i = _mm256_loadu_ps(&x[i]); + const __m256 y_i = _mm256_loadu_ps(&y[i]); + accumulator = _mm256_fmadd_ps(x_i, y_i, accumulator); + } + // Reduce `accumulator` by addition. + __m128 high = _mm256_extractf128_ps(accumulator, 1); + __m128 low = _mm256_extractf128_ps(accumulator, 0); + low = _mm_add_ps(high, low); + high = _mm_movehl_ps(high, low); + low = _mm_add_ps(high, low); + high = _mm_shuffle_ps(low, low, 1); + low = _mm_add_ss(high, low); + float dot_product = _mm_cvtss_f32(low); + // Add the result for the last block if incomplete. + for (int i = incomplete_block_index; i < rtc::dchecked_cast<int>(x.size()); + ++i) { + dot_product += x[i] * y[i]; + } + return dot_product; +} + +} // namespace rnn_vad +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/vector_math_avx2_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/vector_math_avx2_gn/moz.build new file mode 100644 index 0000000000..a640c1993a --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/vector_math_avx2_gn/moz.build @@ -0,0 +1,173 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +CXXFLAGS += [ + "-mavx2", + "-mfma" +] + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_AVX2"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/vector_math_avx2.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_GNU_SOURCE"] = True + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ 
+ "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support" + ] + +Library("vector_math_avx2_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/vector_math_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/vector_math_gn/moz.build new file mode 100644 index 0000000000..f3e853fd55 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/vector_math_gn/moz.build @@ -0,0 +1,204 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + 
DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +Library("vector_math_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/vector_math_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/vector_math_unittest.cc new file mode 100644 index 0000000000..45fd65d61e --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/rnn_vad/vector_math_unittest.cc @@ -0,0 +1,71 @@ +/* + 
* Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/rnn_vad/vector_math.h" + +#include <vector> + +#include "modules/audio_processing/agc2/cpu_features.h" +#include "test/gtest.h" + +namespace webrtc { +namespace rnn_vad { +namespace { + +constexpr int kSizeOfX = 19; +constexpr float kX[kSizeOfX] = { + 0.31593041f, 0.9350786f, -0.25252445f, -0.86956251f, -0.9673632f, + 0.54571901f, -0.72504495f, -0.79509912f, -0.25525012f, -0.73340473f, + 0.15747377f, -0.04370565f, 0.76135145f, -0.57239645f, 0.68616848f, + 0.3740298f, 0.34710799f, -0.92207423f, 0.10738454f}; +constexpr int kSizeOfXSubSpan = 16; +static_assert(kSizeOfXSubSpan < kSizeOfX, ""); +constexpr float kEnergyOfX = 7.315563958160327f; +constexpr float kEnergyOfXSubspan = 6.333327669592963f; + +class VectorMathParametrization + : public ::testing::TestWithParam<AvailableCpuFeatures> {}; + +TEST_P(VectorMathParametrization, TestDotProduct) { + VectorMath vector_math(/*cpu_features=*/GetParam()); + EXPECT_FLOAT_EQ(vector_math.DotProduct(kX, kX), kEnergyOfX); + EXPECT_FLOAT_EQ( + vector_math.DotProduct({kX, kSizeOfXSubSpan}, {kX, kSizeOfXSubSpan}), + kEnergyOfXSubspan); +} + +// Finds the relevant CPU features combinations to test. +std::vector<AvailableCpuFeatures> GetCpuFeaturesToTest() { + std::vector<AvailableCpuFeatures> v; + v.push_back({/*sse2=*/false, /*avx2=*/false, /*neon=*/false}); + AvailableCpuFeatures available = GetAvailableCpuFeatures(); + if (available.avx2) { + v.push_back({/*sse2=*/false, /*avx2=*/true, /*neon=*/false}); + } + if (available.sse2) { + v.push_back({/*sse2=*/true, /*avx2=*/false, /*neon=*/false}); + } + if (available.neon) { + v.push_back({/*sse2=*/false, /*avx2=*/false, /*neon=*/true}); + } + return v; +} + +INSTANTIATE_TEST_SUITE_P( + RnnVadTest, + VectorMathParametrization, + ::testing::ValuesIn(GetCpuFeaturesToTest()), + [](const ::testing::TestParamInfo<AvailableCpuFeatures>& info) { + return info.param.ToString(); + }); + +} // namespace +} // namespace rnn_vad +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector.cc b/third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector.cc new file mode 100644 index 0000000000..961baf4cd3 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector.cc @@ -0,0 +1,183 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/agc2/saturation_protector.h" + +#include <memory> + +#include "modules/audio_processing/agc2/agc2_common.h" +#include "modules/audio_processing/agc2/saturation_protector_buffer.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_minmax.h" + +namespace webrtc { +namespace { + +constexpr int kPeakEnveloperSuperFrameLengthMs = 400; +constexpr float kMinMarginDb = 12.0f; +constexpr float kMaxMarginDb = 25.0f; +constexpr float kAttack = 0.9988493699365052f; +constexpr float kDecay = 0.9997697679981565f; + +// Saturation protector state. Defined outside of `SaturationProtectorImpl` to +// implement check-point and restore ops. +struct SaturationProtectorState { + bool operator==(const SaturationProtectorState& s) const { + return headroom_db == s.headroom_db && + peak_delay_buffer == s.peak_delay_buffer && + max_peaks_dbfs == s.max_peaks_dbfs && + time_since_push_ms == s.time_since_push_ms; + } + inline bool operator!=(const SaturationProtectorState& s) const { + return !(*this == s); + } + + float headroom_db; + SaturationProtectorBuffer peak_delay_buffer; + float max_peaks_dbfs; + int time_since_push_ms; // Time since the last ring buffer push operation. +}; + +// Resets the saturation protector state. +void ResetSaturationProtectorState(float initial_headroom_db, + SaturationProtectorState& state) { + state.headroom_db = initial_headroom_db; + state.peak_delay_buffer.Reset(); + state.max_peaks_dbfs = kMinLevelDbfs; + state.time_since_push_ms = 0; +} + +// Updates `state` by analyzing the estimated speech level `speech_level_dbfs` +// and the peak level `peak_dbfs` for an observed frame. `state` must not be +// modified without calling this function. +void UpdateSaturationProtectorState(float peak_dbfs, + float speech_level_dbfs, + SaturationProtectorState& state) { + // Get the max peak over `kPeakEnveloperSuperFrameLengthMs` ms. + state.max_peaks_dbfs = std::max(state.max_peaks_dbfs, peak_dbfs); + state.time_since_push_ms += kFrameDurationMs; + if (rtc::SafeGt(state.time_since_push_ms, kPeakEnveloperSuperFrameLengthMs)) { + // Push `max_peaks_dbfs` back into the ring buffer. + state.peak_delay_buffer.PushBack(state.max_peaks_dbfs); + // Reset. + state.max_peaks_dbfs = kMinLevelDbfs; + state.time_since_push_ms = 0; + } + + // Update the headroom by comparing the estimated speech level and the delayed + // max speech peak. + const float delayed_peak_dbfs = + state.peak_delay_buffer.Front().value_or(state.max_peaks_dbfs); + const float difference_db = delayed_peak_dbfs - speech_level_dbfs; + if (difference_db > state.headroom_db) { + // Attack. + state.headroom_db = + state.headroom_db * kAttack + difference_db * (1.0f - kAttack); + } else { + // Decay. + state.headroom_db = + state.headroom_db * kDecay + difference_db * (1.0f - kDecay); + } + + state.headroom_db = + rtc::SafeClamp<float>(state.headroom_db, kMinMarginDb, kMaxMarginDb); +} + +// Saturation protector which recommends a headroom based on the recent peaks. 
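Before the SaturationProtectorImpl class below, a side note on the smoothing constants kAttack and kDecay defined above: they appear to equal 10^(-1/2000) and 10^(-1/10000), applied once per 10 ms frame. The sketch below (editorial, not part of the upstream code) converts them into half-lives, i.e. the time it takes the headroom to cover half of the remaining distance to a constant target:

#include <cmath>
#include <cstdio>

int main() {
  constexpr double kAttack = 0.9988493699365052;  // ~= pow(10.0, -1.0 / 2000.0)
  constexpr double kDecay = 0.9997697679981565;   // ~= pow(10.0, -1.0 / 10000.0)
  auto half_life_seconds = [](double coefficient) {
    // One update per 10 ms frame: after n frames the residual error scales by
    // coefficient^n; solve coefficient^n == 0.5 for n and convert to seconds.
    return std::log(0.5) / std::log(coefficient) * 0.010;
  };
  std::printf("attack half-life: %.1f s\n", half_life_seconds(kAttack));  // ~6.0 s
  std::printf("decay half-life:  %.1f s\n", half_life_seconds(kDecay));   // ~30.1 s
  return 0;
}

In other words, the headroom grows (attack) with a half-life of roughly 6 seconds and shrinks (decay) with a half-life of roughly 30 seconds, so it rises much faster than it falls.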
+class SaturationProtectorImpl : public SaturationProtector { + public: + explicit SaturationProtectorImpl(float initial_headroom_db, + int adjacent_speech_frames_threshold, + ApmDataDumper* apm_data_dumper) + : apm_data_dumper_(apm_data_dumper), + initial_headroom_db_(initial_headroom_db), + adjacent_speech_frames_threshold_(adjacent_speech_frames_threshold) { + Reset(); + } + SaturationProtectorImpl(const SaturationProtectorImpl&) = delete; + SaturationProtectorImpl& operator=(const SaturationProtectorImpl&) = delete; + ~SaturationProtectorImpl() = default; + + float HeadroomDb() override { return headroom_db_; } + + void Analyze(float speech_probability, + float peak_dbfs, + float speech_level_dbfs) override { + if (speech_probability < kVadConfidenceThreshold) { + // Not a speech frame. + if (adjacent_speech_frames_threshold_ > 1) { + // When two or more adjacent speech frames are required in order to + // update the state, we need to decide whether to discard or confirm the + // updates based on the speech sequence length. + if (num_adjacent_speech_frames_ >= adjacent_speech_frames_threshold_) { + // First non-speech frame after a long enough sequence of speech + // frames. Update the reliable state. + reliable_state_ = preliminary_state_; + } else if (num_adjacent_speech_frames_ > 0) { + // First non-speech frame after a too short sequence of speech frames. + // Reset to the last reliable state. + preliminary_state_ = reliable_state_; + } + } + num_adjacent_speech_frames_ = 0; + } else { + // Speech frame observed. + num_adjacent_speech_frames_++; + + // Update preliminary level estimate. + UpdateSaturationProtectorState(peak_dbfs, speech_level_dbfs, + preliminary_state_); + + if (num_adjacent_speech_frames_ >= adjacent_speech_frames_threshold_) { + // `preliminary_state_` is now reliable. Update the headroom. + headroom_db_ = preliminary_state_.headroom_db; + } + } + DumpDebugData(); + } + + void Reset() override { + num_adjacent_speech_frames_ = 0; + headroom_db_ = initial_headroom_db_; + ResetSaturationProtectorState(initial_headroom_db_, preliminary_state_); + ResetSaturationProtectorState(initial_headroom_db_, reliable_state_); + } + + private: + void DumpDebugData() { + apm_data_dumper_->DumpRaw( + "agc2_saturation_protector_preliminary_max_peak_dbfs", + preliminary_state_.max_peaks_dbfs); + apm_data_dumper_->DumpRaw( + "agc2_saturation_protector_reliable_max_peak_dbfs", + reliable_state_.max_peaks_dbfs); + } + + ApmDataDumper* const apm_data_dumper_; + const float initial_headroom_db_; + const int adjacent_speech_frames_threshold_; + int num_adjacent_speech_frames_; + float headroom_db_; + SaturationProtectorState preliminary_state_; + SaturationProtectorState reliable_state_; +}; + +} // namespace + +std::unique_ptr<SaturationProtector> CreateSaturationProtector( + float initial_headroom_db, + int adjacent_speech_frames_threshold, + ApmDataDumper* apm_data_dumper) { + return std::make_unique<SaturationProtectorImpl>( + initial_headroom_db, adjacent_speech_frames_threshold, apm_data_dumper); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector.h b/third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector.h new file mode 100644 index 0000000000..ef22145d5f --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_SATURATION_PROTECTOR_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_SATURATION_PROTECTOR_H_ + +#include <memory> + +namespace webrtc { +class ApmDataDumper; + +// Saturation protector. Analyzes peak levels and recommends a headroom to +// reduce the chances of clipping. +class SaturationProtector { + public: + virtual ~SaturationProtector() = default; + + // Returns the recommended headroom in dB. + virtual float HeadroomDb() = 0; + + // Analyzes the peak level of a 10 ms frame along with its speech probability + // and the current speech level estimate to update the recommended headroom. + virtual void Analyze(float speech_probability, + float peak_dbfs, + float speech_level_dbfs) = 0; + + // Resets the internal state. + virtual void Reset() = 0; +}; + +// Creates a saturation protector that starts at `initial_headroom_db`. +std::unique_ptr<SaturationProtector> CreateSaturationProtector( + float initial_headroom_db, + int adjacent_speech_frames_threshold, + ApmDataDumper* apm_data_dumper); + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_SATURATION_PROTECTOR_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector_buffer.cc b/third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector_buffer.cc new file mode 100644 index 0000000000..41efdad2c8 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector_buffer.cc @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/agc2/saturation_protector_buffer.h" + +#include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_compare.h" + +namespace webrtc { + +SaturationProtectorBuffer::SaturationProtectorBuffer() = default; + +SaturationProtectorBuffer::~SaturationProtectorBuffer() = default; + +bool SaturationProtectorBuffer::operator==( + const SaturationProtectorBuffer& b) const { + RTC_DCHECK_LE(size_, buffer_.size()); + RTC_DCHECK_LE(b.size_, b.buffer_.size()); + if (size_ != b.size_) { + return false; + } + for (int i = 0, i0 = FrontIndex(), i1 = b.FrontIndex(); i < size_; + ++i, ++i0, ++i1) { + if (buffer_[i0 % buffer_.size()] != b.buffer_[i1 % b.buffer_.size()]) { + return false; + } + } + return true; +} + +int SaturationProtectorBuffer::Capacity() const { + return buffer_.size(); +} + +int SaturationProtectorBuffer::Size() const { + return size_; +} + +void SaturationProtectorBuffer::Reset() { + next_ = 0; + size_ = 0; +} + +void SaturationProtectorBuffer::PushBack(float v) { + RTC_DCHECK_GE(next_, 0); + RTC_DCHECK_GE(size_, 0); + RTC_DCHECK_LT(next_, buffer_.size()); + RTC_DCHECK_LE(size_, buffer_.size()); + buffer_[next_++] = v; + if (rtc::SafeEq(next_, buffer_.size())) { + next_ = 0; + } + if (rtc::SafeLt(size_, buffer_.size())) { + size_++; + } +} + +absl::optional<float> SaturationProtectorBuffer::Front() const { + if (size_ == 0) { + return absl::nullopt; + } + RTC_DCHECK_LT(FrontIndex(), buffer_.size()); + return buffer_[FrontIndex()]; +} + +int SaturationProtectorBuffer::FrontIndex() const { + return rtc::SafeEq(size_, buffer_.size()) ? next_ : 0; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector_buffer.h b/third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector_buffer.h new file mode 100644 index 0000000000..e17d0998c4 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector_buffer.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_SATURATION_PROTECTOR_BUFFER_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_SATURATION_PROTECTOR_BUFFER_H_ + +#include <array> + +#include "absl/types/optional.h" +#include "modules/audio_processing/agc2/agc2_common.h" + +namespace webrtc { + +// Ring buffer for the saturation protector which only supports (i) push back +// and (ii) read oldest item. +class SaturationProtectorBuffer { + public: + SaturationProtectorBuffer(); + ~SaturationProtectorBuffer(); + + bool operator==(const SaturationProtectorBuffer& b) const; + inline bool operator!=(const SaturationProtectorBuffer& b) const { + return !(*this == b); + } + + // Maximum number of values that the buffer can contain. + int Capacity() const; + + // Number of values in the buffer. + int Size() const; + + void Reset(); + + // Pushes back `v`. If the buffer is full, the oldest value is replaced. + void PushBack(float v); + + // Returns the oldest item in the buffer. Returns an empty value if the + // buffer is empty. 
+ absl::optional<float> Front() const; + + private: + int FrontIndex() const; + // `buffer_` has `size_` elements (up to the size of `buffer_`) and `next_` is + // the position where the next new value is written in `buffer_`. + std::array<float, kSaturationProtectorBufferSize> buffer_; + int next_ = 0; + int size_ = 0; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_SATURATION_PROTECTOR_BUFFER_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector_buffer_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector_buffer_unittest.cc new file mode 100644 index 0000000000..22187bf027 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector_buffer_unittest.cc @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/saturation_protector_buffer.h" + +#include "test/gmock.h" +#include "test/gtest.h" + +namespace webrtc { +namespace { + +using ::testing::Eq; +using ::testing::Optional; + +TEST(GainController2SaturationProtectorBuffer, Init) { + SaturationProtectorBuffer b; + EXPECT_EQ(b.Size(), 0); + EXPECT_FALSE(b.Front().has_value()); +} + +TEST(GainController2SaturationProtectorBuffer, PushBack) { + SaturationProtectorBuffer b; + constexpr float kValue = 123.0f; + b.PushBack(kValue); + EXPECT_EQ(b.Size(), 1); + EXPECT_THAT(b.Front(), Optional(Eq(kValue))); +} + +TEST(GainController2SaturationProtectorBuffer, Reset) { + SaturationProtectorBuffer b; + b.PushBack(123.0f); + b.Reset(); + EXPECT_EQ(b.Size(), 0); + EXPECT_FALSE(b.Front().has_value()); +} + +// Checks that the front value does not change until the ring buffer gets full. +TEST(GainController2SaturationProtectorBuffer, FrontUntilBufferIsFull) { + SaturationProtectorBuffer b; + constexpr float kValue = 123.0f; + b.PushBack(kValue); + for (int i = 1; i < b.Capacity(); ++i) { + SCOPED_TRACE(i); + EXPECT_THAT(b.Front(), Optional(Eq(kValue))); + b.PushBack(kValue + i); + } +} + +// Checks that when the buffer is full it behaves as a shift register. +TEST(GainController2SaturationProtectorBuffer, FrontIsDelayed) { + SaturationProtectorBuffer b; + // Fill the buffer. + for (int i = 0; i < b.Capacity(); ++i) { + b.PushBack(i); + } + // The ring buffer should now behave as a shift register with a delay equal to + // its capacity. + for (int i = b.Capacity(); i < 2 * b.Capacity() + 1; ++i) { + SCOPED_TRACE(i); + EXPECT_THAT(b.Front(), Optional(Eq(i - b.Capacity()))); + b.PushBack(i); + } +} + +} // namespace +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector_unittest.cc new file mode 100644 index 0000000000..3b104be8cd --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/saturation_protector_unittest.cc @@ -0,0 +1,140 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/saturation_protector.h" + +#include "modules/audio_processing/agc2/agc2_common.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/gunit.h" + +namespace webrtc { +namespace { + +constexpr float kInitialHeadroomDb = 20.0f; +constexpr int kNoAdjacentSpeechFramesRequired = 1; +constexpr float kMaxSpeechProbability = 1.0f; + +// Calls `Analyze(speech_probability, peak_dbfs, speech_level_dbfs)` +// `num_iterations` times on `saturation_protector` and return the largest +// headroom difference between two consecutive calls. +float RunOnConstantLevel(int num_iterations, + float speech_probability, + float peak_dbfs, + float speech_level_dbfs, + SaturationProtector& saturation_protector) { + float last_headroom = saturation_protector.HeadroomDb(); + float max_difference = 0.0f; + for (int i = 0; i < num_iterations; ++i) { + saturation_protector.Analyze(speech_probability, peak_dbfs, + speech_level_dbfs); + const float new_headroom = saturation_protector.HeadroomDb(); + max_difference = + std::max(max_difference, std::fabs(new_headroom - last_headroom)); + last_headroom = new_headroom; + } + return max_difference; +} + +// Checks that the returned headroom value is correctly reset. +TEST(GainController2SaturationProtector, Reset) { + ApmDataDumper apm_data_dumper(0); + auto saturation_protector = CreateSaturationProtector( + kInitialHeadroomDb, kNoAdjacentSpeechFramesRequired, &apm_data_dumper); + const float initial_headroom_db = saturation_protector->HeadroomDb(); + RunOnConstantLevel(/*num_iterations=*/10, kMaxSpeechProbability, + /*peak_dbfs=*/0.0f, + /*speech_level_dbfs=*/-10.0f, *saturation_protector); + // Make sure that there are side-effects. + ASSERT_NE(initial_headroom_db, saturation_protector->HeadroomDb()); + saturation_protector->Reset(); + EXPECT_EQ(initial_headroom_db, saturation_protector->HeadroomDb()); +} + +// Checks that the estimate converges to the ratio between peaks and level +// estimator values after a while. +TEST(GainController2SaturationProtector, EstimatesCrestRatio) { + constexpr int kNumIterations = 2000; + constexpr float kPeakLevelDbfs = -20.0f; + constexpr float kCrestFactorDb = kInitialHeadroomDb + 1.0f; + constexpr float kSpeechLevelDbfs = kPeakLevelDbfs - kCrestFactorDb; + const float kMaxDifferenceDb = + 0.5f * std::fabs(kInitialHeadroomDb - kCrestFactorDb); + + ApmDataDumper apm_data_dumper(0); + auto saturation_protector = CreateSaturationProtector( + kInitialHeadroomDb, kNoAdjacentSpeechFramesRequired, &apm_data_dumper); + RunOnConstantLevel(kNumIterations, kMaxSpeechProbability, kPeakLevelDbfs, + kSpeechLevelDbfs, *saturation_protector); + EXPECT_NEAR(saturation_protector->HeadroomDb(), kCrestFactorDb, + kMaxDifferenceDb); +} + +// Checks that the headroom does not change too quickly. 
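For reference on the bound used in the test below: RunOnConstantLevel() returns the largest headroom change between two consecutive Analyze() calls, and the assertion allows at most kMaxChangeSpeedDbPerSecond / 1000 * kFrameDurationMs = 0.5 / 1000 * 10 = 0.005 dB per call, i.e. half a dB of headroom adaptation per second (assuming the 10 ms kFrameDurationMs used throughout AGC2).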
+TEST(GainController2SaturationProtector, ChangeSlowly) { + constexpr int kNumIterations = 1000; + constexpr float kPeakLevelDbfs = -20.f; + constexpr float kCrestFactorDb = kInitialHeadroomDb - 5.f; + constexpr float kOtherCrestFactorDb = kInitialHeadroomDb; + constexpr float kSpeechLevelDbfs = kPeakLevelDbfs - kCrestFactorDb; + constexpr float kOtherSpeechLevelDbfs = kPeakLevelDbfs - kOtherCrestFactorDb; + + ApmDataDumper apm_data_dumper(0); + auto saturation_protector = CreateSaturationProtector( + kInitialHeadroomDb, kNoAdjacentSpeechFramesRequired, &apm_data_dumper); + float max_difference_db = + RunOnConstantLevel(kNumIterations, kMaxSpeechProbability, kPeakLevelDbfs, + kSpeechLevelDbfs, *saturation_protector); + max_difference_db = std::max( + RunOnConstantLevel(kNumIterations, kMaxSpeechProbability, kPeakLevelDbfs, + kOtherSpeechLevelDbfs, *saturation_protector), + max_difference_db); + constexpr float kMaxChangeSpeedDbPerSecond = 0.5f; // 1 db / 2 seconds. + EXPECT_LE(max_difference_db, + kMaxChangeSpeedDbPerSecond / 1000 * kFrameDurationMs); +} + +class SaturationProtectorParametrization + : public ::testing::TestWithParam<int> { + protected: + int adjacent_speech_frames_threshold() const { return GetParam(); } +}; + +TEST_P(SaturationProtectorParametrization, DoNotAdaptToShortSpeechSegments) { + ApmDataDumper apm_data_dumper(0); + auto saturation_protector = CreateSaturationProtector( + kInitialHeadroomDb, adjacent_speech_frames_threshold(), &apm_data_dumper); + const float initial_headroom_db = saturation_protector->HeadroomDb(); + RunOnConstantLevel(/*num_iterations=*/adjacent_speech_frames_threshold() - 1, + kMaxSpeechProbability, + /*peak_dbfs=*/0.0f, + /*speech_level_dbfs=*/-10.0f, *saturation_protector); + // No adaptation expected. + EXPECT_EQ(initial_headroom_db, saturation_protector->HeadroomDb()); +} + +TEST_P(SaturationProtectorParametrization, AdaptToEnoughSpeechSegments) { + ApmDataDumper apm_data_dumper(0); + auto saturation_protector = CreateSaturationProtector( + kInitialHeadroomDb, adjacent_speech_frames_threshold(), &apm_data_dumper); + const float initial_headroom_db = saturation_protector->HeadroomDb(); + RunOnConstantLevel(/*num_iterations=*/adjacent_speech_frames_threshold() + 1, + kMaxSpeechProbability, + /*peak_dbfs=*/0.0f, + /*speech_level_dbfs=*/-10.0f, *saturation_protector); + // Adaptation expected. + EXPECT_NE(initial_headroom_db, saturation_protector->HeadroomDb()); +} + +INSTANTIATE_TEST_SUITE_P(GainController2, + SaturationProtectorParametrization, + ::testing::Values(2, 9, 17)); + +} // namespace +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/vad_wrapper.cc b/third_party/libwebrtc/modules/audio_processing/agc2/vad_wrapper.cc new file mode 100644 index 0000000000..91448f8d86 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/vad_wrapper.cc @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/agc2/vad_wrapper.h" + +#include <array> +#include <utility> + +#include "api/array_view.h" +#include "common_audio/resampler/include/push_resampler.h" +#include "modules/audio_processing/agc2/agc2_common.h" +#include "modules/audio_processing/agc2/rnn_vad/common.h" +#include "modules/audio_processing/agc2/rnn_vad/features_extraction.h" +#include "modules/audio_processing/agc2/rnn_vad/rnn.h" +#include "rtc_base/checks.h" + +namespace webrtc { +namespace { + +constexpr int kNumFramesPerSecond = 100; + +class MonoVadImpl : public VoiceActivityDetectorWrapper::MonoVad { + public: + explicit MonoVadImpl(const AvailableCpuFeatures& cpu_features) + : features_extractor_(cpu_features), rnn_vad_(cpu_features) {} + MonoVadImpl(const MonoVadImpl&) = delete; + MonoVadImpl& operator=(const MonoVadImpl&) = delete; + ~MonoVadImpl() = default; + + int SampleRateHz() const override { return rnn_vad::kSampleRate24kHz; } + void Reset() override { rnn_vad_.Reset(); } + float Analyze(rtc::ArrayView<const float> frame) override { + RTC_DCHECK_EQ(frame.size(), rnn_vad::kFrameSize10ms24kHz); + std::array<float, rnn_vad::kFeatureVectorSize> feature_vector; + const bool is_silence = features_extractor_.CheckSilenceComputeFeatures( + /*samples=*/{frame.data(), rnn_vad::kFrameSize10ms24kHz}, + feature_vector); + return rnn_vad_.ComputeVadProbability(feature_vector, is_silence); + } + + private: + rnn_vad::FeaturesExtractor features_extractor_; + rnn_vad::RnnVad rnn_vad_; +}; + +} // namespace + +VoiceActivityDetectorWrapper::VoiceActivityDetectorWrapper( + int vad_reset_period_ms, + const AvailableCpuFeatures& cpu_features, + int sample_rate_hz) + : VoiceActivityDetectorWrapper(vad_reset_period_ms, + std::make_unique<MonoVadImpl>(cpu_features), + sample_rate_hz) {} + +VoiceActivityDetectorWrapper::VoiceActivityDetectorWrapper( + int vad_reset_period_ms, + std::unique_ptr<MonoVad> vad, + int sample_rate_hz) + : vad_reset_period_frames_( + rtc::CheckedDivExact(vad_reset_period_ms, kFrameDurationMs)), + time_to_vad_reset_(vad_reset_period_frames_), + vad_(std::move(vad)) { + RTC_DCHECK(vad_); + RTC_DCHECK_GT(vad_reset_period_frames_, 1); + resampled_buffer_.resize( + rtc::CheckedDivExact(vad_->SampleRateHz(), kNumFramesPerSecond)); + Initialize(sample_rate_hz); +} + +VoiceActivityDetectorWrapper::~VoiceActivityDetectorWrapper() = default; + +void VoiceActivityDetectorWrapper::Initialize(int sample_rate_hz) { + RTC_DCHECK_GT(sample_rate_hz, 0); + frame_size_ = rtc::CheckedDivExact(sample_rate_hz, kNumFramesPerSecond); + int status = + resampler_.InitializeIfNeeded(sample_rate_hz, vad_->SampleRateHz(), + /*num_channels=*/1); + constexpr int kStatusOk = 0; + RTC_DCHECK_EQ(status, kStatusOk); + vad_->Reset(); +} + +float VoiceActivityDetectorWrapper::Analyze(AudioFrameView<const float> frame) { + // Periodically reset the VAD. + time_to_vad_reset_--; + if (time_to_vad_reset_ <= 0) { + vad_->Reset(); + time_to_vad_reset_ = vad_reset_period_frames_; + } + // Resample the first channel of `frame`. 
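+  // For example (illustrative numbers): a 10 ms input frame at 48 kHz holds +  // 480 samples and is resampled here to 240 samples for a VAD running at +  // rnn_vad::kSampleRate24kHz.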
+ RTC_DCHECK_EQ(frame.samples_per_channel(), frame_size_); + resampler_.Resample(frame.channel(0).data(), frame_size_, + resampled_buffer_.data(), resampled_buffer_.size()); + + return vad_->Analyze(resampled_buffer_); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/vad_wrapper.h b/third_party/libwebrtc/modules/audio_processing/agc2/vad_wrapper.h new file mode 100644 index 0000000000..6df0ead271 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/vad_wrapper.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_VAD_WRAPPER_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_VAD_WRAPPER_H_ + +#include <memory> +#include <vector> + +#include "api/array_view.h" +#include "common_audio/resampler/include/push_resampler.h" +#include "modules/audio_processing/agc2/cpu_features.h" +#include "modules/audio_processing/include/audio_frame_view.h" + +namespace webrtc { + +// Wraps a single-channel Voice Activity Detector (VAD) which is used to analyze +// the first channel of the input audio frames. Takes care of resampling the +// input frames to match the sample rate of the wrapped VAD and periodically +// resets the VAD. +class VoiceActivityDetectorWrapper { + public: + // Single channel VAD interface. + class MonoVad { + public: + virtual ~MonoVad() = default; + // Returns the sample rate (Hz) required for the input frames analyzed by + // `Analyze()`. + virtual int SampleRateHz() const = 0; + // Resets the internal state. + virtual void Reset() = 0; + // Analyzes an audio frame and returns the speech probability. + virtual float Analyze(rtc::ArrayView<const float> frame) = 0; + }; + + // Ctor. `vad_reset_period_ms` indicates the period in milliseconds to call + // `MonoVad::Reset()`; it must be equal to or greater than the duration of two + // frames. Uses `cpu_features` to instantiate the default VAD. + VoiceActivityDetectorWrapper(int vad_reset_period_ms, + const AvailableCpuFeatures& cpu_features, + int sample_rate_hz); + // Ctor. Uses a custom `vad`. + VoiceActivityDetectorWrapper(int vad_reset_period_ms, + std::unique_ptr<MonoVad> vad, + int sample_rate_hz); + + VoiceActivityDetectorWrapper(const VoiceActivityDetectorWrapper&) = delete; + VoiceActivityDetectorWrapper& operator=(const VoiceActivityDetectorWrapper&) = + delete; + ~VoiceActivityDetectorWrapper(); + + // Initializes the VAD wrapper. + void Initialize(int sample_rate_hz); + + // Analyzes the first channel of `frame` and returns the speech probability. + // `frame` must be a 10 ms frame with the sample rate specified in the last + // `Initialize()` call.
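+ // Usage sketch (illustrative only; the reset period and sample rate are + // arbitrary, and `GetAvailableCpuFeatures()` is assumed to be the helper + // declared in cpu_features.h): + //   VoiceActivityDetectorWrapper vad(/*vad_reset_period_ms=*/1500, + //                                    GetAvailableCpuFeatures(), + //                                    /*sample_rate_hz=*/48000); + //   float speech_probability = vad.Analyze(frame);  // `frame`: 10 ms frame.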
+ float Analyze(AudioFrameView<const float> frame); + + private: + const int vad_reset_period_frames_; + int frame_size_; + int time_to_vad_reset_; + PushResampler<float> resampler_; + std::unique_ptr<MonoVad> vad_; + std::vector<float> resampled_buffer_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_VAD_WRAPPER_H_ diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/vad_wrapper_gn/moz.build b/third_party/libwebrtc/modules/audio_processing/agc2/vad_wrapper_gn/moz.build new file mode 100644 index 0000000000..30e1f28164 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/vad_wrapper_gn/moz.build @@ -0,0 +1,212 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["ABSL_ALLOCATOR_NOTHROW"] = "1" +DEFINES["RTC_DAV1D_IN_INTERNAL_DECODER_FACTORY"] = True +DEFINES["RTC_ENABLE_VP9"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_LIBRARY_IMPL"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "/ipc/chromium/src", + "/third_party/libwebrtc/", + "/third_party/libwebrtc/third_party/abseil-cpp/", + "/tools/profiler/public" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/modules/audio_processing/agc2/vad_wrapper.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION_ROLL"] = "r22_1" + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_GNU_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_LIBCPP_HAS_NO_ALIGNED_ALLOCATION"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORES"] = "0" + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_AURA"] = "1" + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_UDEV"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_GLIB"] = "1" + DEFINES["USE_OZONE"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["_LARGEFILE64_SOURCE"] = True + DEFINES["_LARGEFILE_SOURCE"] = 
True + DEFINES["__STDC_CONSTANT_MACROS"] = True + DEFINES["__STDC_FORMAT_MACROS"] = True + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "2" + DEFINES["UNICODE"] = True + DEFINES["USE_AURA"] = "1" + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINAPI_FAMILY"] = "WINAPI_FAMILY_DESKTOP_APP" + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_ENABLE_EXTENDED_ALIGNED_STORAGE"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_HAS_NODISCARD"] = True + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "crypt32", + "iphlpapi", + "secur32", + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_DEBUG"] = True + +if CONFIG["MOZ_DEBUG"] == "1" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["_HAS_ITERATOR_DEBUGGING"] = "0" + +if CONFIG["MOZ_X11"] == "1" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_X11"] = "1" + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support", + "unwind" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + OS_LIBS += [ + "android_support" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "arm" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_ENABLE_AVX2"] = True + DEFINES["_GNU_SOURCE"] = True + +Library("vad_wrapper_gn") diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/vad_wrapper_unittest.cc b/third_party/libwebrtc/modules/audio_processing/agc2/vad_wrapper_unittest.cc new file mode 100644 index 0000000000..91efdb566e --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/vad_wrapper_unittest.cc @@ -0,0 +1,181 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/vad_wrapper.h" + +#include <limits> +#include <memory> +#include <tuple> +#include <utility> +#include <vector> + +#include "modules/audio_processing/agc2/agc2_common.h" +#include "modules/audio_processing/include/audio_frame_view.h" +#include "rtc_base/checks.h" +#include "rtc_base/gunit.h" +#include "rtc_base/numerics/safe_compare.h" +#include "test/gmock.h" + +namespace webrtc { +namespace { + +using ::testing::AnyNumber; +using ::testing::Return; +using ::testing::ReturnRoundRobin; +using ::testing::Truly; + +constexpr int kNumFramesPerSecond = 100; + +constexpr int kNoVadPeriodicReset = + kFrameDurationMs * (std::numeric_limits<int>::max() / kFrameDurationMs); + +constexpr int kSampleRate8kHz = 8000; + +class MockVad : public VoiceActivityDetectorWrapper::MonoVad { + public: + MOCK_METHOD(int, SampleRateHz, (), (const, override)); + MOCK_METHOD(void, Reset, (), (override)); + MOCK_METHOD(float, Analyze, (rtc::ArrayView<const float> frame), (override)); +}; + +// Checks that the ctor and `Initialize()` read the sample rate of the wrapped +// VAD. +TEST(GainController2VoiceActivityDetectorWrapper, CtorAndInitReadSampleRate) { + auto vad = std::make_unique<MockVad>(); + EXPECT_CALL(*vad, SampleRateHz) + .Times(2) + .WillRepeatedly(Return(kSampleRate8kHz)); + EXPECT_CALL(*vad, Reset).Times(AnyNumber()); + auto vad_wrapper = std::make_unique<VoiceActivityDetectorWrapper>( + kNoVadPeriodicReset, std::move(vad), kSampleRate8kHz); +} + +// Creates a `VoiceActivityDetectorWrapper` injecting a mock VAD that +// repeatedly returns the next value from `speech_probabilities` and that +// restarts from the beginning after the last element is returned. +std::unique_ptr<VoiceActivityDetectorWrapper> CreateMockVadWrapper( + int vad_reset_period_ms, + int sample_rate_hz, + const std::vector<float>& speech_probabilities, + int expected_vad_reset_calls) { + auto vad = std::make_unique<MockVad>(); + EXPECT_CALL(*vad, SampleRateHz) + .Times(AnyNumber()) + .WillRepeatedly(Return(sample_rate_hz)); + if (expected_vad_reset_calls >= 0) { + EXPECT_CALL(*vad, Reset).Times(expected_vad_reset_calls); + } + EXPECT_CALL(*vad, Analyze) + .Times(AnyNumber()) + .WillRepeatedly(ReturnRoundRobin(speech_probabilities)); + return std::make_unique<VoiceActivityDetectorWrapper>( + vad_reset_period_ms, std::move(vad), kSampleRate8kHz); +} + +// 10 ms mono frame. +struct FrameWithView { + // Ctor. Initializes the frame samples to zero. + explicit FrameWithView(int sample_rate_hz) + : samples(rtc::CheckedDivExact(sample_rate_hz, kNumFramesPerSecond), + 0.0f), + channel0(samples.data()), + view(&channel0, /*num_channels=*/1, samples.size()) {} + std::vector<float> samples; + const float* const channel0; + const AudioFrameView<const float> view; +}; + +// Checks that the expected speech probabilities are returned.
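+// Note: given `kNoVadPeriodicReset`, the single `Reset()` call expected below +// is presumably the one issued by `Initialize()` from the wrapper constructor +// rather than a periodic reset.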
+TEST(GainController2VoiceActivityDetectorWrapper, CheckSpeechProbabilities) { + const std::vector<float> speech_probabilities{0.709f, 0.484f, 0.882f, 0.167f, + 0.44f, 0.525f, 0.858f, 0.314f, + 0.653f, 0.965f, 0.413f, 0.0f}; + auto vad_wrapper = CreateMockVadWrapper(kNoVadPeriodicReset, kSampleRate8kHz, + speech_probabilities, + /*expected_vad_reset_calls=*/1); + FrameWithView frame(kSampleRate8kHz); + for (int i = 0; rtc::SafeLt(i, speech_probabilities.size()); ++i) { + SCOPED_TRACE(i); + EXPECT_EQ(speech_probabilities[i], vad_wrapper->Analyze(frame.view)); + } +} + +// Checks that the VAD is not periodically reset. +TEST(GainController2VoiceActivityDetectorWrapper, VadNoPeriodicReset) { + constexpr int kNumFrames = 19; + auto vad_wrapper = CreateMockVadWrapper(kNoVadPeriodicReset, kSampleRate8kHz, + /*speech_probabilities=*/{1.0f}, + /*expected_vad_reset_calls=*/1); + FrameWithView frame(kSampleRate8kHz); + for (int i = 0; i < kNumFrames; ++i) { + vad_wrapper->Analyze(frame.view); + } +} + +class VadPeriodResetParametrization + : public ::testing::TestWithParam<std::tuple<int, int>> { + protected: + int num_frames() const { return std::get<0>(GetParam()); } + int vad_reset_period_frames() const { return std::get<1>(GetParam()); } +}; + +// Checks that the VAD is periodically reset with the expected period. +TEST_P(VadPeriodResetParametrization, VadPeriodicReset) { + auto vad_wrapper = CreateMockVadWrapper( + /*vad_reset_period_ms=*/vad_reset_period_frames() * kFrameDurationMs, + kSampleRate8kHz, + /*speech_probabilities=*/{1.0f}, + /*expected_vad_reset_calls=*/1 + + num_frames() / vad_reset_period_frames()); + FrameWithView frame(kSampleRate8kHz); + for (int i = 0; i < num_frames(); ++i) { + vad_wrapper->Analyze(frame.view); + } +} + +INSTANTIATE_TEST_SUITE_P(GainController2VoiceActivityDetectorWrapper, + VadPeriodResetParametrization, + ::testing::Combine(::testing::Values(1, 19, 123), + ::testing::Values(2, 5, 20, 53))); + +class VadResamplingParametrization + : public ::testing::TestWithParam<std::tuple<int, int>> { + protected: + int input_sample_rate_hz() const { return std::get<0>(GetParam()); } + int vad_sample_rate_hz() const { return std::get<1>(GetParam()); } +}; + +// Checks that regardless of the input audio sample rate, the wrapped VAD +// analyzes frames having the expected size, that is according to its internal +// sample rate. 
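+// For example (illustrative case): with a 44100 Hz input and a 24000 Hz VAD, +// each 10 ms input frame holds 441 samples, while the wrapped VAD should +// receive 240-sample frames.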
+TEST_P(VadResamplingParametrization, CheckResampledFrameSize) { + auto vad = std::make_unique<MockVad>(); + EXPECT_CALL(*vad, SampleRateHz) + .Times(AnyNumber()) + .WillRepeatedly(Return(vad_sample_rate_hz())); + EXPECT_CALL(*vad, Reset).Times(1); + EXPECT_CALL(*vad, Analyze(Truly([this](rtc::ArrayView<const float> frame) { + return rtc::SafeEq(frame.size(), rtc::CheckedDivExact(vad_sample_rate_hz(), + kNumFramesPerSecond)); + }))).Times(1); + auto vad_wrapper = std::make_unique<VoiceActivityDetectorWrapper>( + kNoVadPeriodicReset, std::move(vad), input_sample_rate_hz()); + FrameWithView frame(input_sample_rate_hz()); + vad_wrapper->Analyze(frame.view); +} + +INSTANTIATE_TEST_SUITE_P( + GainController2VoiceActivityDetectorWrapper, + VadResamplingParametrization, + ::testing::Combine(::testing::Values(8000, 16000, 44100, 48000), + ::testing::Values(6000, 8000, 12000, 16000, 24000))); + +} // namespace +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/vector_float_frame.cc b/third_party/libwebrtc/modules/audio_processing/agc2/vector_float_frame.cc new file mode 100644 index 0000000000..a70d815196 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/vector_float_frame.cc @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/vector_float_frame.h" + +namespace webrtc { + +namespace { + +std::vector<float*> ConstructChannelPointers( + std::vector<std::vector<float>>* x) { + std::vector<float*> channel_ptrs; + for (auto& v : *x) { + channel_ptrs.push_back(v.data()); + } + return channel_ptrs; +} +} // namespace + +VectorFloatFrame::VectorFloatFrame(int num_channels, + int samples_per_channel, + float start_value) + : channels_(num_channels, + std::vector<float>(samples_per_channel, start_value)), + channel_ptrs_(ConstructChannelPointers(&channels_)), + float_frame_view_(channel_ptrs_.data(), + channels_.size(), + samples_per_channel) {} + +VectorFloatFrame::~VectorFloatFrame() = default; + +} // namespace webrtc diff --git a/third_party/libwebrtc/modules/audio_processing/agc2/vector_float_frame.h b/third_party/libwebrtc/modules/audio_processing/agc2/vector_float_frame.h new file mode 100644 index 0000000000..b521f346f9 --- /dev/null +++ b/third_party/libwebrtc/modules/audio_processing/agc2/vector_float_frame.h @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_VECTOR_FLOAT_FRAME_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_VECTOR_FLOAT_FRAME_H_ + +#include <vector> + +#include "modules/audio_processing/include/audio_frame_view.h" + +namespace webrtc { + +// A construct consisting of a multi-channel audio frame, and an AudioFrameView +// of it.
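+// Usage sketch (illustrative values): `VectorFloatFrame frame(/*num_channels=*/2, +// /*samples_per_channel=*/480, /*start_value=*/0.0f);` builds a two-channel, +// 10 ms frame at 48 kHz whose samples are read and written through +// `frame.float_frame_view()`.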
+class VectorFloatFrame { + public: + VectorFloatFrame(int num_channels, + int samples_per_channel, + float start_value); + const AudioFrameView<float>& float_frame_view() { return float_frame_view_; } + AudioFrameView<const float> float_frame_view() const { + return float_frame_view_; + } + + ~VectorFloatFrame(); + + private: + std::vector<std::vector<float>> channels_; + std::vector<float*> channel_ptrs_; + AudioFrameView<float> float_frame_view_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_VECTOR_FLOAT_FRAME_H_