| author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 14:29:10 +0000 |
|---|---|---|
| committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 14:29:10 +0000 |
| commit | 2aa4a82499d4becd2284cdb482213d541b8804dd (patch) | |
| tree | b80bf8bf13c3766139fbacc530efd0dd9d54394c /third_party/libwebrtc/webrtc/modules/audio_processing | |
| parent | Initial commit. (diff) | |
Adding upstream version 86.0.1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/libwebrtc/webrtc/modules/audio_processing')
478 files changed, 91782 insertions, 0 deletions
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/BUILD.gn b/third_party/libwebrtc/webrtc/modules/audio_processing/BUILD.gn new file mode 100644 index 0000000000..23eefc22c7 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/BUILD.gn @@ -0,0 +1,905 @@ +# Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +import("//build/config/arm.gni") +import("../../webrtc.gni") +if (!build_with_mozilla) { + import("//third_party/protobuf/proto_library.gni") +} + +declare_args() { + # Disables the usual mode where we trust the reported system delay + # values the AEC receives. The corresponding define is set appropriately + # in the code, but it can be force-enabled here for testing. + aec_untrusted_delay_for_testing = false +} + +rtc_static_library("audio_processing") { + sources = [ + "aec/aec_core.cc", + "aec/aec_core.h", + "aec/aec_core_optimized_methods.h", + "aec/aec_resampler.cc", + "aec/aec_resampler.h", + "aec/echo_cancellation.cc", + "aec/echo_cancellation.h", + "aec3/adaptive_fir_filter.cc", + "aec3/adaptive_fir_filter.h", + "aec3/aec3_common.cc", + "aec3/aec3_common.h", + "aec3/aec3_fft.cc", + "aec3/aec3_fft.h", + "aec3/aec_state.cc", + "aec3/aec_state.h", + "aec3/block_framer.cc", + "aec3/block_framer.h", + "aec3/block_processor.cc", + "aec3/block_processor.h", + "aec3/block_processor_metrics.cc", + "aec3/block_processor_metrics.h", + "aec3/cascaded_biquad_filter.cc", + "aec3/cascaded_biquad_filter.h", + "aec3/comfort_noise_generator.cc", + "aec3/comfort_noise_generator.h", + "aec3/decimator.cc", + "aec3/decimator.h", + "aec3/downsampled_render_buffer.cc", + "aec3/downsampled_render_buffer.h", + "aec3/echo_canceller3.cc", + "aec3/echo_canceller3.h", + "aec3/echo_path_delay_estimator.cc", + "aec3/echo_path_delay_estimator.h", + "aec3/echo_path_variability.cc", + "aec3/echo_path_variability.h", + "aec3/echo_remover.cc", + "aec3/echo_remover.h", + "aec3/echo_remover_metrics.cc", + "aec3/echo_remover_metrics.h", + "aec3/erl_estimator.cc", + "aec3/erl_estimator.h", + "aec3/erle_estimator.cc", + "aec3/erle_estimator.h", + "aec3/fft_data.h", + "aec3/frame_blocker.cc", + "aec3/frame_blocker.h", + "aec3/main_filter_update_gain.cc", + "aec3/main_filter_update_gain.h", + "aec3/matched_filter.cc", + "aec3/matched_filter.h", + "aec3/matched_filter_lag_aggregator.cc", + "aec3/matched_filter_lag_aggregator.h", + "aec3/output_selector.cc", + "aec3/output_selector.h", + "aec3/render_buffer.cc", + "aec3/render_buffer.h", + "aec3/render_delay_buffer.cc", + "aec3/render_delay_buffer.h", + "aec3/render_delay_controller.cc", + "aec3/render_delay_controller.h", + "aec3/render_delay_controller_metrics.cc", + "aec3/render_delay_controller_metrics.h", + "aec3/render_signal_analyzer.cc", + "aec3/render_signal_analyzer.h", + "aec3/residual_echo_estimator.cc", + "aec3/residual_echo_estimator.h", + "aec3/shadow_filter_update_gain.cc", + "aec3/shadow_filter_update_gain.h", + "aec3/subtractor.cc", + "aec3/subtractor.h", + "aec3/subtractor_output.h", + "aec3/suppression_filter.cc", + "aec3/suppression_filter.h", + "aec3/suppression_gain.cc", + "aec3/suppression_gain.h", + "aec3/vector_math.h", + "aecm/aecm_core.cc", + "aecm/aecm_core.h", + 
"aecm/echo_control_mobile.cc", + "aecm/echo_control_mobile.h", + "agc/agc.cc", + "agc/agc.h", + "agc/agc_manager_direct.cc", + "agc/agc_manager_direct.h", + "agc/gain_map_internal.h", + "agc/loudness_histogram.cc", + "agc/loudness_histogram.h", + "agc/utility.cc", + "agc/utility.h", + "agc2/gain_controller2.cc", + "agc2/gain_controller2.h", + "audio_buffer.cc", + "audio_buffer.h", + "audio_processing_impl.cc", + "audio_processing_impl.h", + "beamformer/array_util.cc", + "beamformer/array_util.h", + "beamformer/complex_matrix.h", + "beamformer/covariance_matrix_generator.cc", + "beamformer/covariance_matrix_generator.h", + "beamformer/matrix.h", + "beamformer/nonlinear_beamformer.cc", + "beamformer/nonlinear_beamformer.h", + "common.h", + "echo_cancellation_impl.cc", + "echo_cancellation_impl.h", + "echo_control_mobile_impl.cc", + "echo_control_mobile_impl.h", + "echo_detector/circular_buffer.cc", + "echo_detector/circular_buffer.h", + "echo_detector/mean_variance_estimator.cc", + "echo_detector/mean_variance_estimator.h", + "echo_detector/moving_max.cc", + "echo_detector/moving_max.h", + "echo_detector/normalized_covariance_estimator.cc", + "echo_detector/normalized_covariance_estimator.h", + "gain_control_for_experimental_agc.cc", + "gain_control_for_experimental_agc.h", + "gain_control_impl.cc", + "gain_control_impl.h", + "include/audio_processing.cc", + "include/audio_processing.h", + "include/config.cc", + "include/config.h", + "level_controller/biquad_filter.cc", + "level_controller/biquad_filter.h", + "level_controller/down_sampler.cc", + "level_controller/down_sampler.h", + "level_controller/gain_applier.cc", + "level_controller/gain_applier.h", + "level_controller/gain_selector.cc", + "level_controller/gain_selector.h", + "level_controller/level_controller.cc", + "level_controller/level_controller.h", + "level_controller/level_controller_constants.h", + "level_controller/noise_level_estimator.cc", + "level_controller/noise_level_estimator.h", + "level_controller/noise_spectrum_estimator.cc", + "level_controller/noise_spectrum_estimator.h", + "level_controller/peak_level_estimator.cc", + "level_controller/peak_level_estimator.h", + "level_controller/saturating_gain_estimator.cc", + "level_controller/saturating_gain_estimator.h", + "level_controller/signal_classifier.cc", + "level_controller/signal_classifier.h", + "level_estimator_impl.cc", + "level_estimator_impl.h", + "logging/apm_data_dumper.cc", + "logging/apm_data_dumper.h", + "low_cut_filter.cc", + "low_cut_filter.h", + "noise_suppression_impl.cc", + "noise_suppression_impl.h", + "render_queue_item_verifier.h", + "residual_echo_detector.cc", + "residual_echo_detector.h", + "rms_level.cc", + "rms_level.h", + "splitting_filter.cc", + "splitting_filter.h", + "three_band_filter_bank.cc", + "three_band_filter_bank.h", + "transient/common.h", + "transient/daubechies_8_wavelet_coeffs.h", + "transient/dyadic_decimator.h", + "transient/moving_moments.cc", + "transient/moving_moments.h", + "transient/transient_detector.cc", + "transient/transient_detector.h", + "transient/transient_suppressor.cc", + "transient/transient_suppressor.h", + "transient/wpd_node.cc", + "transient/wpd_node.h", + "transient/wpd_tree.cc", + "transient/wpd_tree.h", + "typing_detection.cc", + "typing_detection.h", + "utility/block_mean_calculator.cc", + "utility/block_mean_calculator.h", + "utility/delay_estimator.cc", + "utility/delay_estimator.h", + "utility/delay_estimator_internal.h", + "utility/delay_estimator_wrapper.cc", + 
"utility/delay_estimator_wrapper.h", + "utility/ooura_fft.cc", + "utility/ooura_fft.h", + "utility/ooura_fft_tables_common.h", + "vad/common.h", + "vad/gmm.cc", + "vad/gmm.h", + "vad/noise_gmm_tables.h", + "vad/pitch_based_vad.cc", + "vad/pitch_based_vad.h", + "vad/pitch_internal.cc", + "vad/pitch_internal.h", + "vad/pole_zero_filter.cc", + "vad/pole_zero_filter.h", + "vad/standalone_vad.cc", + "vad/standalone_vad.h", + "vad/vad_audio_proc.cc", + "vad/vad_audio_proc.h", + "vad/vad_audio_proc_internal.h", + "vad/vad_circular_buffer.cc", + "vad/vad_circular_buffer.h", + "vad/voice_activity_detector.cc", + "vad/voice_activity_detector.h", + "vad/voice_gmm_tables.h", + "voice_detection_impl.cc", + "voice_detection_impl.h", + ] + + defines = [] + deps = [ + ":aec_dump_interface", + ":audio_processing_statistics", + "..:module_api", + "../..:webrtc_common", + "../../api:array_view", + "../../api:optional", + "../../audio/utility:audio_frame_operations", + "../../rtc_base:gtest_prod", + "../../rtc_base:protobuf_utils", + "../../system_wrappers:field_trial_api", + "../audio_coding:isac", + ] + public_deps = [ + ":audio_processing_c", + ] + + if (apm_debug_dump) { + defines += [ "WEBRTC_APM_DEBUG_DUMP=1" ] + } else { + defines += [ "WEBRTC_APM_DEBUG_DUMP=0" ] + } + + if (aec_untrusted_delay_for_testing) { + defines += [ "WEBRTC_UNTRUSTED_DELAY" ] + } + + if (rtc_enable_protobuf) { + defines += [ "WEBRTC_AUDIOPROC_DEBUG_DUMP" ] + deps += [ ":audioproc_debug_proto" ] + } + + if (rtc_enable_intelligibility_enhancer) { + defines += [ "WEBRTC_INTELLIGIBILITY_ENHANCER=1" ] + sources += [ + "intelligibility/intelligibility_enhancer.cc", + "intelligibility/intelligibility_enhancer.h", + "intelligibility/intelligibility_utils.cc", + "intelligibility/intelligibility_utils.h", + ] + } else { + defines += [ "WEBRTC_INTELLIGIBILITY_ENHANCER=0" ] + } + + if (rtc_prefer_fixed_point) { + defines += [ "WEBRTC_NS_FIXED" ] + } else { + defines += [ "WEBRTC_NS_FLOAT" ] + } + + if (current_cpu == "x86" || current_cpu == "x64") { + deps += [ ":audio_processing_sse2" ] + } + + if (rtc_build_with_neon) { + deps += [ ":audio_processing_neon" ] + } + + if (current_cpu == "mipsel") { + sources += [ "aecm/aecm_core_mips.cc" ] + if (mips_float_abi == "hard") { + sources += [ + "aec/aec_core_mips.cc", + "utility/ooura_fft_mips.cc", + ] + } + } else { + sources += [ "aecm/aecm_core_c.cc" ] + } + + # TODO(jschuh): Bug 1348: fix this warning. + configs += [ "//build/config/compiler:no_size_t_to_int_warning" ] + + deps += [ + "../../common_audio", + "../../common_audio:fir_filter", + "../../common_audio:fir_filter_factory", + "../../rtc_base:rtc_base_approved", + "../../system_wrappers", + ] +} + +rtc_source_set("audio_processing_statistics") { + sources = [ + "include/audio_processing_statistics.cc", + "include/audio_processing_statistics.h", + ] + deps = [ + "../../api:optional", + ] +} + +rtc_source_set("aec_dump_interface") { + sources = [ + "include/aec_dump.cc", + "include/aec_dump.h", + ] + + deps = [ + "../../api:array_view", + "../../rtc_base:rtc_base_approved", + ] +} + +rtc_source_set("audio_processing_c") { + visibility = [ ":*" ] # Only targets in this file can depend on this. 
+ sources = [ + "agc/legacy/analog_agc.c", + "agc/legacy/analog_agc.h", + "agc/legacy/digital_agc.c", + "agc/legacy/digital_agc.h", + "agc/legacy/gain_control.h", + ] + + if (rtc_prefer_fixed_point) { + sources += [ + "ns/noise_suppression_x.c", + "ns/noise_suppression_x.h", + "ns/nsx_core.c", + "ns/nsx_core.h", + "ns/nsx_defines.h", + ] + if (current_cpu == "mipsel") { + sources += [ "ns/nsx_core_mips.c" ] + } else { + sources += [ "ns/nsx_core_c.c" ] + } + } else { + sources += [ + "ns/defines.h", + "ns/noise_suppression.c", + "ns/noise_suppression.h", + "ns/ns_core.c", + "ns/ns_core.h", + "ns/windows_private.h", + ] + } + + deps = [ + "../..:webrtc_common", + "../../common_audio", + "../../rtc_base:rtc_base_approved", + "../../system_wrappers", + ] + + if (rtc_build_with_neon) { + deps += [ ":audio_processing_neon_c" ] + } +} + +if (rtc_enable_protobuf) { + proto_library("audioproc_debug_proto") { + sources = [ + "debug.proto", + ] + + proto_out_dir = "modules/audio_processing" + } +} + +if (current_cpu == "x86" || current_cpu == "x64") { + rtc_static_library("audio_processing_sse2") { + # TODO(ehmaldonado): Remove (bugs.webrtc.org/6828) + # Errors on cyclic dependency with :audio_processing if enabled. + check_includes = false + + sources = [ + "aec/aec_core_sse2.cc", + "utility/ooura_fft_sse2.cc", + "utility/ooura_fft_tables_neon_sse2.h", + ] + + if (is_posix) { + cflags = [ "-msse2" ] + } + + if (apm_debug_dump) { + defines = [ "WEBRTC_APM_DEBUG_DUMP=1" ] + } else { + defines = [ "WEBRTC_APM_DEBUG_DUMP=0" ] + } + } +} + +if (rtc_build_with_neon) { + rtc_static_library("audio_processing_neon") { + # TODO(ehmaldonado): Remove (bugs.webrtc.org/6828) + # Errors on cyclic dependency with :audio_processing if enabled. + check_includes = false + + sources = [ + "aec/aec_core_neon.cc", + "aecm/aecm_core_neon.cc", + "utility/ooura_fft_neon.cc", + "utility/ooura_fft_tables_neon_sse2.h", + ] + + if (current_cpu != "arm64") { + # Enable compilation for the NEON instruction set. This is needed + # since //build/config/arm.gni only enables NEON for iOS, not Android. + # This provides the same functionality as webrtc/build/arm_neon.gypi. + suppressed_configs += [ "//build/config/compiler:compiler_arm_fpu" ] + cflags = [ "-mfpu=neon" ] + } + + # Disable LTO on NEON targets due to compiler bug. + # TODO(fdegans): Enable this. See crbug.com/408997. + if (rtc_use_lto) { + cflags -= [ + "-flto", + "-ffat-lto-objects", + ] + } + + deps = [ + "../../common_audio", + ] + public_deps = [ + ":audio_processing_neon_c", + ] + + if (apm_debug_dump) { + defines = [ "WEBRTC_APM_DEBUG_DUMP=1" ] + } else { + defines = [ "WEBRTC_APM_DEBUG_DUMP=0" ] + } + } + + rtc_static_library("audio_processing_neon_c") { + # TODO(mbonadei): Remove (bugs.webrtc.org/6828) + # Errors on cyclic dependency with :audio_processing_c if enabled. + check_includes = false + + sources = [ + "ns/nsx_core_neon.c", + ] + + if (current_cpu != "arm64") { + # Enable compilation for the NEON instruction set. This is needed + # since //build/config/arm.gni only enables NEON for iOS, not Android. + # This provides the same functionality as webrtc/build/arm_neon.gypi. + suppressed_configs += [ "//build/config/compiler:compiler_arm_fpu" ] + cflags = [ "-mfpu=neon" ] + } + + # Disable LTO on NEON targets due to compiler bug. + # TODO(fdegans): Enable this. See crbug.com/408997. 
+ if (rtc_use_lto) { + cflags -= [ + "-flto", + "-ffat-lto-objects", + ] + } + deps = [ + "../../rtc_base:rtc_base_approved", + ] + } +} + +if (rtc_include_tests) { + group("audio_processing_tests") { + testonly = true + public_deps = [ + ":audioproc_test_utils", + ":click_annotate", + ":nonlinear_beamformer_test", + ":transient_suppression_test", + ] + + if (rtc_enable_intelligibility_enhancer) { + public_deps += [ ":intelligibility_proc" ] + } + + if (rtc_enable_protobuf) { + public_deps += [ + ":audioproc_f", + ":audioproc_unittest_proto", + ":unpack_aecdump", + "aec_dump:aec_dump_unittests", + "test/conversational_speech", + "test/py_quality_assessment", + ] + } + } + + rtc_source_set("audio_processing_unittests") { + testonly = true + + sources = [ + "aec/echo_cancellation_unittest.cc", + "aec/system_delay_unittest.cc", + "agc/agc_manager_direct_unittest.cc", + "agc/loudness_histogram_unittest.cc", + "agc/mock_agc.h", + "audio_buffer_unittest.cc", + "beamformer/array_util_unittest.cc", + "beamformer/complex_matrix_unittest.cc", + "beamformer/covariance_matrix_generator_unittest.cc", + "beamformer/matrix_unittest.cc", + "beamformer/mock_nonlinear_beamformer.h", + "config_unittest.cc", + "echo_cancellation_impl_unittest.cc", + "splitting_filter_unittest.cc", + "test/fake_recording_device_unittest.cc", + "transient/dyadic_decimator_unittest.cc", + "transient/file_utils.cc", + "transient/file_utils.h", + "transient/file_utils_unittest.cc", + "transient/moving_moments_unittest.cc", + "transient/transient_detector_unittest.cc", + "transient/transient_suppressor_unittest.cc", + "transient/wpd_node_unittest.cc", + "transient/wpd_tree_unittest.cc", + "utility/block_mean_calculator_unittest.cc", + "utility/delay_estimator_unittest.cc", + "vad/gmm_unittest.cc", + "vad/pitch_based_vad_unittest.cc", + "vad/pitch_internal_unittest.cc", + "vad/pole_zero_filter_unittest.cc", + "vad/standalone_vad_unittest.cc", + "vad/vad_audio_proc_unittest.cc", + "vad/vad_circular_buffer_unittest.cc", + "vad/voice_activity_detector_unittest.cc", + ] + + deps = [ + ":analog_mic_simulation", + ":audio_processing", + ":audioproc_test_utils", + "..:module_api", + "../..:webrtc_common", + "../../api:array_view", + "../../api:optional", + "../../common_audio:common_audio", + "../../rtc_base:gtest_prod", + "../../rtc_base:protobuf_utils", + "../../rtc_base:rtc_base", + "../../rtc_base:rtc_base_approved", + "../../system_wrappers:system_wrappers", + "../../test:test_support", + "../audio_coding:neteq_input_audio_tools", + "aec_dump:mock_aec_dump_unittests", + "test/conversational_speech:unittest", + "//testing/gmock", + "//testing/gtest", + ] + + defines = [] + + if (apm_debug_dump) { + defines += [ "WEBRTC_APM_DEBUG_DUMP=1" ] + } else { + defines += [ "WEBRTC_APM_DEBUG_DUMP=0" ] + } + + if (rtc_enable_intelligibility_enhancer) { + defines += [ "WEBRTC_INTELLIGIBILITY_ENHANCER=1" ] + sources += [ + "intelligibility/intelligibility_enhancer_unittest.cc", + "intelligibility/intelligibility_utils_unittest.cc", + ] + } else { + defines += [ "WEBRTC_INTELLIGIBILITY_ENHANCER=0" ] + } + + if (rtc_prefer_fixed_point) { + defines += [ "WEBRTC_AUDIOPROC_FIXED_PROFILE" ] + } else { + defines += [ "WEBRTC_AUDIOPROC_FLOAT_PROFILE" ] + } + + if (rtc_enable_protobuf) { + defines += [ "WEBRTC_AUDIOPROC_DEBUG_DUMP" ] + deps += [ + ":audioproc_debug_proto", + ":audioproc_protobuf_utils", + ":audioproc_test_utils", + ":audioproc_unittest_proto", + "../../rtc_base:rtc_task_queue", + "aec_dump", + "aec_dump:aec_dump_unittests", + ] + sources += 
[ + "aec3/adaptive_fir_filter_unittest.cc", + "aec3/aec3_fft_unittest.cc", + "aec3/aec_state_unittest.cc", + "aec3/block_framer_unittest.cc", + "aec3/block_processor_metrics_unittest.cc", + "aec3/block_processor_unittest.cc", + "aec3/cascaded_biquad_filter_unittest.cc", + "aec3/comfort_noise_generator_unittest.cc", + "aec3/decimator_unittest.cc", + "aec3/echo_canceller3_unittest.cc", + "aec3/echo_path_delay_estimator_unittest.cc", + "aec3/echo_path_variability_unittest.cc", + "aec3/echo_remover_metrics_unittest.cc", + "aec3/echo_remover_unittest.cc", + "aec3/erl_estimator_unittest.cc", + "aec3/erle_estimator_unittest.cc", + "aec3/fft_data_unittest.cc", + "aec3/frame_blocker_unittest.cc", + "aec3/main_filter_update_gain_unittest.cc", + "aec3/matched_filter_lag_aggregator_unittest.cc", + "aec3/matched_filter_unittest.cc", + "aec3/output_selector_unittest.cc", + "aec3/render_buffer_unittest.cc", + "aec3/render_delay_buffer_unittest.cc", + "aec3/render_delay_controller_metrics_unittest.cc", + "aec3/render_delay_controller_unittest.cc", + "aec3/render_signal_analyzer_unittest.cc", + "aec3/residual_echo_estimator_unittest.cc", + "aec3/shadow_filter_update_gain_unittest.cc", + "aec3/subtractor_unittest.cc", + "aec3/suppression_filter_unittest.cc", + "aec3/suppression_gain_unittest.cc", + "aec3/vector_math_unittest.cc", + "agc2/gain_controller2_unittest.cc", + "audio_processing_impl_locking_unittest.cc", + "audio_processing_impl_unittest.cc", + "audio_processing_unittest.cc", + "beamformer/nonlinear_beamformer_unittest.cc", + "echo_cancellation_bit_exact_unittest.cc", + "echo_control_mobile_unittest.cc", + "echo_detector/circular_buffer_unittest.cc", + "echo_detector/mean_variance_estimator_unittest.cc", + "echo_detector/moving_max_unittest.cc", + "echo_detector/normalized_covariance_estimator_unittest.cc", + "gain_control_unittest.cc", + "level_controller/level_controller_unittest.cc", + "level_estimator_unittest.cc", + "low_cut_filter_unittest.cc", + "noise_suppression_unittest.cc", + "residual_echo_detector_unittest.cc", + "rms_level_unittest.cc", + "test/debug_dump_replayer.cc", + "test/debug_dump_replayer.h", + "test/debug_dump_test.cc", + "test/echo_canceller_test_tools.cc", + "test/echo_canceller_test_tools.h", + "test/echo_canceller_test_tools_unittest.cc", + "test/test_utils.h", + "voice_detection_unittest.cc", + ] + } + + if ((!build_with_chromium || is_win) && is_clang) { + # Suppress warnings from the Chromium Clang plugin (bugs.webrtc.org/163). 
+ suppressed_configs += [ "//build/config/clang:find_bad_constructs" ] + } + } + + rtc_source_set("audio_processing_perf_tests") { + # Has problems with autogenerated targets on Android and iOS + # Dependency chain (there may also be others): + # :audio_processing_perf_tests --> + # ..:modules_unittests --[private]--> + # ..:modules_unittests_apk --> + # ..:modules_unittests_apk__create --> + # ..:modules_unittests_apk__create__finalize --> + # ..:modules_unittests_apk__create__package --[private]--> + # ..:_modules_unittests__library + check_includes = false + testonly = true + + sources = [ + "audio_processing_performance_unittest.cc", + "level_controller/level_controller_complexity_unittest.cc", + ] + deps = [ + ":audio_processing", + ":audioproc_test_utils", + "../../api:array_view", + "../../modules:module_api", + "../../rtc_base:protobuf_utils", + "../../rtc_base:rtc_base_approved", + "../../system_wrappers:system_wrappers", + "../../test:test_support", + ] + + if (rtc_enable_intelligibility_enhancer) { + defines = [ "WEBRTC_INTELLIGIBILITY_ENHANCER=1" ] + } else { + defines = [ "WEBRTC_INTELLIGIBILITY_ENHANCER=0" ] + } + } + + rtc_source_set("analog_mic_simulation") { + sources = [ + "test/fake_recording_device.cc", + "test/fake_recording_device.h", + ] + deps = [ + "../../api:array_view", + "../../common_audio:common_audio", + "../../modules:module_api", + "../../rtc_base:rtc_base_approved", + ] + } + + if (rtc_enable_protobuf) { + rtc_executable("unpack_aecdump") { + testonly = true + sources = [ + "test/unpack.cc", + ] + + deps = [ + ":audio_processing", + ":audioproc_debug_proto", + ":audioproc_protobuf_utils", + ":audioproc_test_utils", + "../..:webrtc_common", + "../../common_audio", + "../../rtc_base:protobuf_utils", + "../../rtc_base:rtc_base_approved", + "../../system_wrappers:system_wrappers_default", + ] + } # unpack_aecdump + + rtc_executable("audioproc_f") { + testonly = true + sources = [ + "test/aec_dump_based_simulator.cc", + "test/aec_dump_based_simulator.h", + "test/audio_processing_simulator.cc", + "test/audio_processing_simulator.h", + "test/audioproc_float.cc", + "test/wav_based_simulator.cc", + "test/wav_based_simulator.h", + ] + + deps = [ + ":analog_mic_simulation", + ":audio_processing", + ":audioproc_debug_proto", + ":audioproc_protobuf_utils", + ":audioproc_test_utils", + "../../api:optional", + "../../common_audio:common_audio", + "../../rtc_base:protobuf_utils", + "../../rtc_base:rtc_base_approved", + "../../rtc_base:rtc_task_queue", + "../../system_wrappers", + "../../system_wrappers:system_wrappers_default", + "../../test:test_support", + "aec_dump", + "aec_dump:aec_dump_impl", + "//testing/gtest", + ] + } # audioproc_f + } + + rtc_source_set("audioproc_test_utils") { + testonly = true + sources = [ + "test/audio_buffer_tools.cc", + "test/audio_buffer_tools.h", + "test/bitexactness_tools.cc", + "test/bitexactness_tools.h", + "test/performance_timer.cc", + "test/performance_timer.h", + "test/simulator_buffers.cc", + "test/simulator_buffers.h", + "test/test_utils.cc", + "test/test_utils.h", + ] + + deps = [ + ":audio_processing", + "..:module_api", + "../../api:array_view", + "../../api:optional", + "../../common_audio", + "../../rtc_base:rtc_base_approved", + "../../system_wrappers:system_wrappers", + "../../test:test_support", + "../audio_coding:neteq_input_audio_tools", + "//testing/gtest", + ] + } + + rtc_executable("transient_suppression_test") { + testonly = true + sources = [ + "transient/file_utils.cc", + "transient/file_utils.h", + 
"transient/transient_suppression_test.cc", + ] + deps = [ + ":audio_processing", + "..:module_api", + "../..:webrtc_common", + "../../common_audio:common_audio", + "../../rtc_base:rtc_base_approved", + "../../system_wrappers:metrics_default", + "../../system_wrappers:system_wrappers", + "../../test:test_support", + "//testing/gtest", + ] + } + + rtc_executable("click_annotate") { + testonly = true + sources = [ + "transient/click_annotate.cc", + "transient/file_utils.cc", + "transient/file_utils.h", + ] + deps = [ + ":audio_processing", + "../..:webrtc_common", + "../../system_wrappers:metrics_default", + "../../system_wrappers:system_wrappers", + ] + } + + rtc_executable("nonlinear_beamformer_test") { + testonly = true + sources = [ + "beamformer/nonlinear_beamformer_test.cc", + ] + deps = [ + ":audio_processing", + ":audioproc_test_utils", + "../../common_audio:common_audio", + "../../rtc_base:rtc_base_approved", + "../../system_wrappers:metrics_default", + ] + } + + if (rtc_enable_intelligibility_enhancer) { + rtc_executable("intelligibility_proc") { + testonly = true + sources = [ + "intelligibility/test/intelligibility_proc.cc", + ] + deps = [ + ":audio_processing", + ":audioproc_test_utils", + "../../rtc_base:rtc_base_approved", + "../../system_wrappers:metrics_default", + "../../test:test_support", + "//testing/gtest", + ] + } + } + + if (rtc_enable_protobuf) { + proto_library("audioproc_unittest_proto") { + sources = [ + "test/unittest.proto", + ] + proto_out_dir = "modules/audio_processing/test" + } + + rtc_static_library("audioproc_protobuf_utils") { + sources = [ + "test/protobuf_utils.cc", + "test/protobuf_utils.h", + ] + + deps = [ + ":audioproc_debug_proto", + "../..:webrtc_common", + "../../rtc_base:protobuf_utils", + "../../rtc_base:rtc_base_approved", + ] + } + } +} diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/DEPS b/third_party/libwebrtc/webrtc/modules/audio_processing/DEPS new file mode 100644 index 0000000000..79fd071785 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/DEPS @@ -0,0 +1,14 @@ +include_rules = [ + "+audio/utility/audio_frame_operations.h", + "+common_audio", + "+system_wrappers", +] + +specific_include_rules = { + ".*test\.cc": [ + "+rtc_tools", + # Android platform build has different paths. + "+gtest", + "+external/webrtc", + ], +} diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/OWNERS b/third_party/libwebrtc/webrtc/modules/audio_processing/OWNERS new file mode 100644 index 0000000000..9c521d230a --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/OWNERS @@ -0,0 +1,9 @@ +aleloi@webrtc.org +aluebs@webrtc.org +henrik.lundin@webrtc.org +peah@webrtc.org + +# These are for the common case of adding or renaming files. If you're doing +# structural changes, please get a review from a reviewer in this file. +per-file *.gn=* +per-file *.gni=* diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec/aec_common.h b/third_party/libwebrtc/webrtc/modules/audio_processing/aec/aec_common.h new file mode 100644 index 0000000000..80c5c14813 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec/aec_common.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC_AEC_COMMON_H_ +#define MODULES_AUDIO_PROCESSING_AEC_AEC_COMMON_H_ + +#include "typedefs.h" // NOLINT(build/include) + +#ifdef _MSC_VER /* visual c++ */ +#define ALIGN16_BEG __declspec(align(16)) +#define ALIGN16_END +#else /* gcc or icc */ +#define ALIGN16_BEG +#define ALIGN16_END __attribute__((aligned(16))) +#endif + +#ifdef __cplusplus +namespace webrtc { +#endif + +extern ALIGN16_BEG const float ALIGN16_END WebRtcAec_sqrtHanning[65]; +extern ALIGN16_BEG const float ALIGN16_END WebRtcAec_weightCurve[65]; +extern ALIGN16_BEG const float ALIGN16_END WebRtcAec_overDriveCurve[65]; +extern const float WebRtcAec_kExtendedSmoothingCoefficients[2][2]; +extern const float WebRtcAec_kNormalSmoothingCoefficients[2][2]; +extern const float WebRtcAec_kMinFarendPSD; + +#ifdef __cplusplus +} // namespace webrtc +#endif + +#endif // MODULES_AUDIO_PROCESSING_AEC_AEC_COMMON_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec/aec_core.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec/aec_core.cc new file mode 100644 index 0000000000..8f4fda1a1b --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec/aec_core.cc @@ -0,0 +1,2057 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * The core AEC algorithm, which is presented with time-aligned signals. + */ + +#include "modules/audio_processing/aec/aec_core.h" + +#include <algorithm> +#include <math.h> +#include <stddef.h> // size_t +#include <stdlib.h> +#include <string.h> + +#include "rtc_base/checks.h" +extern "C" { +#include "common_audio/ring_buffer.h" +} +#include "common_audio/signal_processing/include/signal_processing_library.h" +#include "modules/audio_processing/aec/aec_common.h" +#include "modules/audio_processing/aec/aec_core_optimized_methods.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "modules/audio_processing/utility/delay_estimator_wrapper.h" +#include "rtc_base/checks.h" +#include "system_wrappers/include/cpu_features_wrapper.h" +#include "system_wrappers/include/metrics.h" +#include "typedefs.h" // NOLINT(build/include) +#include "rtc_base/criticalsection.h" + +namespace webrtc { +namespace { +enum class DelaySource { + kSystemDelay, // The delay values come from the OS. + kDelayAgnostic, // The delay values come from the DA-AEC. 
+}; + +constexpr int kMinDelayLogValue = -200; +constexpr int kMaxDelayLogValue = 200; +constexpr int kNumDelayLogBuckets = 100; + +void MaybeLogDelayAdjustment(int moved_ms, DelaySource source) { + if (moved_ms == 0) + return; + switch (source) { + case DelaySource::kSystemDelay: + RTC_HISTOGRAM_COUNTS("WebRTC.Audio.AecDelayAdjustmentMsSystemValue", + moved_ms, kMinDelayLogValue, kMaxDelayLogValue, + kNumDelayLogBuckets); + return; + case DelaySource::kDelayAgnostic: + RTC_HISTOGRAM_COUNTS("WebRTC.Audio.AecDelayAdjustmentMsAgnosticValue", + moved_ms, kMinDelayLogValue, kMaxDelayLogValue, + kNumDelayLogBuckets); + return; + } +} +} // namespace + +// Buffer size (samples) +static const size_t kBufferSizeBlocks = 250; // 1 second of audio in 16 kHz. + +// Metrics +static const size_t kSubCountLen = 4; +static const size_t kCountLen = 50; +static const int kDelayMetricsAggregationWindow = 1250; // 5 seconds at 16 kHz. + +// Divergence metric is based on audio level, which gets updated every +// |kSubCountLen + 1| * PART_LEN samples. Divergence metric takes the statistics +// of |kDivergentFilterFractionAggregationWindowSize| audio levels. The +// following value corresponds to 1 second at 16 kHz. +static const int kDivergentFilterFractionAggregationWindowSize = 50; + +// Quantities to control H band scaling for SWB input +static const float cnScaleHband = 0.4f; // scale for comfort noise in H band. +// Initial bin for averaging nlp gain in low band +static const int freqAvgIc = PART_LEN / 2; + +// Matlab code to produce table: +// win = sqrt(hanning(63)); win = [0 ; win(1:32)]; +// fprintf(1, '\t%.14f, %.14f, %.14f,\n', win); +ALIGN16_BEG const float ALIGN16_END WebRtcAec_sqrtHanning[65] = { + 0.00000000000000f, 0.02454122852291f, 0.04906767432742f, 0.07356456359967f, + 0.09801714032956f, 0.12241067519922f, 0.14673047445536f, 0.17096188876030f, + 0.19509032201613f, 0.21910124015687f, 0.24298017990326f, 0.26671275747490f, + 0.29028467725446f, 0.31368174039889f, 0.33688985339222f, 0.35989503653499f, + 0.38268343236509f, 0.40524131400499f, 0.42755509343028f, 0.44961132965461f, + 0.47139673682600f, 0.49289819222978f, 0.51410274419322f, 0.53499761988710f, + 0.55557023301960f, 0.57580819141785f, 0.59569930449243f, 0.61523159058063f, + 0.63439328416365f, 0.65317284295378f, 0.67155895484702f, 0.68954054473707f, + 0.70710678118655f, 0.72424708295147f, 0.74095112535496f, 0.75720884650648f, + 0.77301045336274f, 0.78834642762661f, 0.80320753148064f, 0.81758481315158f, + 0.83146961230255f, 0.84485356524971f, 0.85772861000027f, 0.87008699110871f, + 0.88192126434835f, 0.89322430119552f, 0.90398929312344f, 0.91420975570353f, + 0.92387953251129f, 0.93299279883474f, 0.94154406518302f, 0.94952818059304f, + 0.95694033573221f, 0.96377606579544f, 0.97003125319454f, 0.97570213003853f, + 0.98078528040323f, 0.98527764238894f, 0.98917650996478f, 0.99247953459871f, + 0.99518472667220f, 0.99729045667869f, 0.99879545620517f, 0.99969881869620f, + 1.00000000000000f}; + +// Matlab code to produce table: +// weightCurve = [0 ; 0.3 * sqrt(linspace(0,1,64))' + 0.1]; +// fprintf(1, '\t%.4f, %.4f, %.4f, %.4f, %.4f, %.4f,\n', weightCurve); +ALIGN16_BEG const float ALIGN16_END WebRtcAec_weightCurve[65] = { + 0.0000f, 0.1000f, 0.1378f, 0.1535f, 0.1655f, 0.1756f, 0.1845f, 0.1926f, + 0.2000f, 0.2069f, 0.2134f, 0.2195f, 0.2254f, 0.2309f, 0.2363f, 0.2414f, + 0.2464f, 0.2512f, 0.2558f, 0.2604f, 0.2648f, 0.2690f, 0.2732f, 0.2773f, + 0.2813f, 0.2852f, 0.2890f, 0.2927f, 0.2964f, 0.3000f, 0.3035f, 0.3070f, + 0.3104f, 0.3138f, 0.3171f, 
0.3204f, 0.3236f, 0.3268f, 0.3299f, 0.3330f, + 0.3360f, 0.3390f, 0.3420f, 0.3449f, 0.3478f, 0.3507f, 0.3535f, 0.3563f, + 0.3591f, 0.3619f, 0.3646f, 0.3673f, 0.3699f, 0.3726f, 0.3752f, 0.3777f, + 0.3803f, 0.3828f, 0.3854f, 0.3878f, 0.3903f, 0.3928f, 0.3952f, 0.3976f, + 0.4000f}; + +// Matlab code to produce table: +// overDriveCurve = [sqrt(linspace(0,1,65))' + 1]; +// fprintf(1, '\t%.4f, %.4f, %.4f, %.4f, %.4f, %.4f,\n', overDriveCurve); +ALIGN16_BEG const float ALIGN16_END WebRtcAec_overDriveCurve[65] = { + 1.0000f, 1.1250f, 1.1768f, 1.2165f, 1.2500f, 1.2795f, 1.3062f, 1.3307f, + 1.3536f, 1.3750f, 1.3953f, 1.4146f, 1.4330f, 1.4507f, 1.4677f, 1.4841f, + 1.5000f, 1.5154f, 1.5303f, 1.5449f, 1.5590f, 1.5728f, 1.5863f, 1.5995f, + 1.6124f, 1.6250f, 1.6374f, 1.6495f, 1.6614f, 1.6731f, 1.6847f, 1.6960f, + 1.7071f, 1.7181f, 1.7289f, 1.7395f, 1.7500f, 1.7603f, 1.7706f, 1.7806f, + 1.7906f, 1.8004f, 1.8101f, 1.8197f, 1.8292f, 1.8385f, 1.8478f, 1.8570f, + 1.8660f, 1.8750f, 1.8839f, 1.8927f, 1.9014f, 1.9100f, 1.9186f, 1.9270f, + 1.9354f, 1.9437f, 1.9520f, 1.9601f, 1.9682f, 1.9763f, 1.9843f, 1.9922f, + 2.0000f}; + +// Delay Agnostic AEC parameters, still under development and may change. +static const float kDelayQualityThresholdMax = 0.07f; +static const float kDelayQualityThresholdMin = 0.01f; +static const int kInitialShiftOffset = 5; +#if !defined(WEBRTC_ANDROID) +static const int kDelayCorrectionStart = 1500; // 10 ms chunks +#endif + +// Target suppression levels for nlp modes. +// log{0.001, 0.00001, 0.00000001} +static const float kTargetSupp[3] = {-6.9f, -11.5f, -18.4f}; + +// Two sets of parameters, one for the extended filter mode. +static const float kExtendedMinOverDrive[3] = {3.0f, 6.0f, 15.0f}; +static const float kNormalMinOverDrive[3] = {1.0f, 2.0f, 5.0f}; +const float WebRtcAec_kExtendedSmoothingCoefficients[2][2] = {{0.9f, 0.1f}, + {0.92f, 0.08f}}; +const float WebRtcAec_kNormalSmoothingCoefficients[2][2] = {{0.9f, 0.1f}, + {0.93f, 0.07f}}; + +// Number of partitions forming the NLP's "preferred" bands. +enum { kPrefBandSize = 24 }; + +rtc::CriticalSection WebRtcAec_CriticalSection; +WebRtcAecFilterFar WebRtcAec_FilterFar; +WebRtcAecScaleErrorSignal WebRtcAec_ScaleErrorSignal; +WebRtcAecFilterAdaptation WebRtcAec_FilterAdaptation; +WebRtcAecOverdrive WebRtcAec_Overdrive; +WebRtcAecSuppress WebRtcAec_Suppress; +WebRtcAecComputeCoherence WebRtcAec_ComputeCoherence; +WebRtcAecUpdateCoherenceSpectra WebRtcAec_UpdateCoherenceSpectra; +WebRtcAecStoreAsComplex WebRtcAec_StoreAsComplex; +WebRtcAecPartitionDelay WebRtcAec_PartitionDelay; +WebRtcAecWindowData WebRtcAec_WindowData; + +__inline static float MulRe(float aRe, float aIm, float bRe, float bIm) { + return aRe * bRe - aIm * bIm; +} + +__inline static float MulIm(float aRe, float aIm, float bRe, float bIm) { + return aRe * bIm + aIm * bRe; +} + +// TODO(minyue): Due to a legacy bug, |framelevel| and |averagelevel| use a +// window, of which the length is 1 unit longer than indicated. Remove "+1" when +// the code is refactored. 
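The Matlab one-liners above record how the three lookup tables were generated. For `WebRtcAec_sqrtHanning` the 65 entries also reduce to the closed form sin(πi/128) for i = 0…64 (e.g. sin(π/128) ≈ 0.02454122852291, the first nonzero entry), and the `kTargetSupp` values are natural logarithms: ln(0.001) ≈ −6.9, ln(1e−5) ≈ −11.5, ln(1e−8) ≈ −18.4. A self-contained sketch that regenerates the window table for verification — illustrative only, not part of the patch:

```cpp
#include <cmath>
#include <cstdio>

// Regenerates the sqrt-Hanning analysis window used by the AEC and prints
// it in the same 14-decimal format as the table in aec_core.cc; the
// tabulated values match sin(pi * i / 128) at that precision.
int main() {
  const double kPi = 3.14159265358979323846;
  for (int i = 0; i <= 64; ++i) {
    std::printf("%2d: %.14f\n", i, std::sin(kPi * i / 128.0));
  }
  return 0;
}
```

(The `PowerLevel` constructor that follows carries the "+1" window length flagged in the TODO above.)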
+PowerLevel::PowerLevel() + : framelevel(kSubCountLen + 1), + averagelevel(kCountLen + 1) { +} + +BlockBuffer::BlockBuffer() { + buffer_ = WebRtc_CreateBuffer(kBufferSizeBlocks, sizeof(float) * PART_LEN); + RTC_CHECK(buffer_); + ReInit(); +} + +BlockBuffer::~BlockBuffer() { + WebRtc_FreeBuffer(buffer_); +} + +void BlockBuffer::ReInit() { + WebRtc_InitBuffer(buffer_); +} + +void BlockBuffer::Insert(const float block[PART_LEN]) { + WebRtc_WriteBuffer(buffer_, block, 1); +} + +void BlockBuffer::ExtractExtendedBlock(float extended_block[PART_LEN2]) { + float* block_ptr = NULL; + RTC_DCHECK_LT(0, AvaliableSpace()); + + // Extract the previous block. + WebRtc_MoveReadPtr(buffer_, -1); + size_t read_elements = WebRtc_ReadBuffer( + buffer_, reinterpret_cast<void**>(&block_ptr), &extended_block[0], 1); + if (read_elements == 0u) { + std::fill_n(&extended_block[0], PART_LEN, 0.0f); + } else if (block_ptr != &extended_block[0]) { + memcpy(&extended_block[0], block_ptr, PART_LEN * sizeof(float)); + } + + // Extract the current block. + read_elements = + WebRtc_ReadBuffer(buffer_, reinterpret_cast<void**>(&block_ptr), + &extended_block[PART_LEN], 1); + if (read_elements == 0u) { + std::fill_n(&extended_block[PART_LEN], PART_LEN, 0.0f); + } else if (block_ptr != &extended_block[PART_LEN]) { + memcpy(&extended_block[PART_LEN], block_ptr, PART_LEN * sizeof(float)); + } +} + +int BlockBuffer::AdjustSize(int buffer_size_decrease) { + return WebRtc_MoveReadPtr(buffer_, buffer_size_decrease); +} + +size_t BlockBuffer::Size() { + return static_cast<int>(WebRtc_available_read(buffer_)); +} + +size_t BlockBuffer::AvaliableSpace() { + return WebRtc_available_write(buffer_); +} + +DivergentFilterFraction::DivergentFilterFraction() + : count_(0), + occurrence_(0), + fraction_(-1.0) { +} + +void DivergentFilterFraction::Reset() { + Clear(); + fraction_ = -1.0; +} + +void DivergentFilterFraction::AddObservation(const PowerLevel& nearlevel, + const PowerLevel& linoutlevel, + const PowerLevel& nlpoutlevel) { + const float near_level = nearlevel.framelevel.GetLatestMean(); + const float level_increase = + linoutlevel.framelevel.GetLatestMean() - near_level; + const bool output_signal_active = nlpoutlevel.framelevel.GetLatestMean() > + 40.0 * nlpoutlevel.minlevel; + // Level increase should be, in principle, negative, when the filter + // does not diverge. Here we allow some margin (0.01 * near end level) and + // numerical error (1.0). We count divergence only when the AEC output + // signal is active. + if (output_signal_active && + level_increase > std::max(0.01 * near_level, 1.0)) + occurrence_++; + ++count_; + if (count_ == kDivergentFilterFractionAggregationWindowSize) { + fraction_ = static_cast<float>(occurrence_) / + kDivergentFilterFractionAggregationWindowSize; + Clear(); + } +} + +float DivergentFilterFraction::GetLatestFraction() const { + return fraction_; +} + +void DivergentFilterFraction::Clear() { + count_ = 0; + occurrence_ = 0; +} + +// TODO(minyue): Moving some initialization from WebRtcAec_CreateAec() to ctor. 
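The `BlockBuffer` above wraps the C ring buffer so that `ExtractExtendedBlock()` always yields a 2·PART_LEN window: the previous block (zeros if none has been written yet) followed by the current one, which is what the 50%-overlap windowing further down expects. A hypothetical usage sketch, assuming the `PART_LEN`/`PART_LEN2` constants and the `BlockBuffer` declaration from `aec_core.h`:

```cpp
#include "modules/audio_processing/aec/aec_core.h"

// Illustrative only: push one 64-sample block, read back the 128-sample
// extended block laid out as [previous block | block just inserted].
void DemoExtendedBlock(webrtc::BlockBuffer* buffer,
                       const float block[PART_LEN]) {
  buffer->Insert(block);
  float extended[PART_LEN2];
  buffer->ExtractExtendedBlock(extended);
  // extended[0 .. PART_LEN-1]        : previous block (zeros on first call).
  // extended[PART_LEN .. PART_LEN2-1]: the block passed to Insert() above.
}
```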
+AecCore::AecCore(int instance_index) + : data_dumper(new ApmDataDumper(instance_index)) {} + +AecCore::~AecCore() {} + +static int CmpFloat(const void* a, const void* b) { + const float* da = (const float*)a; + const float* db = (const float*)b; + + return (*da > *db) - (*da < *db); +} + +static void FilterFar(int num_partitions, + int x_fft_buf_block_pos, + float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1], + float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1], + float y_fft[2][PART_LEN1]) { + int i; + for (i = 0; i < num_partitions; i++) { + int j; + int xPos = (i + x_fft_buf_block_pos) * PART_LEN1; + int pos = i * PART_LEN1; + // Check for wrap + if (i + x_fft_buf_block_pos >= num_partitions) { + xPos -= num_partitions * (PART_LEN1); + } + + for (j = 0; j < PART_LEN1; j++) { + y_fft[0][j] += MulRe(x_fft_buf[0][xPos + j], x_fft_buf[1][xPos + j], + h_fft_buf[0][pos + j], h_fft_buf[1][pos + j]); + y_fft[1][j] += MulIm(x_fft_buf[0][xPos + j], x_fft_buf[1][xPos + j], + h_fft_buf[0][pos + j], h_fft_buf[1][pos + j]); + } + } +} + +static void ScaleErrorSignal(float mu, + float error_threshold, + float x_pow[PART_LEN1], + float ef[2][PART_LEN1]) { + int i; + float abs_ef; + for (i = 0; i < (PART_LEN1); i++) { + ef[0][i] /= (x_pow[i] + 1e-10f); + ef[1][i] /= (x_pow[i] + 1e-10f); + abs_ef = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]); + + if (abs_ef > error_threshold) { + abs_ef = error_threshold / (abs_ef + 1e-10f); + ef[0][i] *= abs_ef; + ef[1][i] *= abs_ef; + } + + // Stepsize factor + ef[0][i] *= mu; + ef[1][i] *= mu; + } +} + +static void FilterAdaptation( + const OouraFft& ooura_fft, + int num_partitions, + int x_fft_buf_block_pos, + float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1], + float e_fft[2][PART_LEN1], + float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1]) { + int i, j; + float fft[PART_LEN2]; + for (i = 0; i < num_partitions; i++) { + int xPos = (i + x_fft_buf_block_pos) * (PART_LEN1); + int pos; + // Check for wrap + if (i + x_fft_buf_block_pos >= num_partitions) { + xPos -= num_partitions * PART_LEN1; + } + + pos = i * PART_LEN1; + + for (j = 0; j < PART_LEN; j++) { + fft[2 * j] = MulRe(x_fft_buf[0][xPos + j], -x_fft_buf[1][xPos + j], + e_fft[0][j], e_fft[1][j]); + fft[2 * j + 1] = MulIm(x_fft_buf[0][xPos + j], -x_fft_buf[1][xPos + j], + e_fft[0][j], e_fft[1][j]); + } + fft[1] = + MulRe(x_fft_buf[0][xPos + PART_LEN], -x_fft_buf[1][xPos + PART_LEN], + e_fft[0][PART_LEN], e_fft[1][PART_LEN]); + + ooura_fft.InverseFft(fft); + memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN); + + // fft scaling + { + float scale = 2.0f / PART_LEN2; + for (j = 0; j < PART_LEN; j++) { + fft[j] *= scale; + } + } + ooura_fft.Fft(fft); + + h_fft_buf[0][pos] += fft[0]; + h_fft_buf[0][pos + PART_LEN] += fft[1]; + + for (j = 1; j < PART_LEN; j++) { + h_fft_buf[0][pos + j] += fft[2 * j]; + h_fft_buf[1][pos + j] += fft[2 * j + 1]; + } + } +} + +static void Overdrive(float overdrive_scaling, + const float hNlFb, + float hNl[PART_LEN1]) { + for (int i = 0; i < PART_LEN1; ++i) { + // Weight subbands + if (hNl[i] > hNlFb) { + hNl[i] = WebRtcAec_weightCurve[i] * hNlFb + + (1 - WebRtcAec_weightCurve[i]) * hNl[i]; + } + hNl[i] = powf(hNl[i], overdrive_scaling * WebRtcAec_overDriveCurve[i]); + } +} + +static void Suppress(const float hNl[PART_LEN1], float efw[2][PART_LEN1]) { + for (int i = 0; i < PART_LEN1; ++i) { + // Suppress error signal + efw[0][i] *= hNl[i]; + efw[1][i] *= hNl[i]; + + // Ooura fft returns incorrect sign on imaginary component. 
It matters here + // because we are making an additive change with comfort noise. + efw[1][i] *= -1; + } +} + +static int PartitionDelay(int num_partitions, + float h_fft_buf[2] + [kExtendedNumPartitions * PART_LEN1]) { + // Measures the energy in each filter partition and returns the partition with + // highest energy. + // TODO(bjornv): Spread computational cost by computing one partition per + // block? + float wfEnMax = 0; + int i; + int delay = 0; + + for (i = 0; i < num_partitions; i++) { + int j; + int pos = i * PART_LEN1; + float wfEn = 0; + for (j = 0; j < PART_LEN1; j++) { + wfEn += h_fft_buf[0][pos + j] * h_fft_buf[0][pos + j] + + h_fft_buf[1][pos + j] * h_fft_buf[1][pos + j]; + } + + if (wfEn > wfEnMax) { + wfEnMax = wfEn; + delay = i; + } + } + return delay; +} + +// Update metric with 10 * log10(numerator / denominator). +static void UpdateLogRatioMetric(Stats* metric, float numerator, + float denominator) { + RTC_DCHECK(metric); + RTC_CHECK(numerator >= 0); + RTC_CHECK(denominator >= 0); + + const float log_numerator = log10(numerator + 1e-10f); + const float log_denominator = log10(denominator + 1e-10f); + metric->instant = 10.0f * (log_numerator - log_denominator); + + // Max. + if (metric->instant > metric->max) + metric->max = metric->instant; + + // Min. + if (metric->instant < metric->min) + metric->min = metric->instant; + + // Average. + metric->counter++; + // This is to protect overflow, which should almost never happen. + RTC_CHECK_NE(0, metric->counter); + metric->sum += metric->instant; + metric->average = metric->sum / metric->counter; + + // Upper mean. + if (metric->instant > metric->average) { + metric->hicounter++; + // This is to protect overflow, which should almost never happen. + RTC_CHECK_NE(0, metric->hicounter); + metric->hisum += metric->instant; + metric->himean = metric->hisum / metric->hicounter; + } +} + +// Threshold to protect against the ill-effects of a zero far-end. +const float WebRtcAec_kMinFarendPSD = 15; + +// Updates the following smoothed Power Spectral Densities (PSD): +// - sd : near-end +// - se : residual echo +// - sx : far-end +// - sde : cross-PSD of near-end and residual echo +// - sxd : cross-PSD of near-end and far-end +// +// In addition to updating the PSDs, also the filter diverge state is +// determined. +static void UpdateCoherenceSpectra(int mult, + bool extended_filter_enabled, + float efw[2][PART_LEN1], + float dfw[2][PART_LEN1], + float xfw[2][PART_LEN1], + CoherenceState* coherence_state, + short* filter_divergence_state, + int* extreme_filter_divergence) { + // Power estimate smoothing coefficients. + const float* ptrGCoh = + extended_filter_enabled + ? WebRtcAec_kExtendedSmoothingCoefficients[mult - 1] + : WebRtcAec_kNormalSmoothingCoefficients[mult - 1]; + int i; + float sdSum = 0, seSum = 0; + + for (i = 0; i < PART_LEN1; i++) { + coherence_state->sd[i] = + ptrGCoh[0] * coherence_state->sd[i] + + ptrGCoh[1] * (dfw[0][i] * dfw[0][i] + dfw[1][i] * dfw[1][i]); + coherence_state->se[i] = + ptrGCoh[0] * coherence_state->se[i] + + ptrGCoh[1] * (efw[0][i] * efw[0][i] + efw[1][i] * efw[1][i]); + // We threshold here to protect against the ill-effects of a zero farend. + // The threshold is not arbitrarily chosen, but balances protection and + // adverse interaction with the algorithm's tuning. + // TODO(bjornv): investigate further why this is so sensitive. 
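In math terms, `UpdateLogRatioMetric` above and the PSD recursions around it implement, with ε = 1e-10 guarding the logs and (g₀, g₁) the smoothing pair selected into `ptrGCoh`:

\[ m_{\text{instant}} = 10\bigl(\log_{10}(n+\varepsilon) - \log_{10}(d+\varepsilon)\bigr) = 10\log_{10}\frac{n+\varepsilon}{d+\varepsilon}, \]
\[ S_d[k] \leftarrow g_0\, S_d[k] + g_1\, |D[k]|^2, \qquad S_e[k] \leftarrow g_0\, S_e[k] + g_1\, |E[k]|^2 . \]

The far-end update that follows is the same recursion with the `WebRtcAec_kMinFarendPSD` floor from the comment above applied to |X[k]|².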
+ coherence_state->sx[i] = + ptrGCoh[0] * coherence_state->sx[i] + + ptrGCoh[1] * + WEBRTC_SPL_MAX(xfw[0][i] * xfw[0][i] + xfw[1][i] * xfw[1][i], + WebRtcAec_kMinFarendPSD); + + coherence_state->sde[i][0] = + ptrGCoh[0] * coherence_state->sde[i][0] + + ptrGCoh[1] * (dfw[0][i] * efw[0][i] + dfw[1][i] * efw[1][i]); + coherence_state->sde[i][1] = + ptrGCoh[0] * coherence_state->sde[i][1] + + ptrGCoh[1] * (dfw[0][i] * efw[1][i] - dfw[1][i] * efw[0][i]); + + coherence_state->sxd[i][0] = + ptrGCoh[0] * coherence_state->sxd[i][0] + + ptrGCoh[1] * (dfw[0][i] * xfw[0][i] + dfw[1][i] * xfw[1][i]); + coherence_state->sxd[i][1] = + ptrGCoh[0] * coherence_state->sxd[i][1] + + ptrGCoh[1] * (dfw[0][i] * xfw[1][i] - dfw[1][i] * xfw[0][i]); + + sdSum += coherence_state->sd[i]; + seSum += coherence_state->se[i]; + } + + // Divergent filter safeguard update. + *filter_divergence_state = + (*filter_divergence_state ? 1.05f : 1.0f) * seSum > sdSum; + + // Signal extreme filter divergence if the error is significantly larger + // than the nearend (13 dB). + *extreme_filter_divergence = (seSum > (19.95f * sdSum)); +} + +// Window time domain data to be used by the fft. +__inline static void WindowData(float* x_windowed, const float* x) { + int i; + for (i = 0; i < PART_LEN; i++) { + x_windowed[i] = x[i] * WebRtcAec_sqrtHanning[i]; + x_windowed[PART_LEN + i] = + x[PART_LEN + i] * WebRtcAec_sqrtHanning[PART_LEN - i]; + } +} + +// Puts fft output data into a complex valued array. +__inline static void StoreAsComplex(const float* data, + float data_complex[2][PART_LEN1]) { + int i; + data_complex[0][0] = data[0]; + data_complex[1][0] = 0; + for (i = 1; i < PART_LEN; i++) { + data_complex[0][i] = data[2 * i]; + data_complex[1][i] = data[2 * i + 1]; + } + data_complex[0][PART_LEN] = data[1]; + data_complex[1][PART_LEN] = 0; +} + +static void ComputeCoherence(const CoherenceState* coherence_state, + float* cohde, + float* cohxd) { + // Subband coherence + for (int i = 0; i < PART_LEN1; i++) { + cohde[i] = (coherence_state->sde[i][0] * coherence_state->sde[i][0] + + coherence_state->sde[i][1] * coherence_state->sde[i][1]) / + (coherence_state->sd[i] * coherence_state->se[i] + 1e-10f); + cohxd[i] = (coherence_state->sxd[i][0] * coherence_state->sxd[i][0] + + coherence_state->sxd[i][1] * coherence_state->sxd[i][1]) / + (coherence_state->sx[i] * coherence_state->sd[i] + 1e-10f); + } +} + +static void GetHighbandGain(const float* lambda, float* nlpGainHband) { + int i; + + *nlpGainHband = 0.0f; + for (i = freqAvgIc; i < PART_LEN1 - 1; i++) { + *nlpGainHband += lambda[i]; + } + *nlpGainHband /= static_cast<float>(PART_LEN1 - 1 - freqAvgIc); +} + +static void GenerateComplexNoise(uint32_t* seed, float noise[2][PART_LEN1]) { + const float kPi2 = 6.28318530717959f; + int16_t randW16[PART_LEN]; + WebRtcSpl_RandUArray(randW16, PART_LEN, seed); + + noise[0][0] = 0; + noise[1][0] = 0; + for (size_t i = 1; i < PART_LEN1; i++) { + float tmp = kPi2 * randW16[i - 1] / 32768.f; + noise[0][i] = cosf(tmp); + noise[1][i] = -sinf(tmp); + } + noise[1][PART_LEN] = 0; +} + +static void ComfortNoise(bool generate_high_frequency_noise, + uint32_t* seed, + float e_fft[2][PART_LEN1], + float high_frequency_comfort_noise[2][PART_LEN1], + const float* noise_spectrum, + const float* suppressor_gain) { + float complex_noise[2][PART_LEN1]; + + GenerateComplexNoise(seed, complex_noise); + + // Shape, scale and add comfort noise. 
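Two quantities in this stretch have compact closed forms. `ComputeCoherence` above is the standard magnitude-squared coherence, and the comfort-noise scaling computed in the loop that follows is power-complementary to the suppressor gain g[k]:

\[ c_{de}[k] = \frac{|S_{de}[k]|^2}{S_d[k]\,S_e[k] + 10^{-10}}, \qquad c_{xd}[k] = \frac{|S_{xd}[k]|^2}{S_x[k]\,S_d[k] + 10^{-10}}, \]
\[ n[k] = \sqrt{\max\bigl(1 - g[k]^2,\, 0\bigr)}\;\sqrt{S_n[k]}, \]

so that what the suppressor removes is refilled up to (approximately) the estimated noise floor S_n[k].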
+ for (int i = 1; i < PART_LEN1; ++i) { + float noise_scaling = + sqrtf(WEBRTC_SPL_MAX(1 - suppressor_gain[i] * suppressor_gain[i], 0)) * + sqrtf(noise_spectrum[i]); + e_fft[0][i] += noise_scaling * complex_noise[0][i]; + e_fft[1][i] += noise_scaling * complex_noise[1][i]; + } + + // Form comfort noise for higher frequencies. + if (generate_high_frequency_noise) { + // Compute average noise power and nlp gain over the second half of freq + // spectrum (i.e., 4->8khz). + int start_avg_band = PART_LEN1 / 2; + float upper_bands_noise_power = 0.f; + float upper_bands_suppressor_gain = 0.f; + for (int i = start_avg_band; i < PART_LEN1; ++i) { + upper_bands_noise_power += sqrtf(noise_spectrum[i]); + upper_bands_suppressor_gain += + sqrtf(WEBRTC_SPL_MAX(1 - suppressor_gain[i] * suppressor_gain[i], 0)); + } + upper_bands_noise_power /= (PART_LEN1 - start_avg_band); + upper_bands_suppressor_gain /= (PART_LEN1 - start_avg_band); + + // Shape, scale and add comfort noise. + float noise_scaling = upper_bands_suppressor_gain * upper_bands_noise_power; + high_frequency_comfort_noise[0][0] = 0; + high_frequency_comfort_noise[1][0] = 0; + for (int i = 1; i < PART_LEN1; ++i) { + high_frequency_comfort_noise[0][i] = noise_scaling * complex_noise[0][i]; + high_frequency_comfort_noise[1][i] = noise_scaling * complex_noise[1][i]; + } + high_frequency_comfort_noise[1][PART_LEN] = 0; + } else { + memset(high_frequency_comfort_noise, 0, + 2 * PART_LEN1 * sizeof(high_frequency_comfort_noise[0][0])); + } +} + +static void InitLevel(PowerLevel* level) { + const float kBigFloat = 1E17f; + level->averagelevel.Reset(); + level->framelevel.Reset(); + level->minlevel = kBigFloat; +} + +static void InitStats(Stats* stats) { + stats->instant = kOffsetLevel; + stats->average = kOffsetLevel; + stats->max = kOffsetLevel; + stats->min = kOffsetLevel * (-1); + stats->sum = 0; + stats->hisum = 0; + stats->himean = kOffsetLevel; + stats->counter = 0; + stats->hicounter = 0; +} + +static void InitMetrics(AecCore* self) { + self->stateCounter = 0; + InitLevel(&self->farlevel); + InitLevel(&self->nearlevel); + InitLevel(&self->linoutlevel); + InitLevel(&self->nlpoutlevel); + + InitStats(&self->erl); + InitStats(&self->erle); + InitStats(&self->aNlp); + InitStats(&self->rerl); + + self->divergent_filter_fraction.Reset(); +} + +static float CalculatePower(const float* in, size_t num_samples) { + size_t k; + float energy = 0.0f; + + for (k = 0; k < num_samples; ++k) { + energy += in[k] * in[k]; + } + return energy / num_samples; +} + +static void UpdateLevel(PowerLevel* level, float power) { + level->framelevel.AddValue(power); + if (level->framelevel.EndOfBlock()) { + const float new_frame_level = level->framelevel.GetLatestMean(); + if (new_frame_level > 0) { + if (new_frame_level < level->minlevel) { + level->minlevel = new_frame_level; // New minimum. + } else { + level->minlevel *= (1 + 0.001f); // Small increase. 
+ } + } + level->averagelevel.AddValue(new_frame_level); + } +} + +static void UpdateMetrics(AecCore* aec) { + const float actThresholdNoisy = 8.0f; + const float actThresholdClean = 40.0f; + + const float noisyPower = 300000.0f; + + float actThreshold; + + if (aec->echoState) { // Check if echo is likely present + aec->stateCounter++; + } + + if (aec->linoutlevel.framelevel.EndOfBlock()) { + aec->divergent_filter_fraction.AddObservation(aec->nearlevel, + aec->linoutlevel, + aec->nlpoutlevel); + } + + if (aec->farlevel.averagelevel.EndOfBlock()) { + if (aec->farlevel.minlevel < noisyPower) { + actThreshold = actThresholdClean; + } else { + actThreshold = actThresholdNoisy; + } + + const float far_average_level = aec->farlevel.averagelevel.GetLatestMean(); + + // The last condition is to let estimation be made in active far-end + // segments only. + if ((aec->stateCounter > (0.5f * kCountLen * kSubCountLen)) && + (aec->farlevel.framelevel.EndOfBlock()) && + (far_average_level > (actThreshold * aec->farlevel.minlevel))) { + + // ERL: error return loss. + const float near_average_level = + aec->nearlevel.averagelevel.GetLatestMean(); + UpdateLogRatioMetric(&aec->erl, far_average_level, near_average_level); + + // A_NLP: error return loss enhanced before the nonlinear suppression. + const float linout_average_level = + aec->linoutlevel.averagelevel.GetLatestMean(); + UpdateLogRatioMetric(&aec->aNlp, near_average_level, + linout_average_level); + + // ERLE: error return loss enhanced. + const float nlpout_average_level = + aec->nlpoutlevel.averagelevel.GetLatestMean(); + UpdateLogRatioMetric(&aec->erle, near_average_level, + nlpout_average_level); + } + + aec->stateCounter = 0; + } +} + +static void UpdateDelayMetrics(AecCore* self) { + int i = 0; + int delay_values = 0; + int median = 0; + int lookahead = WebRtc_lookahead(self->delay_estimator); + const int kMsPerBlock = PART_LEN / (self->mult * 8); + int64_t l1_norm = 0; + + if (self->num_delay_values == 0) { + // We have no new delay value data. Even though -1 is a valid |median| in + // the sense that we allow negative values, it will practically never be + // used since multiples of |kMsPerBlock| will always be returned. + // We therefore use -1 to indicate in the logs that the delay estimator was + // not able to estimate the delay. + self->delay_median = -1; + self->delay_std = -1; + self->fraction_poor_delays = -1; + return; + } + + // Start value for median count down. + delay_values = self->num_delay_values >> 1; + // Get median of delay values since last update. + for (i = 0; i < kHistorySizeBlocks; i++) { + delay_values -= self->delay_histogram[i]; + if (delay_values < 0) { + median = i; + break; + } + } + // Account for lookahead. + self->delay_median = (median - lookahead) * kMsPerBlock; + + // Calculate the L1 norm, with median value as central moment. + for (i = 0; i < kHistorySizeBlocks; i++) { + l1_norm += abs(i - median) * self->delay_histogram[i]; + } + self->delay_std = + static_cast<int>((l1_norm + self->num_delay_values / 2) / + self->num_delay_values) * kMsPerBlock; + + // Determine fraction of delays that are out of bounds, that is, either + // negative (anti-causal system) or larger than the AEC filter length. 
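The median search in `UpdateDelayMetrics` above counts half of the observations down through the histogram until it crosses the midpoint; the same idea in isolation, as a hypothetical standalone helper:

```cpp
// Returns the index of the median bin of a histogram holding `num_values`
// observations -- the countdown scheme used by UpdateDelayMetrics.
int HistogramMedianBin(const int* histogram, int num_bins, int num_values) {
  int remaining = num_values >> 1;  // half of the observations
  for (int i = 0; i < num_bins; ++i) {
    remaining -= histogram[i];
    if (remaining < 0) {
      return i;  // the running count crossed the halfway point in bin i
    }
  }
  return num_bins - 1;  // reached only if the histogram under-counts
}
```

The spread is then reported as the L1 distance from that median bin, scaled to milliseconds via `kMsPerBlock`, and the block that follows counts the delay estimates falling outside the causal/filter-length bounds.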
+ { + int num_delays_out_of_bounds = self->num_delay_values; + const int histogram_length = + sizeof(self->delay_histogram) / sizeof(self->delay_histogram[0]); + for (i = lookahead; i < lookahead + self->num_partitions; ++i) { + if (i < histogram_length) + num_delays_out_of_bounds -= self->delay_histogram[i]; + } + self->fraction_poor_delays = + static_cast<float>(num_delays_out_of_bounds) / self->num_delay_values; + } + + // Reset histogram. + memset(self->delay_histogram, 0, sizeof(self->delay_histogram)); + self->num_delay_values = 0; +} + +static void ScaledInverseFft(const OouraFft& ooura_fft, + float freq_data[2][PART_LEN1], + float time_data[PART_LEN2], + float scale, + int conjugate) { + int i; + const float normalization = scale / static_cast<float>(PART_LEN2); + const float sign = (conjugate ? -1 : 1); + time_data[0] = freq_data[0][0] * normalization; + time_data[1] = freq_data[0][PART_LEN] * normalization; + for (i = 1; i < PART_LEN; i++) { + time_data[2 * i] = freq_data[0][i] * normalization; + time_data[2 * i + 1] = sign * freq_data[1][i] * normalization; + } + ooura_fft.InverseFft(time_data); +} + +static void Fft(const OouraFft& ooura_fft, + float time_data[PART_LEN2], + float freq_data[2][PART_LEN1]) { + int i; + ooura_fft.Fft(time_data); + + // Reorder fft output data. + freq_data[1][0] = 0; + freq_data[1][PART_LEN] = 0; + freq_data[0][0] = time_data[0]; + freq_data[0][PART_LEN] = time_data[1]; + for (i = 1; i < PART_LEN; i++) { + freq_data[0][i] = time_data[2 * i]; + freq_data[1][i] = time_data[2 * i + 1]; + } +} + +static int SignalBasedDelayCorrection(AecCore* self) { + int delay_correction = 0; + int last_delay = -2; + RTC_DCHECK(self); +#if !defined(WEBRTC_ANDROID) + // On desktops, turn on correction after |kDelayCorrectionStart| frames. This + // is to let the delay estimation get a chance to converge. Also, if the + // playout audio volume is low (or even muted) the delay estimation can return + // a very large delay, which will break the AEC if it is applied. + if (self->frame_count < kDelayCorrectionStart) { + self->data_dumper->DumpRaw("aec_da_reported_delay", 1, &last_delay); + return 0; + } +#endif + + // 1. Check for non-negative delay estimate. Note that the estimates we get + // from the delay estimation are not compensated for lookahead. Hence, a + // negative |last_delay| is an invalid one. + // 2. Verify that there is a delay change. In addition, only allow a change + // if the delay is outside a certain region taking the AEC filter length + // into account. + // TODO(bjornv): Investigate if we can remove the non-zero delay change check. + // 3. Only allow delay correction if the delay estimation quality exceeds + // |delay_quality_threshold|. + // 4. Finally, verify that the proposed |delay_correction| is feasible by + // comparing with the size of the far-end buffer. + last_delay = WebRtc_last_delay(self->delay_estimator); + self->data_dumper->DumpRaw("aec_da_reported_delay", 1, &last_delay); + if ((last_delay >= 0) && (last_delay != self->previous_delay) && + (WebRtc_last_delay_quality(self->delay_estimator) > + self->delay_quality_threshold)) { + int delay = last_delay - WebRtc_lookahead(self->delay_estimator); + // Allow for a slack in the actual delay, defined by a |lower_bound| and an + // |upper_bound|. The adaptive echo cancellation filter is currently + // |num_partitions| (of 64 samples) long. If the delay estimate is negative + // or at least 3/4 of the filter length we open up for correction. 
+ const int lower_bound = 0; + const int upper_bound = self->num_partitions * 3 / 4; + const int do_correction = delay <= lower_bound || delay > upper_bound; + if (do_correction == 1) { + int available_read = self->farend_block_buffer_.Size(); + // With |shift_offset| we gradually rely on the delay estimates. For + // positive delays we reduce the correction by |shift_offset| to lower the + // risk of pushing the AEC into a non causal state. For negative delays + // we rely on the values up to a rounding error, hence compensate by 1 + // element to make sure to push the delay into the causal region. + delay_correction = -delay; + delay_correction += delay > self->shift_offset ? self->shift_offset : 1; + self->shift_offset--; + self->shift_offset = (self->shift_offset <= 1 ? 1 : self->shift_offset); + if (delay_correction > available_read - self->mult - 1) { + // There is not enough data in the buffer to perform this shift. Hence, + // we do not rely on the delay estimate and do nothing. + delay_correction = 0; + } else { + self->previous_delay = last_delay; + ++self->delay_correction_count; + } + } + } + // Update the |delay_quality_threshold| once we have our first delay + // correction. + if (self->delay_correction_count > 0) { + float delay_quality = WebRtc_last_delay_quality(self->delay_estimator); + delay_quality = + (delay_quality > kDelayQualityThresholdMax ? kDelayQualityThresholdMax + : delay_quality); + self->delay_quality_threshold = + (delay_quality > self->delay_quality_threshold + ? delay_quality + : self->delay_quality_threshold); + } + self->data_dumper->DumpRaw("aec_da_delay_correction", 1, &delay_correction); + + return delay_correction; +} + +static void RegressorPower(int num_partitions, + int latest_added_partition, + float x_fft_buf[2] + [kExtendedNumPartitions * PART_LEN1], + float x_pow[PART_LEN1]) { + RTC_DCHECK_LT(latest_added_partition, num_partitions); + memset(x_pow, 0, PART_LEN1 * sizeof(x_pow[0])); + + int partition = latest_added_partition; + int x_fft_buf_position = partition * PART_LEN1; + for (int i = 0; i < num_partitions; ++i) { + for (int bin = 0; bin < PART_LEN1; ++bin) { + float re = x_fft_buf[0][x_fft_buf_position]; + float im = x_fft_buf[1][x_fft_buf_position]; + x_pow[bin] += re * re + im * im; + ++x_fft_buf_position; + } + + ++partition; + if (partition == num_partitions) { + partition = 0; + RTC_DCHECK_EQ(num_partitions * PART_LEN1, x_fft_buf_position); + x_fft_buf_position = 0; + } + } +} + +static void EchoSubtraction(const OouraFft& ooura_fft, + int num_partitions, + int extended_filter_enabled, + int* extreme_filter_divergence, + float filter_step_size, + float error_threshold, + float* x_fft, + int* x_fft_buf_block_pos, + float x_fft_buf[2] + [kExtendedNumPartitions * PART_LEN1], + float* const y, + float x_pow[PART_LEN1], + float h_fft_buf[2] + [kExtendedNumPartitions * PART_LEN1], + float echo_subtractor_output[PART_LEN]) { + float s_fft[2][PART_LEN1]; + float e_extended[PART_LEN2]; + float s_extended[PART_LEN2]; + float* s; + float e[PART_LEN]; + float e_fft[2][PART_LEN1]; + int i; + + // Update the x_fft_buf block position. + (*x_fft_buf_block_pos)--; + if ((*x_fft_buf_block_pos) == -1) { + *x_fft_buf_block_pos = num_partitions - 1; + } + + // Buffer x_fft. 
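+  // |x_fft_buf| acts as a circular buffer of the most recent far-end spectra:
+  // the block position was just stepped back one slot (with wrap-around), so
+  // filter partition i is always paired with the far-end block that is i
+  // blocks old.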
+ memcpy(x_fft_buf[0] + (*x_fft_buf_block_pos) * PART_LEN1, x_fft, + sizeof(float) * PART_LEN1); + memcpy(x_fft_buf[1] + (*x_fft_buf_block_pos) * PART_LEN1, &x_fft[PART_LEN1], + sizeof(float) * PART_LEN1); + + memset(s_fft, 0, sizeof(s_fft)); + + // Conditionally reset the echo subtraction filter if the filter has diverged + // significantly. + if (!extended_filter_enabled && *extreme_filter_divergence) { + memset(h_fft_buf, 0, + 2 * kExtendedNumPartitions * PART_LEN1 * sizeof(h_fft_buf[0][0])); + *extreme_filter_divergence = 0; + } + + // Produce echo estimate s_fft. + WebRtcAec_FilterFar(num_partitions, *x_fft_buf_block_pos, x_fft_buf, + h_fft_buf, s_fft); + + // Compute the time-domain echo estimate s. + ScaledInverseFft(ooura_fft, s_fft, s_extended, 2.0f, 0); + s = &s_extended[PART_LEN]; + + // Compute the time-domain echo prediction error. + for (i = 0; i < PART_LEN; ++i) { + e[i] = y[i] - s[i]; + } + + // Compute the frequency domain echo prediction error. + memset(e_extended, 0, sizeof(float) * PART_LEN); + memcpy(e_extended + PART_LEN, e, sizeof(float) * PART_LEN); + Fft(ooura_fft, e_extended, e_fft); + + // Scale error signal inversely with far power. + WebRtcAec_ScaleErrorSignal(filter_step_size, error_threshold, x_pow, e_fft); + WebRtcAec_FilterAdaptation(ooura_fft, num_partitions, *x_fft_buf_block_pos, + x_fft_buf, e_fft, h_fft_buf); + memcpy(echo_subtractor_output, e, sizeof(float) * PART_LEN); +} + +static void FormSuppressionGain(AecCore* aec, + float cohde[PART_LEN1], + float cohxd[PART_LEN1], + float hNl[PART_LEN1]) { + float hNlDeAvg, hNlXdAvg; + float hNlPref[kPrefBandSize]; + float hNlFb = 0, hNlFbLow = 0; + const int prefBandSize = kPrefBandSize / aec->mult; + const float prefBandQuant = 0.75f, prefBandQuantLow = 0.5f; + const int minPrefBand = 4 / aec->mult; + // Power estimate smoothing coefficients. + const float* min_overdrive = aec->extended_filter_enabled + ? kExtendedMinOverDrive + : kNormalMinOverDrive; + + hNlXdAvg = 0; + for (int i = minPrefBand; i < prefBandSize + minPrefBand; ++i) { + hNlXdAvg += cohxd[i]; + } + hNlXdAvg /= prefBandSize; + hNlXdAvg = 1 - hNlXdAvg; + + hNlDeAvg = 0; + for (int i = minPrefBand; i < prefBandSize + minPrefBand; ++i) { + hNlDeAvg += cohde[i]; + } + hNlDeAvg /= prefBandSize; + + if (hNlXdAvg < 0.75f && hNlXdAvg < aec->hNlXdAvgMin) { + aec->hNlXdAvgMin = hNlXdAvg; + } + + if (hNlDeAvg > 0.98f && hNlXdAvg > 0.9f) { + aec->stNearState = 1; + } else if (hNlDeAvg < 0.95f || hNlXdAvg < 0.8f) { + aec->stNearState = 0; + } + + if (aec->hNlXdAvgMin == 1) { + aec->echoState = 0; + aec->overDrive = min_overdrive[aec->nlp_mode]; + + if (aec->stNearState == 1) { + memcpy(hNl, cohde, sizeof(hNl[0]) * PART_LEN1); + hNlFb = hNlDeAvg; + hNlFbLow = hNlDeAvg; + } else { + for (int i = 0; i < PART_LEN1; ++i) { + hNl[i] = 1 - cohxd[i]; + } + hNlFb = hNlXdAvg; + hNlFbLow = hNlXdAvg; + } + } else { + if (aec->stNearState == 1) { + aec->echoState = 0; + memcpy(hNl, cohde, sizeof(hNl[0]) * PART_LEN1); + hNlFb = hNlDeAvg; + hNlFbLow = hNlDeAvg; + } else { + aec->echoState = 1; + for (int i = 0; i < PART_LEN1; ++i) { + hNl[i] = WEBRTC_SPL_MIN(cohde[i], 1 - cohxd[i]); + } + + // Select an order statistic from the preferred bands. + // TODO(peah): Using quicksort now, but a selection algorithm may be + // preferred. 
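+      // E.g., for prefBandSize = 12, prefBandQuant = 0.75f picks sorted index
+      // floor(0.75 * 11) = 8 (an upper quantile) for |hNlFb|, while
+      // prefBandQuantLow picks index floor(0.5 * 11) = 5 (near the median)
+      // for |hNlFbLow|.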
+ memcpy(hNlPref, &hNl[minPrefBand], sizeof(float) * prefBandSize); + qsort(hNlPref, prefBandSize, sizeof(float), CmpFloat); + hNlFb = hNlPref[static_cast<int>(floor(prefBandQuant * + (prefBandSize - 1)))]; + hNlFbLow = hNlPref[static_cast<int>(floor(prefBandQuantLow * + (prefBandSize - 1)))]; + } + } + + // Track the local filter minimum to determine suppression overdrive. + if (hNlFbLow < 0.6f && hNlFbLow < aec->hNlFbLocalMin) { + aec->hNlFbLocalMin = hNlFbLow; + aec->hNlFbMin = hNlFbLow; + aec->hNlNewMin = 1; + aec->hNlMinCtr = 0; + } + aec->hNlFbLocalMin = + WEBRTC_SPL_MIN(aec->hNlFbLocalMin + 0.0008f / aec->mult, 1); + aec->hNlXdAvgMin = WEBRTC_SPL_MIN(aec->hNlXdAvgMin + 0.0006f / aec->mult, 1); + + if (aec->hNlNewMin == 1) { + aec->hNlMinCtr++; + } + if (aec->hNlMinCtr == 2) { + aec->hNlNewMin = 0; + aec->hNlMinCtr = 0; + aec->overDrive = + WEBRTC_SPL_MAX(kTargetSupp[aec->nlp_mode] / + static_cast<float>(log(aec->hNlFbMin + 1e-10f) + 1e-10f), + min_overdrive[aec->nlp_mode]); + } + + // Smooth the overdrive. + if (aec->overDrive < aec->overdrive_scaling) { + aec->overdrive_scaling = + 0.99f * aec->overdrive_scaling + 0.01f * aec->overDrive; + } else { + aec->overdrive_scaling = + 0.9f * aec->overdrive_scaling + 0.1f * aec->overDrive; + } + + // Apply the overdrive. + WebRtcAec_Overdrive(aec->overdrive_scaling, hNlFb, hNl); +} + +static void EchoSuppression(const OouraFft& ooura_fft, + AecCore* aec, + float* nearend_extended_block_lowest_band, + float farend_extended_block[PART_LEN2], + float* echo_subtractor_output, + float output[NUM_HIGH_BANDS_MAX + 1][PART_LEN]) { + float efw[2][PART_LEN1]; + float xfw[2][PART_LEN1]; + float dfw[2][PART_LEN1]; + float comfortNoiseHband[2][PART_LEN1]; + float fft[PART_LEN2]; + float nlpGainHband; + int i; + size_t j; + + // Coherence and non-linear filter + float cohde[PART_LEN1], cohxd[PART_LEN1]; + float hNl[PART_LEN1]; + + // Filter energy + const int delayEstInterval = 10 * aec->mult; + + float* xfw_ptr = NULL; + + // Update eBuf with echo subtractor output. + memcpy(aec->eBuf + PART_LEN, echo_subtractor_output, + sizeof(float) * PART_LEN); + + // Analysis filter banks for the echo suppressor. + // Windowed near-end ffts. + WindowData(fft, nearend_extended_block_lowest_band); + ooura_fft.Fft(fft); + StoreAsComplex(fft, dfw); + + // Windowed echo suppressor output ffts. + WindowData(fft, aec->eBuf); + ooura_fft.Fft(fft); + StoreAsComplex(fft, efw); + + // NLP + + // Convert far-end partition to the frequency domain with windowing. + WindowData(fft, farend_extended_block); + Fft(ooura_fft, fft, xfw); + xfw_ptr = &xfw[0][0]; + + // Buffer far. + memcpy(aec->xfwBuf, xfw_ptr, sizeof(float) * 2 * PART_LEN1); + + aec->delayEstCtr++; + if (aec->delayEstCtr == delayEstInterval) { + aec->delayEstCtr = 0; + aec->delayIdx = WebRtcAec_PartitionDelay(aec->num_partitions, aec->wfBuf); + } + + aec->data_dumper->DumpRaw("aec_nlp_delay", 1, &aec->delayIdx); + + // Use delayed far. + memcpy(xfw, aec->xfwBuf + aec->delayIdx * PART_LEN1, + sizeof(xfw[0][0]) * 2 * PART_LEN1); + + WebRtcAec_UpdateCoherenceSpectra(aec->mult, aec->extended_filter_enabled == 1, + efw, dfw, xfw, &aec->coherence_state, + &aec->divergeState, + &aec->extreme_filter_divergence); + + WebRtcAec_ComputeCoherence(&aec->coherence_state, cohde, cohxd); + + // Select the microphone signal as output if the filter is deemed to have + // diverged. 
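+  // A diverged linear filter can add echo instead of removing it, so the raw
+  // windowed nearend spectrum |dfw| is handed to the suppressor in place of
+  // the filter output |efw|.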
+ if (aec->divergeState) { + memcpy(efw, dfw, sizeof(efw[0][0]) * 2 * PART_LEN1); + } + + FormSuppressionGain(aec, cohde, cohxd, hNl); + + aec->data_dumper->DumpRaw("aec_nlp_gain", PART_LEN1, hNl); + + WebRtcAec_Suppress(hNl, efw); + + // Add comfort noise. + ComfortNoise(aec->num_bands > 1, &aec->seed, efw, comfortNoiseHband, + aec->noisePow, hNl); + + // Inverse error fft. + ScaledInverseFft(ooura_fft, efw, fft, 2.0f, 1); + + // Overlap and add to obtain output. + for (i = 0; i < PART_LEN; i++) { + output[0][i] = (fft[i] * WebRtcAec_sqrtHanning[i] + + aec->outBuf[i] * WebRtcAec_sqrtHanning[PART_LEN - i]); + + // Saturate output to keep it in the allowed range. + output[0][i] = WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, output[0][i], + WEBRTC_SPL_WORD16_MIN); + } + memcpy(aec->outBuf, &fft[PART_LEN], PART_LEN * sizeof(aec->outBuf[0])); + + // For H band + if (aec->num_bands > 1) { + // H band gain + // average nlp over low band: average over second half of freq spectrum + // (4->8khz) + GetHighbandGain(hNl, &nlpGainHband); + + // Inverse comfort_noise + ScaledInverseFft(ooura_fft, comfortNoiseHband, fft, 2.0f, 0); + + // compute gain factor + for (j = 1; j < aec->num_bands; ++j) { + for (i = 0; i < PART_LEN; i++) { + output[j][i] = aec->previous_nearend_block[j][i] * nlpGainHband; + } + } + + // Add some comfort noise where Hband is attenuated. + for (i = 0; i < PART_LEN; i++) { + output[1][i] += cnScaleHband * fft[i]; + } + + // Saturate output to keep it in the allowed range. + for (j = 1; j < aec->num_bands; ++j) { + for (i = 0; i < PART_LEN; i++) { + output[j][i] = WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, output[j][i], + WEBRTC_SPL_WORD16_MIN); + } + } + } + + // Copy the current block to the old position. + memcpy(aec->eBuf, aec->eBuf + PART_LEN, sizeof(float) * PART_LEN); + + memmove(aec->xfwBuf + PART_LEN1, aec->xfwBuf, + sizeof(aec->xfwBuf) - sizeof(complex_t) * PART_LEN1); +} + +static void ProcessNearendBlock( + AecCore* aec, + float farend_extended_block_lowest_band[PART_LEN2], + float nearend_block[NUM_HIGH_BANDS_MAX + 1][PART_LEN], + float output_block[NUM_HIGH_BANDS_MAX + 1][PART_LEN]) { + size_t i; + + float fft[PART_LEN2]; + float nearend_extended_block_lowest_band[PART_LEN2]; + float farend_fft[2][PART_LEN1]; + float nearend_fft[2][PART_LEN1]; + float far_spectrum = 0.0f; + float near_spectrum = 0.0f; + float abs_far_spectrum[PART_LEN1]; + float abs_near_spectrum[PART_LEN1]; + + const float gPow[2] = {0.9f, 0.1f}; + + // Noise estimate constants. + const int noiseInitBlocks = 500 * aec->mult; + const float step = 0.1f; + const float ramp = 1.0002f; + const float gInitNoise[2] = {0.999f, 0.001f}; + + float echo_subtractor_output[PART_LEN]; + + aec->data_dumper->DumpWav("aec_far", PART_LEN, + &farend_extended_block_lowest_band[PART_LEN], + std::min(aec->sampFreq, 16000), 1); + aec->data_dumper->DumpWav("aec_near", PART_LEN, &nearend_block[0][0], + std::min(aec->sampFreq, 16000), 1); + + if (aec->metricsMode == 1) { + // Update power levels + UpdateLevel( + &aec->farlevel, + CalculatePower(&farend_extended_block_lowest_band[PART_LEN], PART_LEN)); + UpdateLevel(&aec->nearlevel, + CalculatePower(&nearend_block[0][0], PART_LEN)); + } + + // Convert far-end signal to the frequency domain. + memcpy(fft, farend_extended_block_lowest_band, sizeof(float) * PART_LEN2); + Fft(aec->ooura_fft, fft, farend_fft); + + // Form extended nearend frame. 
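+  // The extended block consists of the previous PART_LEN nearend samples
+  // followed by the current ones, i.e. PART_LEN2 = 128 samples in total,
+  // providing the 50% overlap used by the windowed FFT analysis.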
+ memcpy(&nearend_extended_block_lowest_band[0], + &aec->previous_nearend_block[0][0], sizeof(float) * PART_LEN); + memcpy(&nearend_extended_block_lowest_band[PART_LEN], &nearend_block[0][0], + sizeof(float) * PART_LEN); + + // Convert near-end signal to the frequency domain. + memcpy(fft, nearend_extended_block_lowest_band, sizeof(float) * PART_LEN2); + Fft(aec->ooura_fft, fft, nearend_fft); + + // Power smoothing. + if (aec->refined_adaptive_filter_enabled) { + for (i = 0; i < PART_LEN1; ++i) { + far_spectrum = farend_fft[0][i] * farend_fft[0][i] + + farend_fft[1][i] * farend_fft[1][i]; + // Calculate the magnitude spectrum. + abs_far_spectrum[i] = sqrtf(far_spectrum); + } + RegressorPower(aec->num_partitions, aec->xfBufBlockPos, aec->xfBuf, + aec->xPow); + } else { + for (i = 0; i < PART_LEN1; ++i) { + far_spectrum = farend_fft[0][i] * farend_fft[0][i] + + farend_fft[1][i] * farend_fft[1][i]; + aec->xPow[i] = + gPow[0] * aec->xPow[i] + gPow[1] * aec->num_partitions * far_spectrum; + // Calculate the magnitude spectrum. + abs_far_spectrum[i] = sqrtf(far_spectrum); + } + } + + for (i = 0; i < PART_LEN1; ++i) { + near_spectrum = nearend_fft[0][i] * nearend_fft[0][i] + + nearend_fft[1][i] * nearend_fft[1][i]; + aec->dPow[i] = gPow[0] * aec->dPow[i] + gPow[1] * near_spectrum; + // Calculate the magnitude spectrum. + abs_near_spectrum[i] = sqrtf(near_spectrum); + } + + // Estimate noise power. Wait until dPow is more stable. + if (aec->noiseEstCtr > 50) { + for (i = 0; i < PART_LEN1; i++) { + if (aec->dPow[i] < aec->dMinPow[i]) { + aec->dMinPow[i] = + (aec->dPow[i] + step * (aec->dMinPow[i] - aec->dPow[i])) * ramp; + } else { + aec->dMinPow[i] *= ramp; + } + } + } + + // Smooth increasing noise power from zero at the start, + // to avoid a sudden burst of comfort noise. + if (aec->noiseEstCtr < noiseInitBlocks) { + aec->noiseEstCtr++; + for (i = 0; i < PART_LEN1; i++) { + if (aec->dMinPow[i] > aec->dInitMinPow[i]) { + aec->dInitMinPow[i] = gInitNoise[0] * aec->dInitMinPow[i] + + gInitNoise[1] * aec->dMinPow[i]; + } else { + aec->dInitMinPow[i] = aec->dMinPow[i]; + } + } + aec->noisePow = aec->dInitMinPow; + } else { + aec->noisePow = aec->dMinPow; + } + + // Block wise delay estimation used for logging + if (aec->delay_logging_enabled) { + if (WebRtc_AddFarSpectrumFloat(aec->delay_estimator_farend, + abs_far_spectrum, PART_LEN1) == 0) { + int delay_estimate = WebRtc_DelayEstimatorProcessFloat( + aec->delay_estimator, abs_near_spectrum, PART_LEN1); + if (delay_estimate >= 0) { + // Update delay estimate buffer. + aec->delay_histogram[delay_estimate]++; + aec->num_delay_values++; + } + if (aec->delay_metrics_delivered == 1 && + aec->num_delay_values >= kDelayMetricsAggregationWindow) { + UpdateDelayMetrics(aec); + } + } + } + + // Perform echo subtraction. 
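+  // Linear stage: subtract the adaptive-filter echo estimate from the
+  // nearend block; the residual is then handed to the nonlinear suppressor
+  // in EchoSuppression() below.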
+ EchoSubtraction( + aec->ooura_fft, aec->num_partitions, aec->extended_filter_enabled, + &aec->extreme_filter_divergence, aec->filter_step_size, + aec->error_threshold, &farend_fft[0][0], &aec->xfBufBlockPos, aec->xfBuf, + &nearend_block[0][0], aec->xPow, aec->wfBuf, echo_subtractor_output); + aec->data_dumper->DumpRaw("aec_h_fft", PART_LEN1 * aec->num_partitions, + &aec->wfBuf[0][0]); + aec->data_dumper->DumpRaw("aec_h_fft", PART_LEN1 * aec->num_partitions, + &aec->wfBuf[1][0]); + + aec->data_dumper->DumpWav("aec_out_linear", PART_LEN, echo_subtractor_output, + std::min(aec->sampFreq, 16000), 1); + + if (aec->metricsMode == 1) { + UpdateLevel(&aec->linoutlevel, + CalculatePower(echo_subtractor_output, PART_LEN)); + } + + // Perform echo suppression. + EchoSuppression(aec->ooura_fft, aec, nearend_extended_block_lowest_band, + farend_extended_block_lowest_band, echo_subtractor_output, + output_block); + + if (aec->metricsMode == 1) { + UpdateLevel(&aec->nlpoutlevel, + CalculatePower(&output_block[0][0], PART_LEN)); + UpdateMetrics(aec); + } + + // Store the nearend signal until the next frame. + for (i = 0; i < aec->num_bands; ++i) { + memcpy(&aec->previous_nearend_block[i][0], &nearend_block[i][0], + sizeof(float) * PART_LEN); + } + + aec->data_dumper->DumpWav("aec_out", PART_LEN, &output_block[0][0], + std::min(aec->sampFreq, 16000), 1); +} + +AecCore* WebRtcAec_CreateAec(int instance_count) { + AecCore* aec = new AecCore(instance_count); + + if (!aec) { + return NULL; + } + aec->nearend_buffer_size = 0; + memset(&aec->nearend_buffer[0], 0, sizeof(aec->nearend_buffer)); + // Start the output buffer with zeros to be able to produce + // a full output frame in the first frame. + aec->output_buffer_size = PART_LEN - (FRAME_LEN - PART_LEN); + memset(&aec->output_buffer[0], 0, sizeof(aec->output_buffer)); + + aec->delay_estimator_farend = + WebRtc_CreateDelayEstimatorFarend(PART_LEN1, kHistorySizeBlocks); + if (aec->delay_estimator_farend == NULL) { + WebRtcAec_FreeAec(aec); + return NULL; + } + // We create the delay_estimator with the same amount of maximum lookahead as + // the delay history size (kHistorySizeBlocks) for symmetry reasons. + aec->delay_estimator = WebRtc_CreateDelayEstimator( + aec->delay_estimator_farend, kHistorySizeBlocks); + if (aec->delay_estimator == NULL) { + WebRtcAec_FreeAec(aec); + return NULL; + } +#ifdef WEBRTC_ANDROID + aec->delay_agnostic_enabled = 1; // DA-AEC enabled by default. + // DA-AEC assumes the system is causal from the beginning and will self adjust + // the lookahead when shifting is required. 
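+  // Hence no lookahead is used on Android. On other platforms a fixed
+  // lookahead of kLookaheadBlocks (15 blocks, i.e. 15 * PART_LEN = 960
+  // samples) is set in the #else branch below.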
+ WebRtc_set_lookahead(aec->delay_estimator, 0); +#else + aec->delay_agnostic_enabled = 0; + WebRtc_set_lookahead(aec->delay_estimator, kLookaheadBlocks); +#endif + aec->extended_filter_enabled = 0; + aec->refined_adaptive_filter_enabled = false; + + rtc::CritScope cs_init(&WebRtcAec_CriticalSection); + static bool initted = false; + if (!initted) { + // Assembly optimization + WebRtcAec_FilterFar = FilterFar; + WebRtcAec_ScaleErrorSignal = ScaleErrorSignal; + WebRtcAec_FilterAdaptation = FilterAdaptation; + WebRtcAec_Overdrive = Overdrive; + WebRtcAec_Suppress = Suppress; + WebRtcAec_ComputeCoherence = ComputeCoherence; + WebRtcAec_UpdateCoherenceSpectra = UpdateCoherenceSpectra; + WebRtcAec_StoreAsComplex = StoreAsComplex; + WebRtcAec_PartitionDelay = PartitionDelay; + WebRtcAec_WindowData = WindowData; + +#if defined(WEBRTC_ARCH_X86_FAMILY) + if (WebRtc_GetCPUInfo(kSSE2)) { + WebRtcAec_InitAec_SSE2(); + } +#endif + +#if defined(MIPS_FPU_LE) + WebRtcAec_InitAec_mips(); +#endif + +#if defined(WEBRTC_HAS_NEON) + WebRtcAec_InitAec_neon(); +#endif + initted = true; + } + + return aec; +} + +void WebRtcAec_FreeAec(AecCore* aec) { + if (aec == NULL) { + return; + } + + WebRtc_FreeDelayEstimator(aec->delay_estimator); + WebRtc_FreeDelayEstimatorFarend(aec->delay_estimator_farend); + + delete aec; +} + +static void SetAdaptiveFilterStepSize(AecCore* aec) { + // Extended filter adaptation parameter. + // TODO(ajm): No narrowband tuning yet. + const float kExtendedMu = 0.4f; + + if (aec->refined_adaptive_filter_enabled) { + aec->filter_step_size = 0.05f; + } else { + if (aec->extended_filter_enabled) { + aec->filter_step_size = kExtendedMu; + } else { + if (aec->sampFreq == 8000) { + aec->filter_step_size = 0.6f; + } else { + aec->filter_step_size = 0.5f; + } + } + } +} + +static void SetErrorThreshold(AecCore* aec) { + // Extended filter adaptation parameter. + // TODO(ajm): No narrowband tuning yet. + static const float kExtendedErrorThreshold = 1.0e-6f; + + if (aec->extended_filter_enabled) { + aec->error_threshold = kExtendedErrorThreshold; + } else { + if (aec->sampFreq == 8000) { + aec->error_threshold = 2e-6f; + } else { + aec->error_threshold = 1.5e-6f; + } + } +} + +int WebRtcAec_InitAec(AecCore* aec, int sampFreq) { + int i; + aec->data_dumper->InitiateNewSetOfRecordings(); + + aec->sampFreq = sampFreq; + + SetAdaptiveFilterStepSize(aec); + SetErrorThreshold(aec); + + if (sampFreq == 8000) { + aec->num_bands = 1; + } else { + aec->num_bands = (size_t)(sampFreq / 16000); + } + + // Start the output buffer with zeros to be able to produce + // a full output frame in the first frame. + aec->output_buffer_size = PART_LEN - (FRAME_LEN - PART_LEN); + memset(&aec->output_buffer[0], 0, sizeof(aec->output_buffer)); + aec->nearend_buffer_size = 0; + memset(&aec->nearend_buffer[0], 0, sizeof(aec->nearend_buffer)); + + // Initialize far-end buffer. + aec->farend_block_buffer_.ReInit(); + + aec->system_delay = 0; + + if (WebRtc_InitDelayEstimatorFarend(aec->delay_estimator_farend) != 0) { + return -1; + } + if (WebRtc_InitDelayEstimator(aec->delay_estimator) != 0) { + return -1; + } + aec->delay_logging_enabled = 0; + aec->delay_metrics_delivered = 0; + memset(aec->delay_histogram, 0, sizeof(aec->delay_histogram)); + aec->num_delay_values = 0; + aec->delay_median = -1; + aec->delay_std = -1; + aec->fraction_poor_delays = -1.0f; + + aec->previous_delay = -2; // (-2): Uninitialized. 
+ aec->delay_correction_count = 0; + aec->shift_offset = kInitialShiftOffset; + aec->delay_quality_threshold = kDelayQualityThresholdMin; + + aec->num_partitions = kNormalNumPartitions; + + // Update the delay estimator with filter length. We use half the + // |num_partitions| to take the echo path into account. In practice we say + // that the echo has a duration of maximum half |num_partitions|, which is not + // true, but serves as a crude measure. + WebRtc_set_allowed_offset(aec->delay_estimator, aec->num_partitions / 2); + // TODO(bjornv): I currently hard coded the enable. Once we've established + // that AECM has no performance regression, robust_validation will be enabled + // all the time and the APIs to turn it on/off will be removed. Hence, remove + // this line then. + WebRtc_enable_robust_validation(aec->delay_estimator, 1); + aec->frame_count = 0; + + // Default target suppression mode. + aec->nlp_mode = 1; + + // Sampling frequency multiplier w.r.t. 8 kHz. + // In case of multiple bands we process the lower band in 16 kHz, hence the + // multiplier is always 2. + if (aec->num_bands > 1) { + aec->mult = 2; + } else { + aec->mult = static_cast<int16_t>(aec->sampFreq) / 8000; + } + + aec->farBufWritePos = 0; + aec->farBufReadPos = 0; + + aec->inSamples = 0; + aec->outSamples = 0; + aec->knownDelay = 0; + + // Initialize buffers + memset(aec->previous_nearend_block, 0, sizeof(aec->previous_nearend_block)); + memset(aec->eBuf, 0, sizeof(aec->eBuf)); + + memset(aec->xPow, 0, sizeof(aec->xPow)); + memset(aec->dPow, 0, sizeof(aec->dPow)); + memset(aec->dInitMinPow, 0, sizeof(aec->dInitMinPow)); + aec->noisePow = aec->dInitMinPow; + aec->noiseEstCtr = 0; + + // Initial comfort noise power + for (i = 0; i < PART_LEN1; i++) { + aec->dMinPow[i] = 1.0e6f; + } + + // Holds the last block written to + aec->xfBufBlockPos = 0; + // TODO(peah): Investigate need for these initializations. Deleting them + // doesn't change the output at all and yields 0.4% overall speedup. + memset(aec->xfBuf, 0, sizeof(complex_t) * kExtendedNumPartitions * PART_LEN1); + memset(aec->wfBuf, 0, sizeof(complex_t) * kExtendedNumPartitions * PART_LEN1); + memset(aec->coherence_state.sde, 0, sizeof(complex_t) * PART_LEN1); + memset(aec->coherence_state.sxd, 0, sizeof(complex_t) * PART_LEN1); + memset(aec->xfwBuf, 0, + sizeof(complex_t) * kExtendedNumPartitions * PART_LEN1); + memset(aec->coherence_state.se, 0, sizeof(float) * PART_LEN1); + + // To prevent numerical instability in the first block. + for (i = 0; i < PART_LEN1; i++) { + aec->coherence_state.sd[i] = 1; + } + for (i = 0; i < PART_LEN1; i++) { + aec->coherence_state.sx[i] = 1; + } + + memset(aec->hNs, 0, sizeof(aec->hNs)); + memset(aec->outBuf, 0, sizeof(float) * PART_LEN); + + aec->hNlFbMin = 1; + aec->hNlFbLocalMin = 1; + aec->hNlXdAvgMin = 1; + aec->hNlNewMin = 0; + aec->hNlMinCtr = 0; + aec->overDrive = 2; + aec->overdrive_scaling = 2; + aec->delayIdx = 0; + aec->stNearState = 0; + aec->echoState = 0; + aec->divergeState = 0; + + aec->seed = 777; + aec->delayEstCtr = 0; + + aec->extreme_filter_divergence = 0; + + // Metrics disabled by default + aec->metricsMode = 0; + InitMetrics(aec); + + return 0; +} + +void WebRtcAec_BufferFarendBlock(AecCore* aec, const float* farend) { + // Check if the buffer is full, and in that case flush the oldest data. 
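+  // AdjustSize(1) shrinks the buffer by one block by moving the read pointer,
+  // discarding the oldest far-end block to make room for the new one.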
+  if (aec->farend_block_buffer_.AvaliableSpace() < 1) {
+    aec->farend_block_buffer_.AdjustSize(1);
+  }
+  aec->farend_block_buffer_.Insert(farend);
+}
+
+int WebRtcAec_AdjustFarendBufferSizeAndSystemDelay(AecCore* aec,
+                                                   int buffer_size_decrease) {
+  int achieved_buffer_size_decrease =
+      aec->farend_block_buffer_.AdjustSize(buffer_size_decrease);
+  aec->system_delay -= achieved_buffer_size_decrease * PART_LEN;
+  return achieved_buffer_size_decrease;
+}
+
+void FormNearendBlock(
+    size_t nearend_start_index,
+    size_t num_bands,
+    const float* const* nearend_frame,
+    size_t num_samples_from_nearend_frame,
+    const float nearend_buffer[NUM_HIGH_BANDS_MAX + 1]
+                              [PART_LEN - (FRAME_LEN - PART_LEN)],
+    float nearend_block[NUM_HIGH_BANDS_MAX + 1][PART_LEN]) {
+  RTC_DCHECK_LE(num_samples_from_nearend_frame, PART_LEN);
+  const int num_samples_from_buffer =
+      PART_LEN - num_samples_from_nearend_frame;
+
+  if (num_samples_from_buffer > 0) {
+    for (size_t i = 0; i < num_bands; ++i) {
+      memcpy(&nearend_block[i][0], &nearend_buffer[i][0],
+             num_samples_from_buffer * sizeof(float));
+    }
+  }
+
+  for (size_t i = 0; i < num_bands; ++i) {
+    memcpy(&nearend_block[i][num_samples_from_buffer],
+           &nearend_frame[i][nearend_start_index],
+           num_samples_from_nearend_frame * sizeof(float));
+  }
+}
+
+void BufferNearendFrame(
+    size_t nearend_start_index,
+    size_t num_bands,
+    const float* const* nearend_frame,
+    size_t num_samples_to_buffer,
+    float nearend_buffer[NUM_HIGH_BANDS_MAX + 1]
+                        [PART_LEN - (FRAME_LEN - PART_LEN)]) {
+  for (size_t i = 0; i < num_bands; ++i) {
+    memcpy(
+        &nearend_buffer[i][0],
+        &nearend_frame[i]
+                      [nearend_start_index + FRAME_LEN - num_samples_to_buffer],
+        num_samples_to_buffer * sizeof(float));
+  }
+}
+
+void BufferOutputBlock(size_t num_bands,
+                       const float output_block[NUM_HIGH_BANDS_MAX + 1]
+                                               [PART_LEN],
+                       size_t* output_buffer_size,
+                       float output_buffer[NUM_HIGH_BANDS_MAX + 1]
+                                          [2 * PART_LEN]) {
+  for (size_t i = 0; i < num_bands; ++i) {
+    memcpy(&output_buffer[i][*output_buffer_size], &output_block[i][0],
+           PART_LEN * sizeof(float));
+  }
+  (*output_buffer_size) += PART_LEN;
+}
+
+void FormOutputFrame(size_t output_start_index,
+                     size_t num_bands,
+                     size_t* output_buffer_size,
+                     float output_buffer[NUM_HIGH_BANDS_MAX + 1][2 * PART_LEN],
+                     float* const* output_frame) {
+  RTC_DCHECK_LE(FRAME_LEN, *output_buffer_size);
+  for (size_t i = 0; i < num_bands; ++i) {
+    memcpy(&output_frame[i][output_start_index], &output_buffer[i][0],
+           FRAME_LEN * sizeof(float));
+  }
+  (*output_buffer_size) -= FRAME_LEN;
+  if (*output_buffer_size > 0) {
+    RTC_DCHECK_GE(2 * PART_LEN - FRAME_LEN, (*output_buffer_size));
+    for (size_t i = 0; i < num_bands; ++i) {
+      memcpy(&output_buffer[i][0], &output_buffer[i][FRAME_LEN],
+             (*output_buffer_size) * sizeof(float));
+    }
+  }
+}
+
+void WebRtcAec_ProcessFrames(AecCore* aec,
+                             const float* const* nearend,
+                             size_t num_bands,
+                             size_t num_samples,
+                             int knownDelay,
+                             float* const* out) {
+  RTC_DCHECK(num_samples == 80 || num_samples == 160);
+
+  aec->frame_count++;
+  // For each frame the process is as follows:
+  // 1) If the system_delay indicates that it is too small for processing a
+  //    frame we stuff the buffer with enough data for 10 ms.
+  // 2 a) Adjust the buffer to the system delay, by moving the read pointer.
+  //   b) Apply signal based delay correction, if we have detected poor AEC
+  //    performance.
+  // 3) TODO(bjornv): Investigate if we need to add this:
+  //    If we can't move read pointer due to buffer size limitations we
+  //    flush/stuff the buffer.
+  // 4) Process as many partitions as possible.
+  // 5) Update the |system_delay| with respect to a full frame of FRAME_LEN
+  //    samples. Even though we will have data left to process (we work with
+  //    partitions) we consider updating a whole frame, since that's the
+  //    amount of data we input and output in audio_processing.
+  // 6) Update the outputs.
+
+  // The AEC has two different delay estimation algorithms built in. The
+  // first relies on delay input values from the user and the amount of
+  // shifted buffer elements is controlled by |knownDelay|. This delay gives
+  // an estimate of how much we need to shift far-end buffers to align with
+  // the near-end signal. The other delay estimation algorithm uses the
+  // far- and near-end signals to find the offset between them. This one
+  // (called "signal delay") is then used to fine-tune the alignment, or
+  // simply compensate for errors in the system-based one.
+  // Note that the two algorithms operate independently. Currently, we only
+  // allow one algorithm to be turned on.
+
+  RTC_DCHECK_EQ(aec->num_bands, num_bands);
+
+  for (size_t j = 0; j < num_samples; j += FRAME_LEN) {
+    // 1) At most we process |aec->mult|+1 partitions in 10 ms. Make sure we
+    // have enough far-end data for that by stuffing the buffer if the
+    // |system_delay| indicates otherwise.
+    if (aec->system_delay < FRAME_LEN) {
+      // We don't have enough data so we rewind 10 ms.
+      WebRtcAec_AdjustFarendBufferSizeAndSystemDelay(aec, -(aec->mult + 1));
+    }
+
+    if (!aec->delay_agnostic_enabled) {
+      // 2 a) Compensate for a possible change in the system delay.
+
+      // TODO(bjornv): Investigate how we should round the delay difference;
+      // right now we know that incoming |knownDelay| is underestimated when
+      // it's less than |aec->knownDelay|. We therefore round (-32) in that
+      // direction. In the other direction, we don't have this situation, but
+      // might flush one partition too little. This can cause non-causality,
+      // which should be investigated. Maybe, allow for a non-symmetric
+      // rounding, like -16.
+      int move_elements = (aec->knownDelay - knownDelay - 32) / PART_LEN;
+      int moved_elements = aec->farend_block_buffer_.AdjustSize(move_elements);
+      MaybeLogDelayAdjustment(moved_elements * (aec->sampFreq == 8000 ? 8 : 4),
+                              DelaySource::kSystemDelay);
+      aec->knownDelay -= moved_elements * PART_LEN;
+    } else {
+      // 2 b) Apply signal based delay correction.
+      int move_elements = SignalBasedDelayCorrection(aec);
+      int moved_elements = aec->farend_block_buffer_.AdjustSize(move_elements);
+      MaybeLogDelayAdjustment(moved_elements * (aec->sampFreq == 8000 ? 8 : 4),
+                              DelaySource::kDelayAgnostic);
+      int far_near_buffer_diff =
+          aec->farend_block_buffer_.Size() -
+          (aec->nearend_buffer_size + FRAME_LEN) / PART_LEN;
+      WebRtc_SoftResetDelayEstimator(aec->delay_estimator, moved_elements);
+      WebRtc_SoftResetDelayEstimatorFarend(aec->delay_estimator_farend,
+                                           moved_elements);
+      // If we rely on reported system delay values only, a buffer underrun
+      // here can never occur since we've taken care of that in 1) above.
+      // Here, we apply signal based delay correction and can therefore end
+      // up with buffer underruns since the delay estimation can be wrong. We
+      // therefore stuff the buffer with enough elements if needed.
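+      // A negative |far_near_buffer_diff| means the far-end buffer holds
+      // fewer blocks than the buffered near-end data requires; the call below
+      // then grows the buffer by that many blocks (a negative size decrease).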
+ if (far_near_buffer_diff < 0) { + WebRtcAec_AdjustFarendBufferSizeAndSystemDelay(aec, + far_near_buffer_diff); + } + } + + static_assert( + 16 == (FRAME_LEN - PART_LEN), + "These constants need to be properly related for this code to work"); + float output_block[NUM_HIGH_BANDS_MAX + 1][PART_LEN]; + float nearend_block[NUM_HIGH_BANDS_MAX + 1][PART_LEN]; + float farend_extended_block_lowest_band[PART_LEN2]; + + // Form and process a block of nearend samples, buffer the output block of + // samples. + aec->farend_block_buffer_.ExtractExtendedBlock( + farend_extended_block_lowest_band); + FormNearendBlock(j, num_bands, nearend, PART_LEN - aec->nearend_buffer_size, + aec->nearend_buffer, nearend_block); + ProcessNearendBlock(aec, farend_extended_block_lowest_band, nearend_block, + output_block); + BufferOutputBlock(num_bands, output_block, &aec->output_buffer_size, + aec->output_buffer); + + if ((FRAME_LEN - PART_LEN + aec->nearend_buffer_size) == PART_LEN) { + // When possible (every fourth frame) form and process a second block of + // nearend samples, buffer the output block of samples. + aec->farend_block_buffer_.ExtractExtendedBlock( + farend_extended_block_lowest_band); + FormNearendBlock(j + FRAME_LEN - PART_LEN, num_bands, nearend, PART_LEN, + aec->nearend_buffer, nearend_block); + ProcessNearendBlock(aec, farend_extended_block_lowest_band, nearend_block, + output_block); + BufferOutputBlock(num_bands, output_block, &aec->output_buffer_size, + aec->output_buffer); + + // Reset the buffer size as there are no samples left in the nearend input + // to buffer. + aec->nearend_buffer_size = 0; + } else { + // Buffer the remaining samples in the nearend input. + aec->nearend_buffer_size += FRAME_LEN - PART_LEN; + BufferNearendFrame(j, num_bands, nearend, aec->nearend_buffer_size, + aec->nearend_buffer); + } + + // 5) Update system delay with respect to the entire frame. + aec->system_delay -= FRAME_LEN; + + // 6) Form the output frame. + FormOutputFrame(j, num_bands, &aec->output_buffer_size, aec->output_buffer, + out); + } +} + +int WebRtcAec_GetDelayMetricsCore(AecCore* self, + int* median, + int* std, + float* fraction_poor_delays) { + RTC_DCHECK(self); + RTC_DCHECK(median); + RTC_DCHECK(std); + + if (self->delay_logging_enabled == 0) { + // Logging disabled. + return -1; + } + + if (self->delay_metrics_delivered == 0) { + UpdateDelayMetrics(self); + self->delay_metrics_delivered = 1; + } + *median = self->delay_median; + *std = self->delay_std; + *fraction_poor_delays = self->fraction_poor_delays; + + return 0; +} + +int WebRtcAec_echo_state(AecCore* self) { + return self->echoState; +} + +void WebRtcAec_GetEchoStats(AecCore* self, + Stats* erl, + Stats* erle, + Stats* a_nlp, + float* divergent_filter_fraction) { + RTC_DCHECK(erl); + RTC_DCHECK(erle); + RTC_DCHECK(a_nlp); + *erl = self->erl; + *erle = self->erle; + *a_nlp = self->aNlp; + *divergent_filter_fraction = + self->divergent_filter_fraction.GetLatestFraction(); +} + +void WebRtcAec_SetConfigCore(AecCore* self, + int nlp_mode, + int metrics_mode, + int delay_logging) { + RTC_DCHECK_GE(nlp_mode, 0); + RTC_DCHECK_LT(nlp_mode, 3); + self->nlp_mode = nlp_mode; + self->metricsMode = metrics_mode; + if (self->metricsMode) { + InitMetrics(self); + } + // Turn on delay logging if it is either set explicitly or if delay agnostic + // AEC is enabled (which requires delay estimates). 
+ self->delay_logging_enabled = delay_logging || self->delay_agnostic_enabled; + if (self->delay_logging_enabled) { + memset(self->delay_histogram, 0, sizeof(self->delay_histogram)); + } +} + +void WebRtcAec_enable_delay_agnostic(AecCore* self, int enable) { + self->delay_agnostic_enabled = enable; +} + +int WebRtcAec_delay_agnostic_enabled(AecCore* self) { + return self->delay_agnostic_enabled; +} + +void WebRtcAec_enable_refined_adaptive_filter(AecCore* self, bool enable) { + self->refined_adaptive_filter_enabled = enable; + SetAdaptiveFilterStepSize(self); + SetErrorThreshold(self); +} + +bool WebRtcAec_refined_adaptive_filter_enabled(const AecCore* self) { + return self->refined_adaptive_filter_enabled; +} + +void WebRtcAec_enable_extended_filter(AecCore* self, int enable) { + self->extended_filter_enabled = enable; + SetAdaptiveFilterStepSize(self); + SetErrorThreshold(self); + self->num_partitions = enable ? kExtendedNumPartitions : kNormalNumPartitions; + // Update the delay estimator with filter length. See InitAEC() for details. + WebRtc_set_allowed_offset(self->delay_estimator, self->num_partitions / 2); +} + +int WebRtcAec_extended_filter_enabled(AecCore* self) { + return self->extended_filter_enabled; +} + +int WebRtcAec_system_delay(AecCore* self) { + return self->system_delay; +} + +void WebRtcAec_SetSystemDelay(AecCore* self, int delay) { + RTC_DCHECK_GE(delay, 0); + self->system_delay = delay; +} +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec/aec_core.h b/third_party/libwebrtc/webrtc/modules/audio_processing/aec/aec_core.h new file mode 100644 index 0000000000..78596ec4b4 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec/aec_core.h @@ -0,0 +1,335 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * Specifies the interface for the AEC core. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_H_ +#define MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_H_ + +#include <stddef.h> + +#include <memory> + +extern "C" { +#include "common_audio/ring_buffer.h" +} +#include "common_audio/wav_file.h" +#include "modules/audio_processing/aec/aec_common.h" +#include "modules/audio_processing/utility/block_mean_calculator.h" +#include "modules/audio_processing/utility/ooura_fft.h" +#include "rtc_base/constructormagic.h" +#include "typedefs.h" // NOLINT(build/include) + +namespace webrtc { + +#define FRAME_LEN 80 +#define PART_LEN 64 // Length of partition +#define PART_LEN1 (PART_LEN + 1) // Unique fft coefficients +#define PART_LEN2 (PART_LEN * 2) // Length of partition * 2 +#define NUM_HIGH_BANDS_MAX 2 // Max number of high bands + +class ApmDataDumper; + +typedef float complex_t[2]; +// For performance reasons, some arrays of complex numbers are replaced by twice +// as long arrays of float, all the real parts followed by all the imaginary +// ones (complex_t[SIZE] -> float[2][SIZE]). This allows SIMD optimizations and +// is better than two arrays (one for the real parts and one for the imaginary +// parts) as this other way would require two pointers instead of one and cause +// extra register spilling. 
This also allows the offsets to be calculated at
+// compile time.
+
+// Metrics
+enum { kOffsetLevel = -100 };
+
+typedef struct Stats {
+  float instant;
+  float average;
+  float min;
+  float max;
+  float sum;
+  float hisum;
+  float himean;
+  size_t counter;
+  size_t hicounter;
+} Stats;
+
+// Number of partitions for the extended filter mode. The first one is an enum
+// to be used in array declarations, as it represents the maximum filter
+// length.
+enum { kExtendedNumPartitions = 32 };
+static const int kNormalNumPartitions = 12;
+
+// Delay estimator constants, used for logging and delay compensation if
+// reported delays are disabled.
+enum { kLookaheadBlocks = 15 };
+enum {
+  // 500 ms for 16 kHz which is equivalent with the limit of reported delays.
+  kHistorySizeBlocks = 125
+};
+
+typedef struct PowerLevel {
+  PowerLevel();
+
+  BlockMeanCalculator framelevel;
+  BlockMeanCalculator averagelevel;
+  float minlevel;
+} PowerLevel;
+
+class BlockBuffer {
+ public:
+  BlockBuffer();
+  ~BlockBuffer();
+  void ReInit();
+  void Insert(const float block[PART_LEN]);
+  void ExtractExtendedBlock(float extended_block[PART_LEN2]);
+  int AdjustSize(int buffer_size_decrease);
+  size_t Size();
+  size_t AvaliableSpace();
+
+ private:
+  RingBuffer* buffer_;
+};
+
+class DivergentFilterFraction {
+ public:
+  DivergentFilterFraction();
+
+  // Reset.
+  void Reset();
+
+  void AddObservation(const PowerLevel& nearlevel,
+                      const PowerLevel& linoutlevel,
+                      const PowerLevel& nlpoutlevel);
+
+  // Return the latest fraction.
+  float GetLatestFraction() const;
+
+ private:
+  // Clear all values added.
+  void Clear();
+
+  size_t count_;
+  size_t occurrence_;
+  float fraction_;
+
+  RTC_DISALLOW_COPY_AND_ASSIGN(DivergentFilterFraction);
+};
+
+typedef struct CoherenceState {
+  complex_t sde[PART_LEN1];  // cross-psd of nearend and error
+  complex_t sxd[PART_LEN1];  // cross-psd of farend and nearend
+  float sx[PART_LEN1], sd[PART_LEN1], se[PART_LEN1];  // far, near, error psd
+} CoherenceState;
+
+struct AecCore {
+  explicit AecCore(int instance_index);
+  ~AecCore();
+
+  std::unique_ptr<ApmDataDumper> data_dumper;
+  const OouraFft ooura_fft;
+
+  CoherenceState coherence_state;
+
+  int farBufWritePos, farBufReadPos;
+
+  int knownDelay;
+  int inSamples, outSamples;
+  int delayEstCtr;
+
+  // Nearend buffer used for changing from FRAME_LEN to PART_LEN sample block
+  // sizes. The buffer stores all the incoming bands and for each band a
+  // maximum of PART_LEN - (FRAME_LEN - PART_LEN) values need to be buffered in
+  // order to change the block size from FRAME_LEN to PART_LEN.
+  float nearend_buffer[NUM_HIGH_BANDS_MAX + 1]
+                      [PART_LEN - (FRAME_LEN - PART_LEN)];
+  size_t nearend_buffer_size;
+  float output_buffer[NUM_HIGH_BANDS_MAX + 1][2 * PART_LEN];
+  size_t output_buffer_size;
+
+  float eBuf[PART_LEN2];  // error
+
+  float previous_nearend_block[NUM_HIGH_BANDS_MAX + 1][PART_LEN];
+
+  float xPow[PART_LEN1];
+  float dPow[PART_LEN1];
+  float dMinPow[PART_LEN1];
+  float dInitMinPow[PART_LEN1];
+  float* noisePow;
+
+  float xfBuf[2][kExtendedNumPartitions * PART_LEN1];  // farend fft buffer
+  float wfBuf[2][kExtendedNumPartitions * PART_LEN1];  // filter fft
+  // Farend windowed fft buffer.
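+  // Unlike |xfBuf| and |wfBuf| above, this one is stored as interleaved
+  // (re, im) pairs of complex_t rather than in the split float[2][...]
+  // layout.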
+  complex_t xfwBuf[kExtendedNumPartitions * PART_LEN1];
+
+  float hNs[PART_LEN1];
+  float hNlFbMin, hNlFbLocalMin;
+  float hNlXdAvgMin;
+  int hNlNewMin, hNlMinCtr;
+  float overDrive;
+  float overdrive_scaling;
+  int nlp_mode;
+  float outBuf[PART_LEN];
+  int delayIdx;
+
+  short stNearState, echoState;
+  short divergeState;
+
+  int xfBufBlockPos;
+
+  BlockBuffer farend_block_buffer_;
+
+  int system_delay;  // Current system delay buffered in AEC.
+
+  int mult;  // sampling frequency multiple
+  int sampFreq = 16000;
+  size_t num_bands;
+  uint32_t seed;
+
+  float filter_step_size;  // stepsize
+  float error_threshold;   // error threshold
+
+  int noiseEstCtr;
+
+  PowerLevel farlevel;
+  PowerLevel nearlevel;
+  PowerLevel linoutlevel;
+  PowerLevel nlpoutlevel;
+
+  int metricsMode;
+  int stateCounter;
+  Stats erl;
+  Stats erle;
+  Stats aNlp;
+  Stats rerl;
+  DivergentFilterFraction divergent_filter_fraction;
+
+  // Quantities to control H band scaling for SWB input
+  int freq_avg_ic;       // initial bin for averaging nlp gain
+  int flag_Hband_cn;     // for comfort noise
+  float cn_scale_Hband;  // scale for comfort noise in H band
+
+  int delay_metrics_delivered;
+  int delay_histogram[kHistorySizeBlocks];
+  int num_delay_values;
+  int delay_median;
+  int delay_std;
+  float fraction_poor_delays;
+  int delay_logging_enabled;
+  void* delay_estimator_farend;
+  void* delay_estimator;
+  // Variables associated with delay correction through signal based delay
+  // estimation feedback.
+  int previous_delay;
+  int delay_correction_count;
+  int shift_offset;
+  float delay_quality_threshold;
+  int frame_count;
+
+  // 0 = delay agnostic mode (signal based delay correction) disabled.
+  // Otherwise enabled.
+  int delay_agnostic_enabled;
+  // 1 = extended filter mode enabled, 0 = disabled.
+  int extended_filter_enabled;
+  // 1 = refined filter adaptation aec mode enabled, 0 = disabled.
+  bool refined_adaptive_filter_enabled;
+
+  // Runtime selection of number of filter partitions.
+  int num_partitions;
+
+  // Flag that extreme filter divergence has been detected by the Echo
+  // Suppressor.
+  int extreme_filter_divergence;
+};
+
+AecCore* WebRtcAec_CreateAec(int instance_count);  // Returns NULL on error.
+void WebRtcAec_FreeAec(AecCore* aec);
+int WebRtcAec_InitAec(AecCore* aec, int sampFreq);
+void WebRtcAec_InitAec_SSE2(void);
+#if defined(MIPS_FPU_LE)
+void WebRtcAec_InitAec_mips(void);
+#endif
+#if defined(WEBRTC_HAS_NEON)
+void WebRtcAec_InitAec_neon(void);
+#endif
+
+void WebRtcAec_BufferFarendBlock(AecCore* aec, const float* farend);
+void WebRtcAec_ProcessFrames(AecCore* aec,
+                             const float* const* nearend,
+                             size_t num_bands,
+                             size_t num_samples,
+                             int knownDelay,
+                             float* const* out);
+
+// A helper function to adjust the farend buffer size.
+// Returns the number of elements the size was decreased with, and adjusts
+// |system_delay| by the corresponding number of samples.
+int WebRtcAec_AdjustFarendBufferSizeAndSystemDelay(AecCore* aec,
+                                                   int size_decrease);
+
+// Calculates the median, standard deviation and fraction of poor values among
+// the delay estimates aggregated up to the first call to the function. After
+// that first call the metrics are aggregated and updated every second. With
+// poor values we mean values that most likely will cause the AEC to perform
+// poorly.
+// TODO(bjornv): Consider changing tests and tools to handle a constant
+// aggregation window throughout the session instead.
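+// |median| and |std| are reported in ms; see UpdateDelayMetrics() in
+// aec_core.cc, where block indices are scaled by
+// kMsPerBlock = PART_LEN / (mult * 8), i.e. 4 ms per block at 16 kHz and
+// above.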
+int WebRtcAec_GetDelayMetricsCore(AecCore* self,
+                                  int* median,
+                                  int* std,
+                                  float* fraction_poor_delays);
+
+// Returns the echo state (1: echo, 0: no echo).
+int WebRtcAec_echo_state(AecCore* self);
+
+// Gets statistics of the echo metrics ERL, ERLE, A_NLP.
+void WebRtcAec_GetEchoStats(AecCore* self,
+                            Stats* erl,
+                            Stats* erle,
+                            Stats* a_nlp,
+                            float* divergent_filter_fraction);
+
+// Sets local configuration modes.
+void WebRtcAec_SetConfigCore(AecCore* self,
+                             int nlp_mode,
+                             int metrics_mode,
+                             int delay_logging);
+
+// Non-zero enables, zero disables.
+void WebRtcAec_enable_delay_agnostic(AecCore* self, int enable);
+
+// Returns non-zero if delay agnostic (i.e., signal based delay estimation) is
+// enabled and zero if disabled.
+int WebRtcAec_delay_agnostic_enabled(AecCore* self);
+
+// Turns on/off the refined adaptive filter feature.
+void WebRtcAec_enable_refined_adaptive_filter(AecCore* self, bool enable);
+
+// Returns whether the refined adaptive filter is enabled.
+bool WebRtcAec_refined_adaptive_filter_enabled(const AecCore* self);
+
+// Enables or disables extended filter mode. Non-zero enables, zero disables.
+void WebRtcAec_enable_extended_filter(AecCore* self, int enable);
+
+// Returns non-zero if extended filter mode is enabled and zero if disabled.
+int WebRtcAec_extended_filter_enabled(AecCore* self);
+
+// Returns the current |system_delay|, i.e., the buffered difference between
+// far-end and near-end.
+int WebRtcAec_system_delay(AecCore* self);
+
+// Sets the |system_delay| to |delay|. Note that if the value is changed
+// improperly, there can be a performance regression. So it should be used
+// with care.
+void WebRtcAec_SetSystemDelay(AecCore* self, int delay);
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_H_
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec/aec_core_mips.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec/aec_core_mips.cc
new file mode 100644
index 0000000000..ebe6349e93
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec/aec_core_mips.cc
@@ -0,0 +1,490 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+/*
+ * The core AEC algorithm, which is presented with time-aligned signals.
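+ * This file provides MIPS FPU assembly versions of the speed-critical
+ * functions; WebRtcAec_InitAec_mips() at the bottom installs them as the
+ * active implementations.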
+ */ + +#include "modules/audio_processing/aec/aec_core.h" + +#include <math.h> + +extern "C" { +#include "common_audio/signal_processing/include/signal_processing_library.h" +} +#include "modules/audio_processing/aec/aec_core_optimized_methods.h" +#include "modules/audio_processing/utility/ooura_fft.h" + +namespace webrtc { + +extern const float WebRtcAec_weightCurve[65]; +extern const float WebRtcAec_overDriveCurve[65]; + +void WebRtcAec_FilterFar_mips( + int num_partitions, + int x_fft_buf_block_pos, + float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1], + float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1], + float y_fft[2][PART_LEN1]) { + int i; + for (i = 0; i < num_partitions; i++) { + int xPos = (i + x_fft_buf_block_pos) * PART_LEN1; + int pos = i * PART_LEN1; + // Check for wrap + if (i + x_fft_buf_block_pos >= num_partitions) { + xPos -= num_partitions * (PART_LEN1); + } + float* yf0 = y_fft[0]; + float* yf1 = y_fft[1]; + float* aRe = x_fft_buf[0] + xPos; + float* aIm = x_fft_buf[1] + xPos; + float* bRe = h_fft_buf[0] + pos; + float* bIm = h_fft_buf[1] + pos; + float f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13; + int len = PART_LEN1 >> 1; + + __asm __volatile( + ".set push \n\t" + ".set noreorder \n\t" + "1: \n\t" + "lwc1 %[f0], 0(%[aRe]) \n\t" + "lwc1 %[f1], 0(%[bRe]) \n\t" + "lwc1 %[f2], 0(%[bIm]) \n\t" + "lwc1 %[f3], 0(%[aIm]) \n\t" + "lwc1 %[f4], 4(%[aRe]) \n\t" + "lwc1 %[f5], 4(%[bRe]) \n\t" + "lwc1 %[f6], 4(%[bIm]) \n\t" + "mul.s %[f8], %[f0], %[f1] \n\t" + "mul.s %[f0], %[f0], %[f2] \n\t" + "mul.s %[f9], %[f4], %[f5] \n\t" + "mul.s %[f4], %[f4], %[f6] \n\t" + "lwc1 %[f7], 4(%[aIm]) \n\t" +#if !defined(MIPS32_R2_LE) + "mul.s %[f12], %[f2], %[f3] \n\t" + "mul.s %[f1], %[f3], %[f1] \n\t" + "mul.s %[f11], %[f6], %[f7] \n\t" + "addiu %[aRe], %[aRe], 8 \n\t" + "addiu %[aIm], %[aIm], 8 \n\t" + "addiu %[len], %[len], -1 \n\t" + "sub.s %[f8], %[f8], %[f12] \n\t" + "mul.s %[f12], %[f7], %[f5] \n\t" + "lwc1 %[f2], 0(%[yf0]) \n\t" + "add.s %[f1], %[f0], %[f1] \n\t" + "lwc1 %[f3], 0(%[yf1]) \n\t" + "sub.s %[f9], %[f9], %[f11] \n\t" + "lwc1 %[f6], 4(%[yf0]) \n\t" + "add.s %[f4], %[f4], %[f12] \n\t" +#else // #if !defined(MIPS32_R2_LE) + "addiu %[aRe], %[aRe], 8 \n\t" + "addiu %[aIm], %[aIm], 8 \n\t" + "addiu %[len], %[len], -1 \n\t" + "nmsub.s %[f8], %[f8], %[f2], %[f3] \n\t" + "lwc1 %[f2], 0(%[yf0]) \n\t" + "madd.s %[f1], %[f0], %[f3], %[f1] \n\t" + "lwc1 %[f3], 0(%[yf1]) \n\t" + "nmsub.s %[f9], %[f9], %[f6], %[f7] \n\t" + "lwc1 %[f6], 4(%[yf0]) \n\t" + "madd.s %[f4], %[f4], %[f7], %[f5] \n\t" +#endif // #if !defined(MIPS32_R2_LE) + "lwc1 %[f5], 4(%[yf1]) \n\t" + "add.s %[f2], %[f2], %[f8] \n\t" + "addiu %[bRe], %[bRe], 8 \n\t" + "addiu %[bIm], %[bIm], 8 \n\t" + "add.s %[f3], %[f3], %[f1] \n\t" + "add.s %[f6], %[f6], %[f9] \n\t" + "add.s %[f5], %[f5], %[f4] \n\t" + "swc1 %[f2], 0(%[yf0]) \n\t" + "swc1 %[f3], 0(%[yf1]) \n\t" + "swc1 %[f6], 4(%[yf0]) \n\t" + "swc1 %[f5], 4(%[yf1]) \n\t" + "addiu %[yf0], %[yf0], 8 \n\t" + "bgtz %[len], 1b \n\t" + " addiu %[yf1], %[yf1], 8 \n\t" + "lwc1 %[f0], 0(%[aRe]) \n\t" + "lwc1 %[f1], 0(%[bRe]) \n\t" + "lwc1 %[f2], 0(%[bIm]) \n\t" + "lwc1 %[f3], 0(%[aIm]) \n\t" + "mul.s %[f8], %[f0], %[f1] \n\t" + "mul.s %[f0], %[f0], %[f2] \n\t" +#if !defined(MIPS32_R2_LE) + "mul.s %[f12], %[f2], %[f3] \n\t" + "mul.s %[f1], %[f3], %[f1] \n\t" + "sub.s %[f8], %[f8], %[f12] \n\t" + "lwc1 %[f2], 0(%[yf0]) \n\t" + "add.s %[f1], %[f0], %[f1] \n\t" + "lwc1 %[f3], 0(%[yf1]) \n\t" +#else // #if !defined(MIPS32_R2_LE) + "nmsub.s %[f8], %[f8], %[f2], 
%[f3] \n\t" + "lwc1 %[f2], 0(%[yf0]) \n\t" + "madd.s %[f1], %[f0], %[f3], %[f1] \n\t" + "lwc1 %[f3], 0(%[yf1]) \n\t" +#endif // #if !defined(MIPS32_R2_LE) + "add.s %[f2], %[f2], %[f8] \n\t" + "add.s %[f3], %[f3], %[f1] \n\t" + "swc1 %[f2], 0(%[yf0]) \n\t" + "swc1 %[f3], 0(%[yf1]) \n\t" + ".set pop \n\t" + : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), + [f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5), + [f6] "=&f" (f6), [f7] "=&f" (f7), [f8] "=&f" (f8), + [f9] "=&f" (f9), [f10] "=&f" (f10), [f11] "=&f" (f11), + [f12] "=&f" (f12), [f13] "=&f" (f13), [aRe] "+r" (aRe), + [aIm] "+r" (aIm), [bRe] "+r" (bRe), [bIm] "+r" (bIm), + [yf0] "+r" (yf0), [yf1] "+r" (yf1), [len] "+r" (len) + : + : "memory"); + } +} + +void WebRtcAec_FilterAdaptation_mips( + const OouraFft& ooura_fft, + int num_partitions, + int x_fft_buf_block_pos, + float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1], + float e_fft[2][PART_LEN1], + float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1]) { + float fft[PART_LEN2]; + int i; + for (i = 0; i < num_partitions; i++) { + int xPos = (i + x_fft_buf_block_pos) * (PART_LEN1); + int pos; + // Check for wrap + if (i + x_fft_buf_block_pos >= num_partitions) { + xPos -= num_partitions * PART_LEN1; + } + + pos = i * PART_LEN1; + float* aRe = x_fft_buf[0] + xPos; + float* aIm = x_fft_buf[1] + xPos; + float* bRe = e_fft[0]; + float* bIm = e_fft[1]; + float* fft_tmp; + + float f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12; + int len = PART_LEN >> 1; + + __asm __volatile( + ".set push \n\t" + ".set noreorder \n\t" + "addiu %[fft_tmp], %[fft], 0 \n\t" + "1: \n\t" + "lwc1 %[f0], 0(%[aRe]) \n\t" + "lwc1 %[f1], 0(%[bRe]) \n\t" + "lwc1 %[f2], 0(%[bIm]) \n\t" + "lwc1 %[f4], 4(%[aRe]) \n\t" + "lwc1 %[f5], 4(%[bRe]) \n\t" + "lwc1 %[f6], 4(%[bIm]) \n\t" + "addiu %[aRe], %[aRe], 8 \n\t" + "addiu %[bRe], %[bRe], 8 \n\t" + "mul.s %[f8], %[f0], %[f1] \n\t" + "mul.s %[f0], %[f0], %[f2] \n\t" + "lwc1 %[f3], 0(%[aIm]) \n\t" + "mul.s %[f9], %[f4], %[f5] \n\t" + "lwc1 %[f7], 4(%[aIm]) \n\t" + "mul.s %[f4], %[f4], %[f6] \n\t" +#if !defined(MIPS32_R2_LE) + "mul.s %[f10], %[f3], %[f2] \n\t" + "mul.s %[f1], %[f3], %[f1] \n\t" + "mul.s %[f11], %[f7], %[f6] \n\t" + "mul.s %[f5], %[f7], %[f5] \n\t" + "addiu %[aIm], %[aIm], 8 \n\t" + "addiu %[bIm], %[bIm], 8 \n\t" + "addiu %[len], %[len], -1 \n\t" + "add.s %[f8], %[f8], %[f10] \n\t" + "sub.s %[f1], %[f0], %[f1] \n\t" + "add.s %[f9], %[f9], %[f11] \n\t" + "sub.s %[f5], %[f4], %[f5] \n\t" +#else // #if !defined(MIPS32_R2_LE) + "addiu %[aIm], %[aIm], 8 \n\t" + "addiu %[bIm], %[bIm], 8 \n\t" + "addiu %[len], %[len], -1 \n\t" + "madd.s %[f8], %[f8], %[f3], %[f2] \n\t" + "nmsub.s %[f1], %[f0], %[f3], %[f1] \n\t" + "madd.s %[f9], %[f9], %[f7], %[f6] \n\t" + "nmsub.s %[f5], %[f4], %[f7], %[f5] \n\t" +#endif // #if !defined(MIPS32_R2_LE) + "swc1 %[f8], 0(%[fft_tmp]) \n\t" + "swc1 %[f1], 4(%[fft_tmp]) \n\t" + "swc1 %[f9], 8(%[fft_tmp]) \n\t" + "swc1 %[f5], 12(%[fft_tmp]) \n\t" + "bgtz %[len], 1b \n\t" + " addiu %[fft_tmp], %[fft_tmp], 16 \n\t" + "lwc1 %[f0], 0(%[aRe]) \n\t" + "lwc1 %[f1], 0(%[bRe]) \n\t" + "lwc1 %[f2], 0(%[bIm]) \n\t" + "lwc1 %[f3], 0(%[aIm]) \n\t" + "mul.s %[f8], %[f0], %[f1] \n\t" +#if !defined(MIPS32_R2_LE) + "mul.s %[f10], %[f3], %[f2] \n\t" + "add.s %[f8], %[f8], %[f10] \n\t" +#else // #if !defined(MIPS32_R2_LE) + "madd.s %[f8], %[f8], %[f3], %[f2] \n\t" +#endif // #if !defined(MIPS32_R2_LE) + "swc1 %[f8], 4(%[fft]) \n\t" + ".set pop \n\t" + : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), + [f3] "=&f" (f3), [f4] "=&f" (f4), [f5] 
"=&f" (f5), + [f6] "=&f" (f6), [f7] "=&f" (f7), [f8] "=&f" (f8), + [f9] "=&f" (f9), [f10] "=&f" (f10), [f11] "=&f" (f11), + [f12] "=&f" (f12), [aRe] "+r" (aRe), [aIm] "+r" (aIm), + [bRe] "+r" (bRe), [bIm] "+r" (bIm), [fft_tmp] "=&r" (fft_tmp), + [len] "+r" (len) + : [fft] "r" (fft) + : "memory"); + + ooura_fft.InverseFft(fft); + memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN); + + // fft scaling + { + float scale = 2.0f / PART_LEN2; + __asm __volatile( + ".set push \n\t" + ".set noreorder \n\t" + "addiu %[fft_tmp], %[fft], 0 \n\t" + "addiu %[len], $zero, 8 \n\t" + "1: \n\t" + "addiu %[len], %[len], -1 \n\t" + "lwc1 %[f0], 0(%[fft_tmp]) \n\t" + "lwc1 %[f1], 4(%[fft_tmp]) \n\t" + "lwc1 %[f2], 8(%[fft_tmp]) \n\t" + "lwc1 %[f3], 12(%[fft_tmp]) \n\t" + "mul.s %[f0], %[f0], %[scale] \n\t" + "mul.s %[f1], %[f1], %[scale] \n\t" + "mul.s %[f2], %[f2], %[scale] \n\t" + "mul.s %[f3], %[f3], %[scale] \n\t" + "lwc1 %[f4], 16(%[fft_tmp]) \n\t" + "lwc1 %[f5], 20(%[fft_tmp]) \n\t" + "lwc1 %[f6], 24(%[fft_tmp]) \n\t" + "lwc1 %[f7], 28(%[fft_tmp]) \n\t" + "mul.s %[f4], %[f4], %[scale] \n\t" + "mul.s %[f5], %[f5], %[scale] \n\t" + "mul.s %[f6], %[f6], %[scale] \n\t" + "mul.s %[f7], %[f7], %[scale] \n\t" + "swc1 %[f0], 0(%[fft_tmp]) \n\t" + "swc1 %[f1], 4(%[fft_tmp]) \n\t" + "swc1 %[f2], 8(%[fft_tmp]) \n\t" + "swc1 %[f3], 12(%[fft_tmp]) \n\t" + "swc1 %[f4], 16(%[fft_tmp]) \n\t" + "swc1 %[f5], 20(%[fft_tmp]) \n\t" + "swc1 %[f6], 24(%[fft_tmp]) \n\t" + "swc1 %[f7], 28(%[fft_tmp]) \n\t" + "bgtz %[len], 1b \n\t" + " addiu %[fft_tmp], %[fft_tmp], 32 \n\t" + ".set pop \n\t" + : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), + [f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5), + [f6] "=&f" (f6), [f7] "=&f" (f7), [len] "=&r" (len), + [fft_tmp] "=&r" (fft_tmp) + : [scale] "f" (scale), [fft] "r" (fft) + : "memory"); + } + ooura_fft.Fft(fft); + aRe = h_fft_buf[0] + pos; + aIm = h_fft_buf[1] + pos; + __asm __volatile( + ".set push \n\t" + ".set noreorder \n\t" + "addiu %[fft_tmp], %[fft], 0 \n\t" + "addiu %[len], $zero, 31 \n\t" + "lwc1 %[f0], 0(%[aRe]) \n\t" + "lwc1 %[f1], 0(%[fft_tmp]) \n\t" + "lwc1 %[f2], 256(%[aRe]) \n\t" + "lwc1 %[f3], 4(%[fft_tmp]) \n\t" + "lwc1 %[f4], 4(%[aRe]) \n\t" + "lwc1 %[f5], 8(%[fft_tmp]) \n\t" + "lwc1 %[f6], 4(%[aIm]) \n\t" + "lwc1 %[f7], 12(%[fft_tmp]) \n\t" + "add.s %[f0], %[f0], %[f1] \n\t" + "add.s %[f2], %[f2], %[f3] \n\t" + "add.s %[f4], %[f4], %[f5] \n\t" + "add.s %[f6], %[f6], %[f7] \n\t" + "addiu %[fft_tmp], %[fft_tmp], 16 \n\t" + "swc1 %[f0], 0(%[aRe]) \n\t" + "swc1 %[f2], 256(%[aRe]) \n\t" + "swc1 %[f4], 4(%[aRe]) \n\t" + "addiu %[aRe], %[aRe], 8 \n\t" + "swc1 %[f6], 4(%[aIm]) \n\t" + "addiu %[aIm], %[aIm], 8 \n\t" + "1: \n\t" + "lwc1 %[f0], 0(%[aRe]) \n\t" + "lwc1 %[f1], 0(%[fft_tmp]) \n\t" + "lwc1 %[f2], 0(%[aIm]) \n\t" + "lwc1 %[f3], 4(%[fft_tmp]) \n\t" + "lwc1 %[f4], 4(%[aRe]) \n\t" + "lwc1 %[f5], 8(%[fft_tmp]) \n\t" + "lwc1 %[f6], 4(%[aIm]) \n\t" + "lwc1 %[f7], 12(%[fft_tmp]) \n\t" + "add.s %[f0], %[f0], %[f1] \n\t" + "add.s %[f2], %[f2], %[f3] \n\t" + "add.s %[f4], %[f4], %[f5] \n\t" + "add.s %[f6], %[f6], %[f7] \n\t" + "addiu %[len], %[len], -1 \n\t" + "addiu %[fft_tmp], %[fft_tmp], 16 \n\t" + "swc1 %[f0], 0(%[aRe]) \n\t" + "swc1 %[f2], 0(%[aIm]) \n\t" + "swc1 %[f4], 4(%[aRe]) \n\t" + "addiu %[aRe], %[aRe], 8 \n\t" + "swc1 %[f6], 4(%[aIm]) \n\t" + "bgtz %[len], 1b \n\t" + " addiu %[aIm], %[aIm], 8 \n\t" + ".set pop \n\t" + : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), + [f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5), + [f6] "=&f" (f6), [f7] "=&f" (f7), 
[len] "=&r" (len), + [fft_tmp] "=&r" (fft_tmp), [aRe] "+r" (aRe), [aIm] "+r" (aIm) + : [fft] "r" (fft) + : "memory"); + } +} + +void WebRtcAec_Overdrive_mips(float overdrive_scaling, + float hNlFb, + float hNl[PART_LEN1]) { + const float one = 1.0; + float* p_hNl; + const float* p_WebRtcAec_wC; + float temp1, temp2, temp3, temp4; + + p_hNl = &hNl[0]; + p_WebRtcAec_wC = &WebRtcAec_weightCurve[0]; + + for (int i = 0; i < PART_LEN1; ++i) { + // Weight subbands + __asm __volatile( + ".set push \n\t" + ".set noreorder \n\t" + "lwc1 %[temp1], 0(%[p_hNl]) \n\t" + "lwc1 %[temp2], 0(%[p_wC]) \n\t" + "c.lt.s %[hNlFb], %[temp1] \n\t" + "bc1f 1f \n\t" + " mul.s %[temp3], %[temp2], %[hNlFb] \n\t" + "sub.s %[temp4], %[one], %[temp2] \n\t" +#if !defined(MIPS32_R2_LE) + "mul.s %[temp1], %[temp1], %[temp4] \n\t" + "add.s %[temp1], %[temp3], %[temp1] \n\t" +#else // #if !defined(MIPS32_R2_LE) + "madd.s %[temp1], %[temp3], %[temp1], %[temp4] \n\t" +#endif // #if !defined(MIPS32_R2_LE) + "swc1 %[temp1], 0(%[p_hNl]) \n\t" + "1: \n\t" + "addiu %[p_wC], %[p_wC], 4 \n\t" + ".set pop \n\t" + : [temp1] "=&f" (temp1), [temp2] "=&f" (temp2), [temp3] "=&f" (temp3), + [temp4] "=&f" (temp4), [p_wC] "+r" (p_WebRtcAec_wC) + : [hNlFb] "f" (hNlFb), [one] "f" (one), [p_hNl] "r" (p_hNl) + : "memory"); + + hNl[i] = powf(hNl[i], overdrive_scaling * WebRtcAec_overDriveCurve[i]); + } +} + +void WebRtcAec_Suppress_mips(const float hNl[PART_LEN1], + float efw[2][PART_LEN1]) { + const float* p_hNl; + float* p_efw0; + float* p_efw1; + float temp1, temp2, temp3, temp4; + + p_hNl = &hNl[0]; + p_efw0 = &efw[0][0]; + p_efw1 = &efw[1][0]; + + for (int i = 0; i < PART_LEN1; ++i) { + __asm __volatile( + "lwc1 %[temp1], 0(%[p_hNl]) \n\t" + "lwc1 %[temp3], 0(%[p_efw1]) \n\t" + "lwc1 %[temp2], 0(%[p_efw0]) \n\t" + "addiu %[p_hNl], %[p_hNl], 4 \n\t" + "mul.s %[temp3], %[temp3], %[temp1] \n\t" + "mul.s %[temp2], %[temp2], %[temp1] \n\t" + "addiu %[p_efw0], %[p_efw0], 4 \n\t" + "addiu %[p_efw1], %[p_efw1], 4 \n\t" + "neg.s %[temp4], %[temp3] \n\t" + "swc1 %[temp2], -4(%[p_efw0]) \n\t" + "swc1 %[temp4], -4(%[p_efw1]) \n\t" + : [temp1] "=&f" (temp1), [temp2] "=&f" (temp2), [temp3] "=&f" (temp3), + [temp4] "=&f" (temp4), [p_efw0] "+r" (p_efw0), [p_efw1] "+r" (p_efw1), + [p_hNl] "+r" (p_hNl) + : + : "memory"); + } +} + +void WebRtcAec_ScaleErrorSignal_mips(float mu, + float error_threshold, + float x_pow[PART_LEN1], + float ef[2][PART_LEN1]) { + int len = (PART_LEN1); + float* ef0 = ef[0]; + float* ef1 = ef[1]; + float fac1 = 1e-10f; + float err_th2 = error_threshold * error_threshold; + float f0, f1, f2; +#if !defined(MIPS32_R2_LE) + float f3; +#endif + + __asm __volatile( + ".set push \n\t" + ".set noreorder \n\t" + "1: \n\t" + "lwc1 %[f0], 0(%[x_pow]) \n\t" + "lwc1 %[f1], 0(%[ef0]) \n\t" + "lwc1 %[f2], 0(%[ef1]) \n\t" + "add.s %[f0], %[f0], %[fac1] \n\t" + "div.s %[f1], %[f1], %[f0] \n\t" + "div.s %[f2], %[f2], %[f0] \n\t" + "mul.s %[f0], %[f1], %[f1] \n\t" +#if defined(MIPS32_R2_LE) + "madd.s %[f0], %[f0], %[f2], %[f2] \n\t" +#else + "mul.s %[f3], %[f2], %[f2] \n\t" + "add.s %[f0], %[f0], %[f3] \n\t" +#endif + "c.le.s %[f0], %[err_th2] \n\t" + "nop \n\t" + "bc1t 2f \n\t" + " nop \n\t" + "sqrt.s %[f0], %[f0] \n\t" + "add.s %[f0], %[f0], %[fac1] \n\t" + "div.s %[f0], %[err_th], %[f0] \n\t" + "mul.s %[f1], %[f1], %[f0] \n\t" + "mul.s %[f2], %[f2], %[f0] \n\t" + "2: \n\t" + "mul.s %[f1], %[f1], %[mu] \n\t" + "mul.s %[f2], %[f2], %[mu] \n\t" + "swc1 %[f1], 0(%[ef0]) \n\t" + "swc1 %[f2], 0(%[ef1]) \n\t" + "addiu %[len], %[len], -1 \n\t" + "addiu 
%[x_pow], %[x_pow], 4 \n\t" + "addiu %[ef0], %[ef0], 4 \n\t" + "bgtz %[len], 1b \n\t" + " addiu %[ef1], %[ef1], 4 \n\t" + ".set pop \n\t" + : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), +#if !defined(MIPS32_R2_LE) + [f3] "=&f" (f3), +#endif + [x_pow] "+r" (x_pow), [ef0] "+r" (ef0), [ef1] "+r" (ef1), + [len] "+r" (len) + : [fac1] "f" (fac1), [err_th2] "f" (err_th2), [mu] "f" (mu), + [err_th] "f" (error_threshold) + : "memory"); +} + +void WebRtcAec_InitAec_mips(void) { + WebRtcAec_FilterFar = WebRtcAec_FilterFar_mips; + WebRtcAec_FilterAdaptation = WebRtcAec_FilterAdaptation_mips; + WebRtcAec_ScaleErrorSignal = WebRtcAec_ScaleErrorSignal_mips; + WebRtcAec_Overdrive = WebRtcAec_Overdrive_mips; + WebRtcAec_Suppress = WebRtcAec_Suppress_mips; +} +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec/aec_core_neon.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec/aec_core_neon.cc new file mode 100644 index 0000000000..1fbf56b8b6 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec/aec_core_neon.cc @@ -0,0 +1,737 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * The core AEC algorithm, neon version of speed-critical functions. + * + * Based on aec_core_sse2.c. + */ + +#include <arm_neon.h> +#include <math.h> +#include <string.h> // memset + +extern "C" { +#include "common_audio/signal_processing/include/signal_processing_library.h" +} +#include "modules/audio_processing/aec/aec_common.h" +#include "modules/audio_processing/aec/aec_core_optimized_methods.h" +#include "modules/audio_processing/utility/ooura_fft.h" + +namespace webrtc { + +enum { kShiftExponentIntoTopMantissa = 8 }; +enum { kFloatExponentShift = 23 }; + +__inline static float MulRe(float aRe, float aIm, float bRe, float bIm) { + return aRe * bRe - aIm * bIm; +} + +__inline static float MulIm(float aRe, float aIm, float bRe, float bIm) { + return aRe * bIm + aIm * bRe; +} + +static void FilterFarNEON(int num_partitions, + int x_fft_buf_block_pos, + float x_fft_buf[2] + [kExtendedNumPartitions * PART_LEN1], + float h_fft_buf[2] + [kExtendedNumPartitions * PART_LEN1], + float y_fft[2][PART_LEN1]) { + int i; + for (i = 0; i < num_partitions; i++) { + int j; + int xPos = (i + x_fft_buf_block_pos) * PART_LEN1; + int pos = i * PART_LEN1; + // Check for wrap + if (i + x_fft_buf_block_pos >= num_partitions) { + xPos -= num_partitions * PART_LEN1; + } + + // vectorized code (four at once) + for (j = 0; j + 3 < PART_LEN1; j += 4) { + const float32x4_t x_fft_buf_re = vld1q_f32(&x_fft_buf[0][xPos + j]); + const float32x4_t x_fft_buf_im = vld1q_f32(&x_fft_buf[1][xPos + j]); + const float32x4_t h_fft_buf_re = vld1q_f32(&h_fft_buf[0][pos + j]); + const float32x4_t h_fft_buf_im = vld1q_f32(&h_fft_buf[1][pos + j]); + const float32x4_t y_fft_re = vld1q_f32(&y_fft[0][j]); + const float32x4_t y_fft_im = vld1q_f32(&y_fft[1][j]); + const float32x4_t a = vmulq_f32(x_fft_buf_re, h_fft_buf_re); + const float32x4_t e = vmlsq_f32(a, x_fft_buf_im, h_fft_buf_im); + const float32x4_t c = vmulq_f32(x_fft_buf_re, h_fft_buf_im); + const float32x4_t f = vmlaq_f32(c, x_fft_buf_im, h_fft_buf_re); + const 
float32x4_t g = vaddq_f32(y_fft_re, e);
+      const float32x4_t h = vaddq_f32(y_fft_im, f);
+      vst1q_f32(&y_fft[0][j], g);
+      vst1q_f32(&y_fft[1][j], h);
+    }
+    // scalar code for the remaining items.
+    for (; j < PART_LEN1; j++) {
+      y_fft[0][j] += MulRe(x_fft_buf[0][xPos + j], x_fft_buf[1][xPos + j],
+                           h_fft_buf[0][pos + j], h_fft_buf[1][pos + j]);
+      y_fft[1][j] += MulIm(x_fft_buf[0][xPos + j], x_fft_buf[1][xPos + j],
+                           h_fft_buf[0][pos + j], h_fft_buf[1][pos + j]);
+    }
+  }
+}
+
+// ARM64's arm_neon.h has already defined vdivq_f32 and vsqrtq_f32.
+#if !defined(WEBRTC_ARCH_ARM64)
+static float32x4_t vdivq_f32(float32x4_t a, float32x4_t b) {
+  int i;
+  float32x4_t x = vrecpeq_f32(b);
+  // From the ARM documentation:
+  // The Newton-Raphson iteration
+  //   x[n+1] = x[n] * (2 - d * x[n])
+  // converges to (1/d) if x0 is the result of VRECPE applied to d.
+  //
+  // Note: The precision did not improve after 2 iterations.
+  for (i = 0; i < 2; i++) {
+    x = vmulq_f32(vrecpsq_f32(b, x), x);
+  }
+  // a/b = a*(1/b)
+  return vmulq_f32(a, x);
+}
+
+static float32x4_t vsqrtq_f32(float32x4_t s) {
+  int i;
+  float32x4_t x = vrsqrteq_f32(s);
+
+  // Code to handle sqrt(0).
+  // If the input to sqrtf() is zero, a zero will be returned.
+  // If the input to vrsqrteq_f32() is zero, positive infinity is returned.
+  const uint32x4_t vec_p_inf = vdupq_n_u32(0x7F800000);
+  // Check for division by zero.
+  const uint32x4_t div_by_zero = vceqq_u32(vec_p_inf, vreinterpretq_u32_f32(x));
+  // Zero out the positive-infinity results.
+  x = vreinterpretq_f32_u32(
+      vandq_u32(vmvnq_u32(div_by_zero), vreinterpretq_u32_f32(x)));
+  // From the ARM documentation:
+  // The Newton-Raphson iteration
+  //   x[n+1] = x[n] * (3 - d * (x[n] * x[n])) / 2
+  // converges to (1/√d) if x0 is the result of VRSQRTE applied to d.
+  //
+  // Note: The precision did not improve after 2 iterations.
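+  // Each step below uses vrsqrtsq_f32(x * x, s), which returns
+  // (3 - (x * x) * s) / 2, so the multiply implements exactly the
+  // recurrence above with d = s.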
+ for (i = 0; i < 2; i++) { + x = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x, x), s), x); + } + // sqrt(s) = s * 1/sqrt(s) + return vmulq_f32(s, x); +} +#endif // WEBRTC_ARCH_ARM64 + +static void ScaleErrorSignalNEON(float mu, + float error_threshold, + float x_pow[PART_LEN1], + float ef[2][PART_LEN1]) { + const float32x4_t k1e_10f = vdupq_n_f32(1e-10f); + const float32x4_t kMu = vmovq_n_f32(mu); + const float32x4_t kThresh = vmovq_n_f32(error_threshold); + int i; + // vectorized code (four at once) + for (i = 0; i + 3 < PART_LEN1; i += 4) { + const float32x4_t x_pow_local = vld1q_f32(&x_pow[i]); + const float32x4_t ef_re_base = vld1q_f32(&ef[0][i]); + const float32x4_t ef_im_base = vld1q_f32(&ef[1][i]); + const float32x4_t xPowPlus = vaddq_f32(x_pow_local, k1e_10f); + float32x4_t ef_re = vdivq_f32(ef_re_base, xPowPlus); + float32x4_t ef_im = vdivq_f32(ef_im_base, xPowPlus); + const float32x4_t ef_re2 = vmulq_f32(ef_re, ef_re); + const float32x4_t ef_sum2 = vmlaq_f32(ef_re2, ef_im, ef_im); + const float32x4_t absEf = vsqrtq_f32(ef_sum2); + const uint32x4_t bigger = vcgtq_f32(absEf, kThresh); + const float32x4_t absEfPlus = vaddq_f32(absEf, k1e_10f); + const float32x4_t absEfInv = vdivq_f32(kThresh, absEfPlus); + uint32x4_t ef_re_if = vreinterpretq_u32_f32(vmulq_f32(ef_re, absEfInv)); + uint32x4_t ef_im_if = vreinterpretq_u32_f32(vmulq_f32(ef_im, absEfInv)); + uint32x4_t ef_re_u32 = + vandq_u32(vmvnq_u32(bigger), vreinterpretq_u32_f32(ef_re)); + uint32x4_t ef_im_u32 = + vandq_u32(vmvnq_u32(bigger), vreinterpretq_u32_f32(ef_im)); + ef_re_if = vandq_u32(bigger, ef_re_if); + ef_im_if = vandq_u32(bigger, ef_im_if); + ef_re_u32 = vorrq_u32(ef_re_u32, ef_re_if); + ef_im_u32 = vorrq_u32(ef_im_u32, ef_im_if); + ef_re = vmulq_f32(vreinterpretq_f32_u32(ef_re_u32), kMu); + ef_im = vmulq_f32(vreinterpretq_f32_u32(ef_im_u32), kMu); + vst1q_f32(&ef[0][i], ef_re); + vst1q_f32(&ef[1][i], ef_im); + } + // scalar code for the remaining items. + for (; i < PART_LEN1; i++) { + float abs_ef; + ef[0][i] /= (x_pow[i] + 1e-10f); + ef[1][i] /= (x_pow[i] + 1e-10f); + abs_ef = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]); + + if (abs_ef > error_threshold) { + abs_ef = error_threshold / (abs_ef + 1e-10f); + ef[0][i] *= abs_ef; + ef[1][i] *= abs_ef; + } + + // Stepsize factor + ef[0][i] *= mu; + ef[1][i] *= mu; + } +} + +static void FilterAdaptationNEON( + const OouraFft& ooura_fft, + int num_partitions, + int x_fft_buf_block_pos, + float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1], + float e_fft[2][PART_LEN1], + float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1]) { + float fft[PART_LEN2]; + int i; + for (i = 0; i < num_partitions; i++) { + int xPos = (i + x_fft_buf_block_pos) * PART_LEN1; + int pos = i * PART_LEN1; + int j; + // Check for wrap + if (i + x_fft_buf_block_pos >= num_partitions) { + xPos -= num_partitions * PART_LEN1; + } + + // Process the whole array... + for (j = 0; j < PART_LEN; j += 4) { + // Load x_fft_buf and e_fft. + const float32x4_t x_fft_buf_re = vld1q_f32(&x_fft_buf[0][xPos + j]); + const float32x4_t x_fft_buf_im = vld1q_f32(&x_fft_buf[1][xPos + j]); + const float32x4_t e_fft_re = vld1q_f32(&e_fft[0][j]); + const float32x4_t e_fft_im = vld1q_f32(&e_fft[1][j]); + // Calculate the product of conjugate(x_fft_buf) by e_fft. 
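+      // This is the correlation term conj(X) * E of the frequency-domain
+      // NLMS gradient used below to update the adaptive filter partitions: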
+ // re(conjugate(a) * b) = aRe * bRe + aIm * bIm + // im(conjugate(a) * b)= aRe * bIm - aIm * bRe + const float32x4_t a = vmulq_f32(x_fft_buf_re, e_fft_re); + const float32x4_t e = vmlaq_f32(a, x_fft_buf_im, e_fft_im); + const float32x4_t c = vmulq_f32(x_fft_buf_re, e_fft_im); + const float32x4_t f = vmlsq_f32(c, x_fft_buf_im, e_fft_re); + // Interleave real and imaginary parts. + const float32x4x2_t g_n_h = vzipq_f32(e, f); + // Store + vst1q_f32(&fft[2 * j + 0], g_n_h.val[0]); + vst1q_f32(&fft[2 * j + 4], g_n_h.val[1]); + } + // ... and fixup the first imaginary entry. + fft[1] = + MulRe(x_fft_buf[0][xPos + PART_LEN], -x_fft_buf[1][xPos + PART_LEN], + e_fft[0][PART_LEN], e_fft[1][PART_LEN]); + + ooura_fft.InverseFft(fft); + memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN); + + // fft scaling + { + const float scale = 2.0f / PART_LEN2; + const float32x4_t scale_ps = vmovq_n_f32(scale); + for (j = 0; j < PART_LEN; j += 4) { + const float32x4_t fft_ps = vld1q_f32(&fft[j]); + const float32x4_t fft_scale = vmulq_f32(fft_ps, scale_ps); + vst1q_f32(&fft[j], fft_scale); + } + } + ooura_fft.Fft(fft); + + { + const float wt1 = h_fft_buf[1][pos]; + h_fft_buf[0][pos + PART_LEN] += fft[1]; + for (j = 0; j < PART_LEN; j += 4) { + float32x4_t wtBuf_re = vld1q_f32(&h_fft_buf[0][pos + j]); + float32x4_t wtBuf_im = vld1q_f32(&h_fft_buf[1][pos + j]); + const float32x4_t fft0 = vld1q_f32(&fft[2 * j + 0]); + const float32x4_t fft4 = vld1q_f32(&fft[2 * j + 4]); + const float32x4x2_t fft_re_im = vuzpq_f32(fft0, fft4); + wtBuf_re = vaddq_f32(wtBuf_re, fft_re_im.val[0]); + wtBuf_im = vaddq_f32(wtBuf_im, fft_re_im.val[1]); + + vst1q_f32(&h_fft_buf[0][pos + j], wtBuf_re); + vst1q_f32(&h_fft_buf[1][pos + j], wtBuf_im); + } + h_fft_buf[1][pos] = wt1; + } + } +} + +static float32x4_t vpowq_f32(float32x4_t a, float32x4_t b) { + // a^b = exp2(b * log2(a)) + // exp2(x) and log2(x) are calculated using polynomial approximations. + float32x4_t log2_a, b_log2_a, a_exp_b; + + // Calculate log2(x), x = a. + { + // To calculate log2(x), we decompose x like this: + // x = y * 2^n + // n is an integer + // y is in the [1.0, 2.0) range + // + // log2(x) = log2(y) + n + // n can be evaluated by playing with float representation. + // log2(y) in a small range can be approximated, this code uses an order + // five polynomial approximation. The coefficients have been + // estimated with the Remez algorithm and the resulting + // polynomial has a maximum relative error of 0.00086%. + + // Compute n. + // This is done by masking the exponent, shifting it into the top bit of + // the mantissa, putting eight into the biased exponent (to shift/ + // compensate the fact that the exponent has been shifted in the top/ + // fractional part and finally getting rid of the implicit leading one + // from the mantissa by substracting it out. + const uint32x4_t vec_float_exponent_mask = vdupq_n_u32(0x7F800000); + const uint32x4_t vec_eight_biased_exponent = vdupq_n_u32(0x43800000); + const uint32x4_t vec_implicit_leading_one = vdupq_n_u32(0x43BF8000); + const uint32x4_t two_n = + vandq_u32(vreinterpretq_u32_f32(a), vec_float_exponent_mask); + const uint32x4_t n_1 = vshrq_n_u32(two_n, kShiftExponentIntoTopMantissa); + const uint32x4_t n_0 = vorrq_u32(n_1, vec_eight_biased_exponent); + const float32x4_t n = + vsubq_f32(vreinterpretq_f32_u32(n_0), + vreinterpretq_f32_u32(vec_implicit_leading_one)); + // Compute y. 
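+    // Keeping the mantissa bits and forcing the biased exponent to 127
+    // (0x3F800000) reinterprets the float as 1.mantissa, i.e. y in [1.0, 2.0).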
+ const uint32x4_t vec_mantissa_mask = vdupq_n_u32(0x007FFFFF); + const uint32x4_t vec_zero_biased_exponent_is_one = vdupq_n_u32(0x3F800000); + const uint32x4_t mantissa = + vandq_u32(vreinterpretq_u32_f32(a), vec_mantissa_mask); + const float32x4_t y = vreinterpretq_f32_u32( + vorrq_u32(mantissa, vec_zero_biased_exponent_is_one)); + // Approximate log2(y) ~= (y - 1) * pol5(y). + // pol5(y) = C5 * y^5 + C4 * y^4 + C3 * y^3 + C2 * y^2 + C1 * y + C0 + const float32x4_t C5 = vdupq_n_f32(-3.4436006e-2f); + const float32x4_t C4 = vdupq_n_f32(3.1821337e-1f); + const float32x4_t C3 = vdupq_n_f32(-1.2315303f); + const float32x4_t C2 = vdupq_n_f32(2.5988452f); + const float32x4_t C1 = vdupq_n_f32(-3.3241990f); + const float32x4_t C0 = vdupq_n_f32(3.1157899f); + float32x4_t pol5_y = C5; + pol5_y = vmlaq_f32(C4, y, pol5_y); + pol5_y = vmlaq_f32(C3, y, pol5_y); + pol5_y = vmlaq_f32(C2, y, pol5_y); + pol5_y = vmlaq_f32(C1, y, pol5_y); + pol5_y = vmlaq_f32(C0, y, pol5_y); + const float32x4_t y_minus_one = + vsubq_f32(y, vreinterpretq_f32_u32(vec_zero_biased_exponent_is_one)); + const float32x4_t log2_y = vmulq_f32(y_minus_one, pol5_y); + + // Combine parts. + log2_a = vaddq_f32(n, log2_y); + } + + // b * log2(a) + b_log2_a = vmulq_f32(b, log2_a); + + // Calculate exp2(x), x = b * log2(a). + { + // To calculate 2^x, we decompose x like this: + // x = n + y + // n is an integer, the value of x - 0.5 rounded down, therefore + // y is in the [0.5, 1.5) range + // + // 2^x = 2^n * 2^y + // 2^n can be evaluated by playing with float representation. + // 2^y in a small range can be approximated, this code uses an order two + // polynomial approximation. The coefficients have been estimated + // with the Remez algorithm and the resulting polynomial has a + // maximum relative error of 0.17%. + // To avoid over/underflow, we reduce the range of input to ]-127, 129]. + const float32x4_t max_input = vdupq_n_f32(129.f); + const float32x4_t min_input = vdupq_n_f32(-126.99999f); + const float32x4_t x_min = vminq_f32(b_log2_a, max_input); + const float32x4_t x_max = vmaxq_f32(x_min, min_input); + // Compute n. + const float32x4_t half = vdupq_n_f32(0.5f); + const float32x4_t x_minus_half = vsubq_f32(x_max, half); + const int32x4_t x_minus_half_floor = vcvtq_s32_f32(x_minus_half); + + // Compute 2^n. + const int32x4_t float_exponent_bias = vdupq_n_s32(127); + const int32x4_t two_n_exponent = + vaddq_s32(x_minus_half_floor, float_exponent_bias); + const float32x4_t two_n = + vreinterpretq_f32_s32(vshlq_n_s32(two_n_exponent, kFloatExponentShift)); + // Compute y. + const float32x4_t y = vsubq_f32(x_max, vcvtq_f32_s32(x_minus_half_floor)); + + // Approximate 2^y ~= C2 * y^2 + C1 * y + C0. + const float32x4_t C2 = vdupq_n_f32(3.3718944e-1f); + const float32x4_t C1 = vdupq_n_f32(6.5763628e-1f); + const float32x4_t C0 = vdupq_n_f32(1.0017247f); + float32x4_t exp2_y = C2; + exp2_y = vmlaq_f32(C1, y, exp2_y); + exp2_y = vmlaq_f32(C0, y, exp2_y); + + // Combine parts. 
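+    // 2^x = 2^y * 2^n; e.g. (illustration, not from the source) x = 3.7
+    // gives n = floor(3.7 - 0.5) = 3, y = 0.7, and 8 * 2^0.7 ~= 13.0.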
+ a_exp_b = vmulq_f32(exp2_y, two_n); + } + + return a_exp_b; +} + +static void OverdriveNEON(float overdrive_scaling, + float hNlFb, + float hNl[PART_LEN1]) { + int i; + const float32x4_t vec_hNlFb = vmovq_n_f32(hNlFb); + const float32x4_t vec_one = vdupq_n_f32(1.0f); + const float32x4_t vec_overdrive_scaling = vmovq_n_f32(overdrive_scaling); + + // vectorized code (four at once) + for (i = 0; i + 3 < PART_LEN1; i += 4) { + // Weight subbands + float32x4_t vec_hNl = vld1q_f32(&hNl[i]); + const float32x4_t vec_weightCurve = vld1q_f32(&WebRtcAec_weightCurve[i]); + const uint32x4_t bigger = vcgtq_f32(vec_hNl, vec_hNlFb); + const float32x4_t vec_weightCurve_hNlFb = + vmulq_f32(vec_weightCurve, vec_hNlFb); + const float32x4_t vec_one_weightCurve = vsubq_f32(vec_one, vec_weightCurve); + const float32x4_t vec_one_weightCurve_hNl = + vmulq_f32(vec_one_weightCurve, vec_hNl); + const uint32x4_t vec_if0 = + vandq_u32(vmvnq_u32(bigger), vreinterpretq_u32_f32(vec_hNl)); + const float32x4_t vec_one_weightCurve_add = + vaddq_f32(vec_weightCurve_hNlFb, vec_one_weightCurve_hNl); + const uint32x4_t vec_if1 = + vandq_u32(bigger, vreinterpretq_u32_f32(vec_one_weightCurve_add)); + + vec_hNl = vreinterpretq_f32_u32(vorrq_u32(vec_if0, vec_if1)); + + const float32x4_t vec_overDriveCurve = + vld1q_f32(&WebRtcAec_overDriveCurve[i]); + const float32x4_t vec_overDriveSm_overDriveCurve = + vmulq_f32(vec_overdrive_scaling, vec_overDriveCurve); + vec_hNl = vpowq_f32(vec_hNl, vec_overDriveSm_overDriveCurve); + vst1q_f32(&hNl[i], vec_hNl); + } + + // scalar code for the remaining items. + for (; i < PART_LEN1; i++) { + // Weight subbands + if (hNl[i] > hNlFb) { + hNl[i] = WebRtcAec_weightCurve[i] * hNlFb + + (1 - WebRtcAec_weightCurve[i]) * hNl[i]; + } + + hNl[i] = powf(hNl[i], overdrive_scaling * WebRtcAec_overDriveCurve[i]); + } +} + +static void SuppressNEON(const float hNl[PART_LEN1], float efw[2][PART_LEN1]) { + int i; + const float32x4_t vec_minus_one = vdupq_n_f32(-1.0f); + // vectorized code (four at once) + for (i = 0; i + 3 < PART_LEN1; i += 4) { + float32x4_t vec_hNl = vld1q_f32(&hNl[i]); + float32x4_t vec_efw_re = vld1q_f32(&efw[0][i]); + float32x4_t vec_efw_im = vld1q_f32(&efw[1][i]); + vec_efw_re = vmulq_f32(vec_efw_re, vec_hNl); + vec_efw_im = vmulq_f32(vec_efw_im, vec_hNl); + + // Ooura fft returns incorrect sign on imaginary component. It matters + // here because we are making an additive change with comfort noise. + vec_efw_im = vmulq_f32(vec_efw_im, vec_minus_one); + vst1q_f32(&efw[0][i], vec_efw_re); + vst1q_f32(&efw[1][i], vec_efw_im); + } + + // scalar code for the remaining items. + for (; i < PART_LEN1; i++) { + efw[0][i] *= hNl[i]; + efw[1][i] *= hNl[i]; + + // Ooura fft returns incorrect sign on imaginary component. It matters + // here because we are making an additive change with comfort noise. + efw[1][i] *= -1; + } +} + +static int PartitionDelayNEON( + int num_partitions, + float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1]) { + // Measures the energy in each filter partition and returns the partition with + // highest energy. + // TODO(bjornv): Spread computational cost by computing one partition per + // block? 
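+  // Each partition covers PART_LEN taps of the adaptive filter, so the
+  // index of the most energetic partition is a coarse estimate of the
+  // echo-path delay in units of PART_LEN samples.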
+ float wfEnMax = 0; + int i; + int delay = 0; + + for (i = 0; i < num_partitions; i++) { + int j; + int pos = i * PART_LEN1; + float wfEn = 0; + float32x4_t vec_wfEn = vdupq_n_f32(0.0f); + // vectorized code (four at once) + for (j = 0; j + 3 < PART_LEN1; j += 4) { + const float32x4_t vec_wfBuf0 = vld1q_f32(&h_fft_buf[0][pos + j]); + const float32x4_t vec_wfBuf1 = vld1q_f32(&h_fft_buf[1][pos + j]); + vec_wfEn = vmlaq_f32(vec_wfEn, vec_wfBuf0, vec_wfBuf0); + vec_wfEn = vmlaq_f32(vec_wfEn, vec_wfBuf1, vec_wfBuf1); + } + { + float32x2_t vec_total; + // A B C D + vec_total = vpadd_f32(vget_low_f32(vec_wfEn), vget_high_f32(vec_wfEn)); + // A+B C+D + vec_total = vpadd_f32(vec_total, vec_total); + // A+B+C+D A+B+C+D + wfEn = vget_lane_f32(vec_total, 0); + } + + // scalar code for the remaining items. + for (; j < PART_LEN1; j++) { + wfEn += h_fft_buf[0][pos + j] * h_fft_buf[0][pos + j] + + h_fft_buf[1][pos + j] * h_fft_buf[1][pos + j]; + } + + if (wfEn > wfEnMax) { + wfEnMax = wfEn; + delay = i; + } + } + return delay; +} + +// Updates the following smoothed Power Spectral Densities (PSD): +// - sd : near-end +// - se : residual echo +// - sx : far-end +// - sde : cross-PSD of near-end and residual echo +// - sxd : cross-PSD of near-end and far-end +// +// In addition to updating the PSDs, also the filter diverge state is determined +// upon actions are taken. +static void UpdateCoherenceSpectraNEON(int mult, + bool extended_filter_enabled, + float efw[2][PART_LEN1], + float dfw[2][PART_LEN1], + float xfw[2][PART_LEN1], + CoherenceState* coherence_state, + short* filter_divergence_state, + int* extreme_filter_divergence) { + // Power estimate smoothing coefficients. + const float* ptrGCoh = + extended_filter_enabled + ? WebRtcAec_kExtendedSmoothingCoefficients[mult - 1] + : WebRtcAec_kNormalSmoothingCoefficients[mult - 1]; + int i; + float sdSum = 0, seSum = 0; + const float32x4_t vec_15 = vdupq_n_f32(WebRtcAec_kMinFarendPSD); + float32x4_t vec_sdSum = vdupq_n_f32(0.0f); + float32x4_t vec_seSum = vdupq_n_f32(0.0f); + + for (i = 0; i + 3 < PART_LEN1; i += 4) { + const float32x4_t vec_dfw0 = vld1q_f32(&dfw[0][i]); + const float32x4_t vec_dfw1 = vld1q_f32(&dfw[1][i]); + const float32x4_t vec_efw0 = vld1q_f32(&efw[0][i]); + const float32x4_t vec_efw1 = vld1q_f32(&efw[1][i]); + const float32x4_t vec_xfw0 = vld1q_f32(&xfw[0][i]); + const float32x4_t vec_xfw1 = vld1q_f32(&xfw[1][i]); + float32x4_t vec_sd = + vmulq_n_f32(vld1q_f32(&coherence_state->sd[i]), ptrGCoh[0]); + float32x4_t vec_se = + vmulq_n_f32(vld1q_f32(&coherence_state->se[i]), ptrGCoh[0]); + float32x4_t vec_sx = + vmulq_n_f32(vld1q_f32(&coherence_state->sx[i]), ptrGCoh[0]); + float32x4_t vec_dfw_sumsq = vmulq_f32(vec_dfw0, vec_dfw0); + float32x4_t vec_efw_sumsq = vmulq_f32(vec_efw0, vec_efw0); + float32x4_t vec_xfw_sumsq = vmulq_f32(vec_xfw0, vec_xfw0); + + vec_dfw_sumsq = vmlaq_f32(vec_dfw_sumsq, vec_dfw1, vec_dfw1); + vec_efw_sumsq = vmlaq_f32(vec_efw_sumsq, vec_efw1, vec_efw1); + vec_xfw_sumsq = vmlaq_f32(vec_xfw_sumsq, vec_xfw1, vec_xfw1); + vec_xfw_sumsq = vmaxq_f32(vec_xfw_sumsq, vec_15); + vec_sd = vmlaq_n_f32(vec_sd, vec_dfw_sumsq, ptrGCoh[1]); + vec_se = vmlaq_n_f32(vec_se, vec_efw_sumsq, ptrGCoh[1]); + vec_sx = vmlaq_n_f32(vec_sx, vec_xfw_sumsq, ptrGCoh[1]); + + vst1q_f32(&coherence_state->sd[i], vec_sd); + vst1q_f32(&coherence_state->se[i], vec_se); + vst1q_f32(&coherence_state->sx[i], vec_sx); + + { + float32x4x2_t vec_sde = vld2q_f32(&coherence_state->sde[i][0]); + float32x4_t vec_dfwefw0011 = vmulq_f32(vec_dfw0, vec_efw0); + 
float32x4_t vec_dfwefw0110 = vmulq_f32(vec_dfw0, vec_efw1); + vec_sde.val[0] = vmulq_n_f32(vec_sde.val[0], ptrGCoh[0]); + vec_sde.val[1] = vmulq_n_f32(vec_sde.val[1], ptrGCoh[0]); + vec_dfwefw0011 = vmlaq_f32(vec_dfwefw0011, vec_dfw1, vec_efw1); + vec_dfwefw0110 = vmlsq_f32(vec_dfwefw0110, vec_dfw1, vec_efw0); + vec_sde.val[0] = vmlaq_n_f32(vec_sde.val[0], vec_dfwefw0011, ptrGCoh[1]); + vec_sde.val[1] = vmlaq_n_f32(vec_sde.val[1], vec_dfwefw0110, ptrGCoh[1]); + vst2q_f32(&coherence_state->sde[i][0], vec_sde); + } + + { + float32x4x2_t vec_sxd = vld2q_f32(&coherence_state->sxd[i][0]); + float32x4_t vec_dfwxfw0011 = vmulq_f32(vec_dfw0, vec_xfw0); + float32x4_t vec_dfwxfw0110 = vmulq_f32(vec_dfw0, vec_xfw1); + vec_sxd.val[0] = vmulq_n_f32(vec_sxd.val[0], ptrGCoh[0]); + vec_sxd.val[1] = vmulq_n_f32(vec_sxd.val[1], ptrGCoh[0]); + vec_dfwxfw0011 = vmlaq_f32(vec_dfwxfw0011, vec_dfw1, vec_xfw1); + vec_dfwxfw0110 = vmlsq_f32(vec_dfwxfw0110, vec_dfw1, vec_xfw0); + vec_sxd.val[0] = vmlaq_n_f32(vec_sxd.val[0], vec_dfwxfw0011, ptrGCoh[1]); + vec_sxd.val[1] = vmlaq_n_f32(vec_sxd.val[1], vec_dfwxfw0110, ptrGCoh[1]); + vst2q_f32(&coherence_state->sxd[i][0], vec_sxd); + } + + vec_sdSum = vaddq_f32(vec_sdSum, vec_sd); + vec_seSum = vaddq_f32(vec_seSum, vec_se); + } + { + float32x2_t vec_sdSum_total; + float32x2_t vec_seSum_total; + // A B C D + vec_sdSum_total = + vpadd_f32(vget_low_f32(vec_sdSum), vget_high_f32(vec_sdSum)); + vec_seSum_total = + vpadd_f32(vget_low_f32(vec_seSum), vget_high_f32(vec_seSum)); + // A+B C+D + vec_sdSum_total = vpadd_f32(vec_sdSum_total, vec_sdSum_total); + vec_seSum_total = vpadd_f32(vec_seSum_total, vec_seSum_total); + // A+B+C+D A+B+C+D + sdSum = vget_lane_f32(vec_sdSum_total, 0); + seSum = vget_lane_f32(vec_seSum_total, 0); + } + + // scalar code for the remaining items. + for (; i < PART_LEN1; i++) { + coherence_state->sd[i] = + ptrGCoh[0] * coherence_state->sd[i] + + ptrGCoh[1] * (dfw[0][i] * dfw[0][i] + dfw[1][i] * dfw[1][i]); + coherence_state->se[i] = + ptrGCoh[0] * coherence_state->se[i] + + ptrGCoh[1] * (efw[0][i] * efw[0][i] + efw[1][i] * efw[1][i]); + // We threshold here to protect against the ill-effects of a zero farend. + // The threshold is not arbitrarily chosen, but balances protection and + // adverse interaction with the algorithm's tuning. + // TODO(bjornv): investigate further why this is so sensitive. + coherence_state->sx[i] = + ptrGCoh[0] * coherence_state->sx[i] + + ptrGCoh[1] * + WEBRTC_SPL_MAX(xfw[0][i] * xfw[0][i] + xfw[1][i] * xfw[1][i], + WebRtcAec_kMinFarendPSD); + + coherence_state->sde[i][0] = + ptrGCoh[0] * coherence_state->sde[i][0] + + ptrGCoh[1] * (dfw[0][i] * efw[0][i] + dfw[1][i] * efw[1][i]); + coherence_state->sde[i][1] = + ptrGCoh[0] * coherence_state->sde[i][1] + + ptrGCoh[1] * (dfw[0][i] * efw[1][i] - dfw[1][i] * efw[0][i]); + + coherence_state->sxd[i][0] = + ptrGCoh[0] * coherence_state->sxd[i][0] + + ptrGCoh[1] * (dfw[0][i] * xfw[0][i] + dfw[1][i] * xfw[1][i]); + coherence_state->sxd[i][1] = + ptrGCoh[0] * coherence_state->sxd[i][1] + + ptrGCoh[1] * (dfw[0][i] * xfw[1][i] - dfw[1][i] * xfw[0][i]); + + sdSum += coherence_state->sd[i]; + seSum += coherence_state->se[i]; + } + + // Divergent filter safeguard update. + *filter_divergence_state = + (*filter_divergence_state ? 1.05f : 1.0f) * seSum > sdSum; + + // Signal extreme filter divergence if the error is significantly larger + // than the nearend (13 dB). + *extreme_filter_divergence = (seSum > (19.95f * sdSum)); +} + +// Window time domain data to be used by the fft. 
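+// Scalar equivalent of the NEON routine below (a sketch for illustration):
+//   for (int i = 0; i < PART_LEN; ++i) {
+//     x_windowed[i] = x[i] * WebRtcAec_sqrtHanning[i];
+//     x_windowed[PART_LEN + i] =
+//         x[PART_LEN + i] * WebRtcAec_sqrtHanning[PART_LEN - i];
+//   }
+// Assuming WebRtcAec_sqrtHanning[k] = sin(pi * k / (2 * PART_LEN)), as the
+// name suggests, the rising and falling halves satisfy
+// w[k]^2 + w[PART_LEN - k]^2 == 1, so 50%-overlapped windowed blocks
+// overlap-add back to the original signal.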
+static void WindowDataNEON(float* x_windowed, const float* x) { + int i; + for (i = 0; i < PART_LEN; i += 4) { + const float32x4_t vec_Buf1 = vld1q_f32(&x[i]); + const float32x4_t vec_Buf2 = vld1q_f32(&x[PART_LEN + i]); + const float32x4_t vec_sqrtHanning = vld1q_f32(&WebRtcAec_sqrtHanning[i]); + // A B C D + float32x4_t vec_sqrtHanning_rev = + vld1q_f32(&WebRtcAec_sqrtHanning[PART_LEN - i - 3]); + // B A D C + vec_sqrtHanning_rev = vrev64q_f32(vec_sqrtHanning_rev); + // D C B A + vec_sqrtHanning_rev = vcombine_f32(vget_high_f32(vec_sqrtHanning_rev), + vget_low_f32(vec_sqrtHanning_rev)); + vst1q_f32(&x_windowed[i], vmulq_f32(vec_Buf1, vec_sqrtHanning)); + vst1q_f32(&x_windowed[PART_LEN + i], + vmulq_f32(vec_Buf2, vec_sqrtHanning_rev)); + } +} + +// Puts fft output data into a complex valued array. +static void StoreAsComplexNEON(const float* data, + float data_complex[2][PART_LEN1]) { + int i; + for (i = 0; i < PART_LEN; i += 4) { + const float32x4x2_t vec_data = vld2q_f32(&data[2 * i]); + vst1q_f32(&data_complex[0][i], vec_data.val[0]); + vst1q_f32(&data_complex[1][i], vec_data.val[1]); + } + // fix beginning/end values + data_complex[1][0] = 0; + data_complex[1][PART_LEN] = 0; + data_complex[0][0] = data[0]; + data_complex[0][PART_LEN] = data[1]; +} + +static void ComputeCoherenceNEON(const CoherenceState* coherence_state, + float* cohde, + float* cohxd) { + int i; + + { + const float32x4_t vec_1eminus10 = vdupq_n_f32(1e-10f); + + // Subband coherence + for (i = 0; i + 3 < PART_LEN1; i += 4) { + const float32x4_t vec_sd = vld1q_f32(&coherence_state->sd[i]); + const float32x4_t vec_se = vld1q_f32(&coherence_state->se[i]); + const float32x4_t vec_sx = vld1q_f32(&coherence_state->sx[i]); + const float32x4_t vec_sdse = vmlaq_f32(vec_1eminus10, vec_sd, vec_se); + const float32x4_t vec_sdsx = vmlaq_f32(vec_1eminus10, vec_sd, vec_sx); + float32x4x2_t vec_sde = vld2q_f32(&coherence_state->sde[i][0]); + float32x4x2_t vec_sxd = vld2q_f32(&coherence_state->sxd[i][0]); + float32x4_t vec_cohde = vmulq_f32(vec_sde.val[0], vec_sde.val[0]); + float32x4_t vec_cohxd = vmulq_f32(vec_sxd.val[0], vec_sxd.val[0]); + vec_cohde = vmlaq_f32(vec_cohde, vec_sde.val[1], vec_sde.val[1]); + vec_cohde = vdivq_f32(vec_cohde, vec_sdse); + vec_cohxd = vmlaq_f32(vec_cohxd, vec_sxd.val[1], vec_sxd.val[1]); + vec_cohxd = vdivq_f32(vec_cohxd, vec_sdsx); + + vst1q_f32(&cohde[i], vec_cohde); + vst1q_f32(&cohxd[i], vec_cohxd); + } + } + // scalar code for the remaining items. 
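+  // Per bin this is the magnitude-squared coherence,
+  //   cohde = |S_de|^2 / (S_d * S_e + 1e-10),
+  //   cohxd = |S_xd|^2 / (S_x * S_d + 1e-10);
+  // values near 1 indicate a strong linear relation in that subband.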
+ for (; i < PART_LEN1; i++) { + cohde[i] = (coherence_state->sde[i][0] * coherence_state->sde[i][0] + + coherence_state->sde[i][1] * coherence_state->sde[i][1]) / + (coherence_state->sd[i] * coherence_state->se[i] + 1e-10f); + cohxd[i] = (coherence_state->sxd[i][0] * coherence_state->sxd[i][0] + + coherence_state->sxd[i][1] * coherence_state->sxd[i][1]) / + (coherence_state->sx[i] * coherence_state->sd[i] + 1e-10f); + } +} + +void WebRtcAec_InitAec_neon(void) { + WebRtcAec_FilterFar = FilterFarNEON; + WebRtcAec_ScaleErrorSignal = ScaleErrorSignalNEON; + WebRtcAec_FilterAdaptation = FilterAdaptationNEON; + WebRtcAec_Overdrive = OverdriveNEON; + WebRtcAec_Suppress = SuppressNEON; + WebRtcAec_ComputeCoherence = ComputeCoherenceNEON; + WebRtcAec_UpdateCoherenceSpectra = UpdateCoherenceSpectraNEON; + WebRtcAec_StoreAsComplex = StoreAsComplexNEON; + WebRtcAec_PartitionDelay = PartitionDelayNEON; + WebRtcAec_WindowData = WindowDataNEON; +} +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec/aec_core_optimized_methods.h b/third_party/libwebrtc/webrtc/modules/audio_processing/aec/aec_core_optimized_methods.h new file mode 100644 index 0000000000..a8a20e94d8 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec/aec_core_optimized_methods.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_OPTIMIZED_METHODS_H_ +#define MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_OPTIMIZED_METHODS_H_ + +#include <memory> + +#include "modules/audio_processing/aec/aec_core.h" +#include "typedefs.h" // NOLINT(build/include) + +namespace webrtc { + +typedef void (*WebRtcAecFilterFar)( + int num_partitions, + int x_fft_buf_block_pos, + float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1], + float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1], + float y_fft[2][PART_LEN1]); +extern WebRtcAecFilterFar WebRtcAec_FilterFar; +typedef void (*WebRtcAecScaleErrorSignal)(float mu, + float error_threshold, + float x_pow[PART_LEN1], + float ef[2][PART_LEN1]); +extern WebRtcAecScaleErrorSignal WebRtcAec_ScaleErrorSignal; +typedef void (*WebRtcAecFilterAdaptation)( + const OouraFft& ooura_fft, + int num_partitions, + int x_fft_buf_block_pos, + float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1], + float e_fft[2][PART_LEN1], + float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1]); +extern WebRtcAecFilterAdaptation WebRtcAec_FilterAdaptation; + +typedef void (*WebRtcAecOverdrive)(float overdrive_scaling, + const float hNlFb, + float hNl[PART_LEN1]); +extern WebRtcAecOverdrive WebRtcAec_Overdrive; + +typedef void (*WebRtcAecSuppress)(const float hNl[PART_LEN1], + float efw[2][PART_LEN1]); +extern WebRtcAecSuppress WebRtcAec_Suppress; + +typedef void (*WebRtcAecComputeCoherence)(const CoherenceState* coherence_state, + float* cohde, + float* cohxd); +extern WebRtcAecComputeCoherence WebRtcAec_ComputeCoherence; + +typedef void (*WebRtcAecUpdateCoherenceSpectra)(int mult, + bool extended_filter_enabled, + float efw[2][PART_LEN1], + float dfw[2][PART_LEN1], + float xfw[2][PART_LEN1], + CoherenceState* coherence_state, + short* filter_divergence_state, 
+ int* extreme_filter_divergence); +extern WebRtcAecUpdateCoherenceSpectra WebRtcAec_UpdateCoherenceSpectra; + +typedef int (*WebRtcAecPartitionDelay)( + int num_partitions, + float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1]); +extern WebRtcAecPartitionDelay WebRtcAec_PartitionDelay; + +typedef void (*WebRtcAecStoreAsComplex)(const float* data, + float data_complex[2][PART_LEN1]); +extern WebRtcAecStoreAsComplex WebRtcAec_StoreAsComplex; + +typedef void (*WebRtcAecWindowData)(float* x_windowed, const float* x); +extern WebRtcAecWindowData WebRtcAec_WindowData; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_OPTIMIZED_METHODS_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec/aec_core_sse2.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec/aec_core_sse2.cc new file mode 100644 index 0000000000..0532662bed --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec/aec_core_sse2.cc @@ -0,0 +1,751 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * The core AEC algorithm, SSE2 version of speed-critical functions. + */ + +#include <emmintrin.h> +#include <math.h> +#include <string.h> // memset + +extern "C" { +#include "common_audio/signal_processing/include/signal_processing_library.h" +} +#include "modules/audio_processing/aec/aec_common.h" +#include "modules/audio_processing/aec/aec_core_optimized_methods.h" +#include "modules/audio_processing/utility/ooura_fft.h" + +namespace webrtc { + +__inline static float MulRe(float aRe, float aIm, float bRe, float bIm) { + return aRe * bRe - aIm * bIm; +} + +__inline static float MulIm(float aRe, float aIm, float bRe, float bIm) { + return aRe * bIm + aIm * bRe; +} + +static void FilterFarSSE2(int num_partitions, + int x_fft_buf_block_pos, + float x_fft_buf[2] + [kExtendedNumPartitions * PART_LEN1], + float h_fft_buf[2] + [kExtendedNumPartitions * PART_LEN1], + float y_fft[2][PART_LEN1]) { + int i; + for (i = 0; i < num_partitions; i++) { + int j; + int xPos = (i + x_fft_buf_block_pos) * PART_LEN1; + int pos = i * PART_LEN1; + // Check for wrap + if (i + x_fft_buf_block_pos >= num_partitions) { + xPos -= num_partitions * (PART_LEN1); + } + + // vectorized code (four at once) + for (j = 0; j + 3 < PART_LEN1; j += 4) { + const __m128 x_fft_buf_re = _mm_loadu_ps(&x_fft_buf[0][xPos + j]); + const __m128 x_fft_buf_im = _mm_loadu_ps(&x_fft_buf[1][xPos + j]); + const __m128 h_fft_buf_re = _mm_loadu_ps(&h_fft_buf[0][pos + j]); + const __m128 h_fft_buf_im = _mm_loadu_ps(&h_fft_buf[1][pos + j]); + const __m128 y_fft_re = _mm_loadu_ps(&y_fft[0][j]); + const __m128 y_fft_im = _mm_loadu_ps(&y_fft[1][j]); + const __m128 a = _mm_mul_ps(x_fft_buf_re, h_fft_buf_re); + const __m128 b = _mm_mul_ps(x_fft_buf_im, h_fft_buf_im); + const __m128 c = _mm_mul_ps(x_fft_buf_re, h_fft_buf_im); + const __m128 d = _mm_mul_ps(x_fft_buf_im, h_fft_buf_re); + const __m128 e = _mm_sub_ps(a, b); + const __m128 f = _mm_add_ps(c, d); + const __m128 g = _mm_add_ps(y_fft_re, e); + const __m128 h = _mm_add_ps(y_fft_im, f); + _mm_storeu_ps(&y_fft[0][j], g); + _mm_storeu_ps(&y_fft[1][j], h); + } + // scalar code for the remaining items. 
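+    // Per bin, this accumulates y += x * h (a complex multiply) over all
+    // partitions: the partitioned frequency-domain convolution that forms
+    // the far-end echo estimate.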
+ for (; j < PART_LEN1; j++) { + y_fft[0][j] += MulRe(x_fft_buf[0][xPos + j], x_fft_buf[1][xPos + j], + h_fft_buf[0][pos + j], h_fft_buf[1][pos + j]); + y_fft[1][j] += MulIm(x_fft_buf[0][xPos + j], x_fft_buf[1][xPos + j], + h_fft_buf[0][pos + j], h_fft_buf[1][pos + j]); + } + } +} + +static void ScaleErrorSignalSSE2(float mu, + float error_threshold, + float x_pow[PART_LEN1], + float ef[2][PART_LEN1]) { + const __m128 k1e_10f = _mm_set1_ps(1e-10f); + const __m128 kMu = _mm_set1_ps(mu); + const __m128 kThresh = _mm_set1_ps(error_threshold); + + int i; + // vectorized code (four at once) + for (i = 0; i + 3 < PART_LEN1; i += 4) { + const __m128 x_pow_local = _mm_loadu_ps(&x_pow[i]); + const __m128 ef_re_base = _mm_loadu_ps(&ef[0][i]); + const __m128 ef_im_base = _mm_loadu_ps(&ef[1][i]); + + const __m128 xPowPlus = _mm_add_ps(x_pow_local, k1e_10f); + __m128 ef_re = _mm_div_ps(ef_re_base, xPowPlus); + __m128 ef_im = _mm_div_ps(ef_im_base, xPowPlus); + const __m128 ef_re2 = _mm_mul_ps(ef_re, ef_re); + const __m128 ef_im2 = _mm_mul_ps(ef_im, ef_im); + const __m128 ef_sum2 = _mm_add_ps(ef_re2, ef_im2); + const __m128 absEf = _mm_sqrt_ps(ef_sum2); + const __m128 bigger = _mm_cmpgt_ps(absEf, kThresh); + __m128 absEfPlus = _mm_add_ps(absEf, k1e_10f); + const __m128 absEfInv = _mm_div_ps(kThresh, absEfPlus); + __m128 ef_re_if = _mm_mul_ps(ef_re, absEfInv); + __m128 ef_im_if = _mm_mul_ps(ef_im, absEfInv); + ef_re_if = _mm_and_ps(bigger, ef_re_if); + ef_im_if = _mm_and_ps(bigger, ef_im_if); + ef_re = _mm_andnot_ps(bigger, ef_re); + ef_im = _mm_andnot_ps(bigger, ef_im); + ef_re = _mm_or_ps(ef_re, ef_re_if); + ef_im = _mm_or_ps(ef_im, ef_im_if); + ef_re = _mm_mul_ps(ef_re, kMu); + ef_im = _mm_mul_ps(ef_im, kMu); + + _mm_storeu_ps(&ef[0][i], ef_re); + _mm_storeu_ps(&ef[1][i], ef_im); + } + // scalar code for the remaining items. + { + for (; i < (PART_LEN1); i++) { + float abs_ef; + ef[0][i] /= (x_pow[i] + 1e-10f); + ef[1][i] /= (x_pow[i] + 1e-10f); + abs_ef = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]); + + if (abs_ef > error_threshold) { + abs_ef = error_threshold / (abs_ef + 1e-10f); + ef[0][i] *= abs_ef; + ef[1][i] *= abs_ef; + } + + // Stepsize factor + ef[0][i] *= mu; + ef[1][i] *= mu; + } + } +} + +static void FilterAdaptationSSE2( + const OouraFft& ooura_fft, + int num_partitions, + int x_fft_buf_block_pos, + float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1], + float e_fft[2][PART_LEN1], + float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1]) { + float fft[PART_LEN2]; + int i, j; + for (i = 0; i < num_partitions; i++) { + int xPos = (i + x_fft_buf_block_pos) * (PART_LEN1); + int pos = i * PART_LEN1; + // Check for wrap + if (i + x_fft_buf_block_pos >= num_partitions) { + xPos -= num_partitions * PART_LEN1; + } + + // Process the whole array... + for (j = 0; j < PART_LEN; j += 4) { + // Load x_fft_buf and e_fft. + const __m128 x_fft_buf_re = _mm_loadu_ps(&x_fft_buf[0][xPos + j]); + const __m128 x_fft_buf_im = _mm_loadu_ps(&x_fft_buf[1][xPos + j]); + const __m128 e_fft_re = _mm_loadu_ps(&e_fft[0][j]); + const __m128 e_fft_im = _mm_loadu_ps(&e_fft[1][j]); + // Calculate the product of conjugate(x_fft_buf) by e_fft. 
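+      // As in the NEON path, conj(X) * E is the gradient correlation term;
+      // the results are interleaved into re/im pairs below for the inverse
+      // fft: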
+ // re(conjugate(a) * b) = aRe * bRe + aIm * bIm + // im(conjugate(a) * b)= aRe * bIm - aIm * bRe + const __m128 a = _mm_mul_ps(x_fft_buf_re, e_fft_re); + const __m128 b = _mm_mul_ps(x_fft_buf_im, e_fft_im); + const __m128 c = _mm_mul_ps(x_fft_buf_re, e_fft_im); + const __m128 d = _mm_mul_ps(x_fft_buf_im, e_fft_re); + const __m128 e = _mm_add_ps(a, b); + const __m128 f = _mm_sub_ps(c, d); + // Interleave real and imaginary parts. + const __m128 g = _mm_unpacklo_ps(e, f); + const __m128 h = _mm_unpackhi_ps(e, f); + // Store + _mm_storeu_ps(&fft[2 * j + 0], g); + _mm_storeu_ps(&fft[2 * j + 4], h); + } + // ... and fixup the first imaginary entry. + fft[1] = + MulRe(x_fft_buf[0][xPos + PART_LEN], -x_fft_buf[1][xPos + PART_LEN], + e_fft[0][PART_LEN], e_fft[1][PART_LEN]); + + ooura_fft.InverseFft(fft); + memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN); + + // fft scaling + { + float scale = 2.0f / PART_LEN2; + const __m128 scale_ps = _mm_load_ps1(&scale); + for (j = 0; j < PART_LEN; j += 4) { + const __m128 fft_ps = _mm_loadu_ps(&fft[j]); + const __m128 fft_scale = _mm_mul_ps(fft_ps, scale_ps); + _mm_storeu_ps(&fft[j], fft_scale); + } + } + ooura_fft.Fft(fft); + + { + float wt1 = h_fft_buf[1][pos]; + h_fft_buf[0][pos + PART_LEN] += fft[1]; + for (j = 0; j < PART_LEN; j += 4) { + __m128 wtBuf_re = _mm_loadu_ps(&h_fft_buf[0][pos + j]); + __m128 wtBuf_im = _mm_loadu_ps(&h_fft_buf[1][pos + j]); + const __m128 fft0 = _mm_loadu_ps(&fft[2 * j + 0]); + const __m128 fft4 = _mm_loadu_ps(&fft[2 * j + 4]); + const __m128 fft_re = + _mm_shuffle_ps(fft0, fft4, _MM_SHUFFLE(2, 0, 2, 0)); + const __m128 fft_im = + _mm_shuffle_ps(fft0, fft4, _MM_SHUFFLE(3, 1, 3, 1)); + wtBuf_re = _mm_add_ps(wtBuf_re, fft_re); + wtBuf_im = _mm_add_ps(wtBuf_im, fft_im); + _mm_storeu_ps(&h_fft_buf[0][pos + j], wtBuf_re); + _mm_storeu_ps(&h_fft_buf[1][pos + j], wtBuf_im); + } + h_fft_buf[1][pos] = wt1; + } + } +} + +static __m128 mm_pow_ps(__m128 a, __m128 b) { + // a^b = exp2(b * log2(a)) + // exp2(x) and log2(x) are calculated using polynomial approximations. + __m128 log2_a, b_log2_a, a_exp_b; + + // Calculate log2(x), x = a. + { + // To calculate log2(x), we decompose x like this: + // x = y * 2^n + // n is an integer + // y is in the [1.0, 2.0) range + // + // log2(x) = log2(y) + n + // n can be evaluated by playing with float representation. + // log2(y) in a small range can be approximated, this code uses an order + // five polynomial approximation. The coefficients have been + // estimated with the Remez algorithm and the resulting + // polynomial has a maximum relative error of 0.00086%. + + // Compute n. + // This is done by masking the exponent, shifting it into the top bit of + // the mantissa, putting eight into the biased exponent (to shift/ + // compensate the fact that the exponent has been shifted in the top/ + // fractional part and finally getting rid of the implicit leading one + // from the mantissa by substracting it out. 
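+    // Worked example (illustration, not from the source): a = 8.0f =
+    // 0x41000000; masking the exponent and shifting right by 8 gives
+    // 0x00410000; OR-ing in 0x43800000 yields the float 386.0; subtracting
+    // 0x43BF8000 (== 383.0f) leaves n = 3 = log2(8).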
+ static const ALIGN16_BEG int float_exponent_mask[4] ALIGN16_END = { + 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000}; + static const ALIGN16_BEG int eight_biased_exponent[4] ALIGN16_END = { + 0x43800000, 0x43800000, 0x43800000, 0x43800000}; + static const ALIGN16_BEG int implicit_leading_one[4] ALIGN16_END = { + 0x43BF8000, 0x43BF8000, 0x43BF8000, 0x43BF8000}; + static const int shift_exponent_into_top_mantissa = 8; + const __m128 two_n = + _mm_and_ps(a, *(reinterpret_cast<const __m128*>(float_exponent_mask))); + const __m128 n_1 = _mm_castsi128_ps(_mm_srli_epi32( + _mm_castps_si128(two_n), shift_exponent_into_top_mantissa)); + const __m128 n_0 = + _mm_or_ps(n_1, *(reinterpret_cast<const __m128*>(eight_biased_exponent))); + const __m128 n = + _mm_sub_ps(n_0, *(reinterpret_cast<const __m128*>(implicit_leading_one))); + + // Compute y. + static const ALIGN16_BEG int mantissa_mask[4] ALIGN16_END = { + 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF}; + static const ALIGN16_BEG int zero_biased_exponent_is_one[4] ALIGN16_END = { + 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000}; + const __m128 mantissa = + _mm_and_ps(a, *(reinterpret_cast<const __m128*>(mantissa_mask))); + const __m128 y = + _mm_or_ps(mantissa, + *(reinterpret_cast<const __m128*>(zero_biased_exponent_is_one))); + + // Approximate log2(y) ~= (y - 1) * pol5(y). + // pol5(y) = C5 * y^5 + C4 * y^4 + C3 * y^3 + C2 * y^2 + C1 * y + C0 + static const ALIGN16_BEG float ALIGN16_END C5[4] = { + -3.4436006e-2f, -3.4436006e-2f, -3.4436006e-2f, -3.4436006e-2f}; + static const ALIGN16_BEG float ALIGN16_END C4[4] = { + 3.1821337e-1f, 3.1821337e-1f, 3.1821337e-1f, 3.1821337e-1f}; + static const ALIGN16_BEG float ALIGN16_END C3[4] = { + -1.2315303f, -1.2315303f, -1.2315303f, -1.2315303f}; + static const ALIGN16_BEG float ALIGN16_END C2[4] = {2.5988452f, 2.5988452f, + 2.5988452f, 2.5988452f}; + static const ALIGN16_BEG float ALIGN16_END C1[4] = { + -3.3241990f, -3.3241990f, -3.3241990f, -3.3241990f}; + static const ALIGN16_BEG float ALIGN16_END C0[4] = {3.1157899f, 3.1157899f, + 3.1157899f, 3.1157899f}; + const __m128 pol5_y_0 = + _mm_mul_ps(y, *(reinterpret_cast<const __m128*>(C5))); + const __m128 pol5_y_1 = + _mm_add_ps(pol5_y_0, *(reinterpret_cast<const __m128*>(C4))); + const __m128 pol5_y_2 = _mm_mul_ps(pol5_y_1, y); + const __m128 pol5_y_3 = + _mm_add_ps(pol5_y_2, *(reinterpret_cast<const __m128*>(C3))); + const __m128 pol5_y_4 = _mm_mul_ps(pol5_y_3, y); + const __m128 pol5_y_5 = + _mm_add_ps(pol5_y_4, *(reinterpret_cast<const __m128*>(C2))); + const __m128 pol5_y_6 = _mm_mul_ps(pol5_y_5, y); + const __m128 pol5_y_7 = + _mm_add_ps(pol5_y_6, *(reinterpret_cast<const __m128*>(C1))); + const __m128 pol5_y_8 = _mm_mul_ps(pol5_y_7, y); + const __m128 pol5_y = + _mm_add_ps(pol5_y_8, *(reinterpret_cast<const __m128*>(C0))); + const __m128 y_minus_one = + _mm_sub_ps(y, + *(reinterpret_cast<const __m128*>(zero_biased_exponent_is_one))); + const __m128 log2_y = _mm_mul_ps(y_minus_one, pol5_y); + + // Combine parts. + log2_a = _mm_add_ps(n, log2_y); + } + + // b * log2(a) + b_log2_a = _mm_mul_ps(b, log2_a); + + // Calculate exp2(x), x = b * log2(a). + { + // To calculate 2^x, we decompose x like this: + // x = n + y + // n is an integer, the value of x - 0.5 rounded down, therefore + // y is in the [0.5, 1.5) range + // + // 2^x = 2^n * 2^y + // 2^n can be evaluated by playing with float representation. + // 2^y in a small range can be approximated, this code uses an order two + // polynomial approximation. 
The coefficients have been estimated + // with the Remez algorithm and the resulting polynomial has a + // maximum relative error of 0.17%. + + // To avoid over/underflow, we reduce the range of input to ]-127, 129]. + static const ALIGN16_BEG float max_input[4] ALIGN16_END = {129.f, 129.f, + 129.f, 129.f}; + static const ALIGN16_BEG float min_input[4] ALIGN16_END = { + -126.99999f, -126.99999f, -126.99999f, -126.99999f}; + const __m128 x_min = + _mm_min_ps(b_log2_a, *(reinterpret_cast<const __m128*>(max_input))); + const __m128 x_max = + _mm_max_ps(x_min, *(reinterpret_cast<const __m128*>(min_input))); + // Compute n. + static const ALIGN16_BEG float half[4] ALIGN16_END = {0.5f, 0.5f, 0.5f, + 0.5f}; + const __m128 x_minus_half = + _mm_sub_ps(x_max, *(reinterpret_cast<const __m128*>(half))); + const __m128i x_minus_half_floor = _mm_cvtps_epi32(x_minus_half); + // Compute 2^n. + static const ALIGN16_BEG int float_exponent_bias[4] ALIGN16_END = { + 127, 127, 127, 127}; + static const int float_exponent_shift = 23; + const __m128i two_n_exponent = + _mm_add_epi32(x_minus_half_floor, + *(reinterpret_cast<const __m128i*>(float_exponent_bias))); + const __m128 two_n = + _mm_castsi128_ps(_mm_slli_epi32(two_n_exponent, float_exponent_shift)); + // Compute y. + const __m128 y = _mm_sub_ps(x_max, _mm_cvtepi32_ps(x_minus_half_floor)); + // Approximate 2^y ~= C2 * y^2 + C1 * y + C0. + static const ALIGN16_BEG float C2[4] ALIGN16_END = { + 3.3718944e-1f, 3.3718944e-1f, 3.3718944e-1f, 3.3718944e-1f}; + static const ALIGN16_BEG float C1[4] ALIGN16_END = { + 6.5763628e-1f, 6.5763628e-1f, 6.5763628e-1f, 6.5763628e-1f}; + static const ALIGN16_BEG float C0[4] ALIGN16_END = {1.0017247f, 1.0017247f, + 1.0017247f, 1.0017247f}; + const __m128 exp2_y_0 = + _mm_mul_ps(y, *(reinterpret_cast<const __m128*>(C2))); + const __m128 exp2_y_1 = + _mm_add_ps(exp2_y_0, *(reinterpret_cast<const __m128*>(C1))); + const __m128 exp2_y_2 = _mm_mul_ps(exp2_y_1, y); + const __m128 exp2_y = + _mm_add_ps(exp2_y_2, *(reinterpret_cast<const __m128*>(C0))); + + // Combine parts. + a_exp_b = _mm_mul_ps(exp2_y, two_n); + } + return a_exp_b; +} + +static void OverdriveSSE2(float overdrive_scaling, + float hNlFb, + float hNl[PART_LEN1]) { + int i; + const __m128 vec_hNlFb = _mm_set1_ps(hNlFb); + const __m128 vec_one = _mm_set1_ps(1.0f); + const __m128 vec_overdrive_scaling = _mm_set1_ps(overdrive_scaling); + // vectorized code (four at once) + for (i = 0; i + 3 < PART_LEN1; i += 4) { + // Weight subbands + __m128 vec_hNl = _mm_loadu_ps(&hNl[i]); + const __m128 vec_weightCurve = _mm_loadu_ps(&WebRtcAec_weightCurve[i]); + const __m128 bigger = _mm_cmpgt_ps(vec_hNl, vec_hNlFb); + const __m128 vec_weightCurve_hNlFb = _mm_mul_ps(vec_weightCurve, vec_hNlFb); + const __m128 vec_one_weightCurve = _mm_sub_ps(vec_one, vec_weightCurve); + const __m128 vec_one_weightCurve_hNl = + _mm_mul_ps(vec_one_weightCurve, vec_hNl); + const __m128 vec_if0 = _mm_andnot_ps(bigger, vec_hNl); + const __m128 vec_if1 = _mm_and_ps( + bigger, _mm_add_ps(vec_weightCurve_hNlFb, vec_one_weightCurve_hNl)); + vec_hNl = _mm_or_ps(vec_if0, vec_if1); + + const __m128 vec_overDriveCurve = + _mm_loadu_ps(&WebRtcAec_overDriveCurve[i]); + const __m128 vec_overDriveSm_overDriveCurve = + _mm_mul_ps(vec_overdrive_scaling, vec_overDriveCurve); + vec_hNl = mm_pow_ps(vec_hNl, vec_overDriveSm_overDriveCurve); + _mm_storeu_ps(&hNl[i], vec_hNl); + } + // scalar code for the remaining items. 
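+  // hNl is a per-bin suppression gain in [0, 1]; raising it to the power
+  // overdrive_scaling * WebRtcAec_overDriveCurve[i] drives small gains
+  // further toward zero while leaving gains near 1.0 almost unchanged.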
+ for (; i < PART_LEN1; i++) { + // Weight subbands + if (hNl[i] > hNlFb) { + hNl[i] = WebRtcAec_weightCurve[i] * hNlFb + + (1 - WebRtcAec_weightCurve[i]) * hNl[i]; + } + hNl[i] = powf(hNl[i], overdrive_scaling * WebRtcAec_overDriveCurve[i]); + } +} + +static void SuppressSSE2(const float hNl[PART_LEN1], float efw[2][PART_LEN1]) { + int i; + const __m128 vec_minus_one = _mm_set1_ps(-1.0f); + // vectorized code (four at once) + for (i = 0; i + 3 < PART_LEN1; i += 4) { + // Suppress error signal + __m128 vec_hNl = _mm_loadu_ps(&hNl[i]); + __m128 vec_efw_re = _mm_loadu_ps(&efw[0][i]); + __m128 vec_efw_im = _mm_loadu_ps(&efw[1][i]); + vec_efw_re = _mm_mul_ps(vec_efw_re, vec_hNl); + vec_efw_im = _mm_mul_ps(vec_efw_im, vec_hNl); + + // Ooura fft returns incorrect sign on imaginary component. It matters + // here because we are making an additive change with comfort noise. + vec_efw_im = _mm_mul_ps(vec_efw_im, vec_minus_one); + _mm_storeu_ps(&efw[0][i], vec_efw_re); + _mm_storeu_ps(&efw[1][i], vec_efw_im); + } + // scalar code for the remaining items. + for (; i < PART_LEN1; i++) { + // Suppress error signal + efw[0][i] *= hNl[i]; + efw[1][i] *= hNl[i]; + + // Ooura fft returns incorrect sign on imaginary component. It matters + // here because we are making an additive change with comfort noise. + efw[1][i] *= -1; + } +} + +__inline static void _mm_add_ps_4x1(__m128 sum, float* dst) { + // A+B C+D + sum = _mm_add_ps(sum, _mm_shuffle_ps(sum, sum, _MM_SHUFFLE(0, 0, 3, 2))); + // A+B+C+D A+B+C+D + sum = _mm_add_ps(sum, _mm_shuffle_ps(sum, sum, _MM_SHUFFLE(1, 1, 1, 1))); + _mm_store_ss(dst, sum); +} + +static int PartitionDelaySSE2( + int num_partitions, + float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1]) { + // Measures the energy in each filter partition and returns the partition with + // highest energy. + // TODO(bjornv): Spread computational cost by computing one partition per + // block? + float wfEnMax = 0; + int i; + int delay = 0; + + for (i = 0; i < num_partitions; i++) { + int j; + int pos = i * PART_LEN1; + float wfEn = 0; + __m128 vec_wfEn = _mm_set1_ps(0.0f); + // vectorized code (four at once) + for (j = 0; j + 3 < PART_LEN1; j += 4) { + const __m128 vec_wfBuf0 = _mm_loadu_ps(&h_fft_buf[0][pos + j]); + const __m128 vec_wfBuf1 = _mm_loadu_ps(&h_fft_buf[1][pos + j]); + vec_wfEn = _mm_add_ps(vec_wfEn, _mm_mul_ps(vec_wfBuf0, vec_wfBuf0)); + vec_wfEn = _mm_add_ps(vec_wfEn, _mm_mul_ps(vec_wfBuf1, vec_wfBuf1)); + } + _mm_add_ps_4x1(vec_wfEn, &wfEn); + + // scalar code for the remaining items. + for (; j < PART_LEN1; j++) { + wfEn += h_fft_buf[0][pos + j] * h_fft_buf[0][pos + j] + + h_fft_buf[1][pos + j] * h_fft_buf[1][pos + j]; + } + + if (wfEn > wfEnMax) { + wfEnMax = wfEn; + delay = i; + } + } + return delay; +} + +// Updates the following smoothed Power Spectral Densities (PSD): +// - sd : near-end +// - se : residual echo +// - sx : far-end +// - sde : cross-PSD of near-end and residual echo +// - sxd : cross-PSD of near-end and far-end +// +// In addition to updating the PSDs, also the filter diverge state is determined +// upon actions are taken. +static void UpdateCoherenceSpectraSSE2(int mult, + bool extended_filter_enabled, + float efw[2][PART_LEN1], + float dfw[2][PART_LEN1], + float xfw[2][PART_LEN1], + CoherenceState* coherence_state, + short* filter_divergence_state, + int* extreme_filter_divergence) { + // Power estimate smoothing coefficients. + const float* ptrGCoh = + extended_filter_enabled + ? 
WebRtcAec_kExtendedSmoothingCoefficients[mult - 1] + : WebRtcAec_kNormalSmoothingCoefficients[mult - 1]; + int i; + float sdSum = 0, seSum = 0; + const __m128 vec_15 = _mm_set1_ps(WebRtcAec_kMinFarendPSD); + const __m128 vec_GCoh0 = _mm_set1_ps(ptrGCoh[0]); + const __m128 vec_GCoh1 = _mm_set1_ps(ptrGCoh[1]); + __m128 vec_sdSum = _mm_set1_ps(0.0f); + __m128 vec_seSum = _mm_set1_ps(0.0f); + + for (i = 0; i + 3 < PART_LEN1; i += 4) { + const __m128 vec_dfw0 = _mm_loadu_ps(&dfw[0][i]); + const __m128 vec_dfw1 = _mm_loadu_ps(&dfw[1][i]); + const __m128 vec_efw0 = _mm_loadu_ps(&efw[0][i]); + const __m128 vec_efw1 = _mm_loadu_ps(&efw[1][i]); + const __m128 vec_xfw0 = _mm_loadu_ps(&xfw[0][i]); + const __m128 vec_xfw1 = _mm_loadu_ps(&xfw[1][i]); + __m128 vec_sd = + _mm_mul_ps(_mm_loadu_ps(&coherence_state->sd[i]), vec_GCoh0); + __m128 vec_se = + _mm_mul_ps(_mm_loadu_ps(&coherence_state->se[i]), vec_GCoh0); + __m128 vec_sx = + _mm_mul_ps(_mm_loadu_ps(&coherence_state->sx[i]), vec_GCoh0); + __m128 vec_dfw_sumsq = _mm_mul_ps(vec_dfw0, vec_dfw0); + __m128 vec_efw_sumsq = _mm_mul_ps(vec_efw0, vec_efw0); + __m128 vec_xfw_sumsq = _mm_mul_ps(vec_xfw0, vec_xfw0); + vec_dfw_sumsq = _mm_add_ps(vec_dfw_sumsq, _mm_mul_ps(vec_dfw1, vec_dfw1)); + vec_efw_sumsq = _mm_add_ps(vec_efw_sumsq, _mm_mul_ps(vec_efw1, vec_efw1)); + vec_xfw_sumsq = _mm_add_ps(vec_xfw_sumsq, _mm_mul_ps(vec_xfw1, vec_xfw1)); + vec_xfw_sumsq = _mm_max_ps(vec_xfw_sumsq, vec_15); + vec_sd = _mm_add_ps(vec_sd, _mm_mul_ps(vec_dfw_sumsq, vec_GCoh1)); + vec_se = _mm_add_ps(vec_se, _mm_mul_ps(vec_efw_sumsq, vec_GCoh1)); + vec_sx = _mm_add_ps(vec_sx, _mm_mul_ps(vec_xfw_sumsq, vec_GCoh1)); + _mm_storeu_ps(&coherence_state->sd[i], vec_sd); + _mm_storeu_ps(&coherence_state->se[i], vec_se); + _mm_storeu_ps(&coherence_state->sx[i], vec_sx); + + { + const __m128 vec_3210 = _mm_loadu_ps(&coherence_state->sde[i][0]); + const __m128 vec_7654 = _mm_loadu_ps(&coherence_state->sde[i + 2][0]); + __m128 vec_a = + _mm_shuffle_ps(vec_3210, vec_7654, _MM_SHUFFLE(2, 0, 2, 0)); + __m128 vec_b = + _mm_shuffle_ps(vec_3210, vec_7654, _MM_SHUFFLE(3, 1, 3, 1)); + __m128 vec_dfwefw0011 = _mm_mul_ps(vec_dfw0, vec_efw0); + __m128 vec_dfwefw0110 = _mm_mul_ps(vec_dfw0, vec_efw1); + vec_a = _mm_mul_ps(vec_a, vec_GCoh0); + vec_b = _mm_mul_ps(vec_b, vec_GCoh0); + vec_dfwefw0011 = + _mm_add_ps(vec_dfwefw0011, _mm_mul_ps(vec_dfw1, vec_efw1)); + vec_dfwefw0110 = + _mm_sub_ps(vec_dfwefw0110, _mm_mul_ps(vec_dfw1, vec_efw0)); + vec_a = _mm_add_ps(vec_a, _mm_mul_ps(vec_dfwefw0011, vec_GCoh1)); + vec_b = _mm_add_ps(vec_b, _mm_mul_ps(vec_dfwefw0110, vec_GCoh1)); + _mm_storeu_ps(&coherence_state->sde[i][0], _mm_unpacklo_ps(vec_a, vec_b)); + _mm_storeu_ps(&coherence_state->sde[i + 2][0], + _mm_unpackhi_ps(vec_a, vec_b)); + } + + { + const __m128 vec_3210 = _mm_loadu_ps(&coherence_state->sxd[i][0]); + const __m128 vec_7654 = _mm_loadu_ps(&coherence_state->sxd[i + 2][0]); + __m128 vec_a = + _mm_shuffle_ps(vec_3210, vec_7654, _MM_SHUFFLE(2, 0, 2, 0)); + __m128 vec_b = + _mm_shuffle_ps(vec_3210, vec_7654, _MM_SHUFFLE(3, 1, 3, 1)); + __m128 vec_dfwxfw0011 = _mm_mul_ps(vec_dfw0, vec_xfw0); + __m128 vec_dfwxfw0110 = _mm_mul_ps(vec_dfw0, vec_xfw1); + vec_a = _mm_mul_ps(vec_a, vec_GCoh0); + vec_b = _mm_mul_ps(vec_b, vec_GCoh0); + vec_dfwxfw0011 = + _mm_add_ps(vec_dfwxfw0011, _mm_mul_ps(vec_dfw1, vec_xfw1)); + vec_dfwxfw0110 = + _mm_sub_ps(vec_dfwxfw0110, _mm_mul_ps(vec_dfw1, vec_xfw0)); + vec_a = _mm_add_ps(vec_a, _mm_mul_ps(vec_dfwxfw0011, vec_GCoh1)); + vec_b = _mm_add_ps(vec_b, 
_mm_mul_ps(vec_dfwxfw0110, vec_GCoh1)); + _mm_storeu_ps(&coherence_state->sxd[i][0], _mm_unpacklo_ps(vec_a, vec_b)); + _mm_storeu_ps(&coherence_state->sxd[i + 2][0], + _mm_unpackhi_ps(vec_a, vec_b)); + } + + vec_sdSum = _mm_add_ps(vec_sdSum, vec_sd); + vec_seSum = _mm_add_ps(vec_seSum, vec_se); + } + + _mm_add_ps_4x1(vec_sdSum, &sdSum); + _mm_add_ps_4x1(vec_seSum, &seSum); + + for (; i < PART_LEN1; i++) { + coherence_state->sd[i] = + ptrGCoh[0] * coherence_state->sd[i] + + ptrGCoh[1] * (dfw[0][i] * dfw[0][i] + dfw[1][i] * dfw[1][i]); + coherence_state->se[i] = + ptrGCoh[0] * coherence_state->se[i] + + ptrGCoh[1] * (efw[0][i] * efw[0][i] + efw[1][i] * efw[1][i]); + // We threshold here to protect against the ill-effects of a zero farend. + // The threshold is not arbitrarily chosen, but balances protection and + // adverse interaction with the algorithm's tuning. + // TODO(bjornv): investigate further why this is so sensitive. + coherence_state->sx[i] = + ptrGCoh[0] * coherence_state->sx[i] + + ptrGCoh[1] * + WEBRTC_SPL_MAX(xfw[0][i] * xfw[0][i] + xfw[1][i] * xfw[1][i], + WebRtcAec_kMinFarendPSD); + + coherence_state->sde[i][0] = + ptrGCoh[0] * coherence_state->sde[i][0] + + ptrGCoh[1] * (dfw[0][i] * efw[0][i] + dfw[1][i] * efw[1][i]); + coherence_state->sde[i][1] = + ptrGCoh[0] * coherence_state->sde[i][1] + + ptrGCoh[1] * (dfw[0][i] * efw[1][i] - dfw[1][i] * efw[0][i]); + + coherence_state->sxd[i][0] = + ptrGCoh[0] * coherence_state->sxd[i][0] + + ptrGCoh[1] * (dfw[0][i] * xfw[0][i] + dfw[1][i] * xfw[1][i]); + coherence_state->sxd[i][1] = + ptrGCoh[0] * coherence_state->sxd[i][1] + + ptrGCoh[1] * (dfw[0][i] * xfw[1][i] - dfw[1][i] * xfw[0][i]); + + sdSum += coherence_state->sd[i]; + seSum += coherence_state->se[i]; + } + + // Divergent filter safeguard update. + *filter_divergence_state = + (*filter_divergence_state ? 1.05f : 1.0f) * seSum > sdSum; + + // Signal extreme filter divergence if the error is significantly larger + // than the nearend (13 dB). + *extreme_filter_divergence = (seSum > (19.95f * sdSum)); +} + +// Window time domain data to be used by the fft. +static void WindowDataSSE2(float* x_windowed, const float* x) { + int i; + for (i = 0; i < PART_LEN; i += 4) { + const __m128 vec_Buf1 = _mm_loadu_ps(&x[i]); + const __m128 vec_Buf2 = _mm_loadu_ps(&x[PART_LEN + i]); + const __m128 vec_sqrtHanning = _mm_load_ps(&WebRtcAec_sqrtHanning[i]); + // A B C D + __m128 vec_sqrtHanning_rev = + _mm_loadu_ps(&WebRtcAec_sqrtHanning[PART_LEN - i - 3]); + // D C B A + vec_sqrtHanning_rev = _mm_shuffle_ps( + vec_sqrtHanning_rev, vec_sqrtHanning_rev, _MM_SHUFFLE(0, 1, 2, 3)); + _mm_storeu_ps(&x_windowed[i], _mm_mul_ps(vec_Buf1, vec_sqrtHanning)); + _mm_storeu_ps(&x_windowed[PART_LEN + i], + _mm_mul_ps(vec_Buf2, vec_sqrtHanning_rev)); + } +} + +// Puts fft output data into a complex valued array. 
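+// The Ooura real FFT used by the AEC packs its output as
+//   data[0] = Re{bin 0} (DC), data[1] = Re{bin PART_LEN} (Nyquist),
+//   data[2k], data[2k + 1] = Re, Im of bin k for 0 < k < PART_LEN,
+// which is why the DC and Nyquist bins are patched up separately after
+// the vectorized de-interleaving loop below.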
+static void StoreAsComplexSSE2(const float* data, + float data_complex[2][PART_LEN1]) { + int i; + for (i = 0; i < PART_LEN; i += 4) { + const __m128 vec_fft0 = _mm_loadu_ps(&data[2 * i]); + const __m128 vec_fft4 = _mm_loadu_ps(&data[2 * i + 4]); + const __m128 vec_a = + _mm_shuffle_ps(vec_fft0, vec_fft4, _MM_SHUFFLE(2, 0, 2, 0)); + const __m128 vec_b = + _mm_shuffle_ps(vec_fft0, vec_fft4, _MM_SHUFFLE(3, 1, 3, 1)); + _mm_storeu_ps(&data_complex[0][i], vec_a); + _mm_storeu_ps(&data_complex[1][i], vec_b); + } + // fix beginning/end values + data_complex[1][0] = 0; + data_complex[1][PART_LEN] = 0; + data_complex[0][0] = data[0]; + data_complex[0][PART_LEN] = data[1]; +} + +static void ComputeCoherenceSSE2(const CoherenceState* coherence_state, + float* cohde, + float* cohxd) { + int i; + + { + const __m128 vec_1eminus10 = _mm_set1_ps(1e-10f); + + // Subband coherence + for (i = 0; i + 3 < PART_LEN1; i += 4) { + const __m128 vec_sd = _mm_loadu_ps(&coherence_state->sd[i]); + const __m128 vec_se = _mm_loadu_ps(&coherence_state->se[i]); + const __m128 vec_sx = _mm_loadu_ps(&coherence_state->sx[i]); + const __m128 vec_sdse = + _mm_add_ps(vec_1eminus10, _mm_mul_ps(vec_sd, vec_se)); + const __m128 vec_sdsx = + _mm_add_ps(vec_1eminus10, _mm_mul_ps(vec_sd, vec_sx)); + const __m128 vec_sde_3210 = _mm_loadu_ps(&coherence_state->sde[i][0]); + const __m128 vec_sde_7654 = _mm_loadu_ps(&coherence_state->sde[i + 2][0]); + const __m128 vec_sxd_3210 = _mm_loadu_ps(&coherence_state->sxd[i][0]); + const __m128 vec_sxd_7654 = _mm_loadu_ps(&coherence_state->sxd[i + 2][0]); + const __m128 vec_sde_0 = + _mm_shuffle_ps(vec_sde_3210, vec_sde_7654, _MM_SHUFFLE(2, 0, 2, 0)); + const __m128 vec_sde_1 = + _mm_shuffle_ps(vec_sde_3210, vec_sde_7654, _MM_SHUFFLE(3, 1, 3, 1)); + const __m128 vec_sxd_0 = + _mm_shuffle_ps(vec_sxd_3210, vec_sxd_7654, _MM_SHUFFLE(2, 0, 2, 0)); + const __m128 vec_sxd_1 = + _mm_shuffle_ps(vec_sxd_3210, vec_sxd_7654, _MM_SHUFFLE(3, 1, 3, 1)); + __m128 vec_cohde = _mm_mul_ps(vec_sde_0, vec_sde_0); + __m128 vec_cohxd = _mm_mul_ps(vec_sxd_0, vec_sxd_0); + vec_cohde = _mm_add_ps(vec_cohde, _mm_mul_ps(vec_sde_1, vec_sde_1)); + vec_cohde = _mm_div_ps(vec_cohde, vec_sdse); + vec_cohxd = _mm_add_ps(vec_cohxd, _mm_mul_ps(vec_sxd_1, vec_sxd_1)); + vec_cohxd = _mm_div_ps(vec_cohxd, vec_sdsx); + _mm_storeu_ps(&cohde[i], vec_cohde); + _mm_storeu_ps(&cohxd[i], vec_cohxd); + } + + // scalar code for the remaining items. 
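+ // Both paths evaluate the magnitude-squared coherence per frequency bin:
+ //   cohde[i] = |sde[i]|^2 / (sd[i] * se[i] + 1e-10)
+ //   cohxd[i] = |sxd[i]|^2 / (sx[i] * sd[i] + 1e-10)
+ // with the 1e-10 term guarding against division by zero.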
+ for (; i < PART_LEN1; i++) { + cohde[i] = (coherence_state->sde[i][0] * coherence_state->sde[i][0] + + coherence_state->sde[i][1] * coherence_state->sde[i][1]) / + (coherence_state->sd[i] * coherence_state->se[i] + 1e-10f); + cohxd[i] = (coherence_state->sxd[i][0] * coherence_state->sxd[i][0] + + coherence_state->sxd[i][1] * coherence_state->sxd[i][1]) / + (coherence_state->sx[i] * coherence_state->sd[i] + 1e-10f); + } + } +} + +void WebRtcAec_InitAec_SSE2(void) { + WebRtcAec_FilterFar = FilterFarSSE2; + WebRtcAec_ScaleErrorSignal = ScaleErrorSignalSSE2; + WebRtcAec_FilterAdaptation = FilterAdaptationSSE2; + WebRtcAec_Overdrive = OverdriveSSE2; + WebRtcAec_Suppress = SuppressSSE2; + WebRtcAec_ComputeCoherence = ComputeCoherenceSSE2; + WebRtcAec_UpdateCoherenceSpectra = UpdateCoherenceSpectraSSE2; + WebRtcAec_StoreAsComplex = StoreAsComplexSSE2; + WebRtcAec_PartitionDelay = PartitionDelaySSE2; + WebRtcAec_WindowData = WindowDataSSE2; +} +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec/aec_resampler.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec/aec_resampler.cc new file mode 100644 index 0000000000..2851c0b6dd --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec/aec_resampler.cc @@ -0,0 +1,207 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* Resamples a signal to an arbitrary rate. Used by the AEC to compensate for + * clock skew by resampling the farend signal. 
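+ *
+ * The resampler itself is plain linear interpolation at a rate ratio of
+ * be = (1 + skew): for each fractional read position tnew,
+ *   out[m] = y[tn] + (tnew - tn) * (y[tn + 1] - y[tn]),  tn = floor(tnew).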
+ */ + +#include "modules/audio_processing/aec/aec_resampler.h" + +#include <math.h> +#include <stdlib.h> +#include <string.h> + +#include "modules/audio_processing/aec/aec_core.h" +#include "rtc_base/checks.h" + +namespace webrtc { + +enum { kEstimateLengthFrames = 400 }; + +typedef struct { + float buffer[kResamplerBufferSize]; + float position; + + int deviceSampleRateHz; + int skewData[kEstimateLengthFrames]; + int skewDataIndex; + float skewEstimate; +} AecResampler; + +static int EstimateSkew(const int* rawSkew, + int size, + int deviceSampleRateHz, + float* skewEst); + +void* WebRtcAec_CreateResampler() { + return malloc(sizeof(AecResampler)); +} + +int WebRtcAec_InitResampler(void* resampInst, int deviceSampleRateHz) { + AecResampler* obj = static_cast<AecResampler*>(resampInst); + memset(obj->buffer, 0, sizeof(obj->buffer)); + obj->position = 0.0; + + obj->deviceSampleRateHz = deviceSampleRateHz; + memset(obj->skewData, 0, sizeof(obj->skewData)); + obj->skewDataIndex = 0; + obj->skewEstimate = 0.0; + + return 0; +} + +void WebRtcAec_FreeResampler(void* resampInst) { + AecResampler* obj = static_cast<AecResampler*>(resampInst); + free(obj); +} + +void WebRtcAec_ResampleLinear(void* resampInst, + const float* inspeech, + size_t size, + float skew, + float* outspeech, + size_t* size_out) { + AecResampler* obj = static_cast<AecResampler*>(resampInst); + + float* y; + float be, tnew; + size_t tn, mm; + + RTC_DCHECK_LE(size, 2 * FRAME_LEN); + RTC_DCHECK(resampInst); + RTC_DCHECK(inspeech); + RTC_DCHECK(outspeech); + RTC_DCHECK(size_out); + + // Add new frame data in lookahead + memcpy(&obj->buffer[FRAME_LEN + kResamplingDelay], inspeech, + size * sizeof(inspeech[0])); + + // Sample rate ratio + be = 1 + skew; + + // Loop over input frame + mm = 0; + y = &obj->buffer[FRAME_LEN]; // Point at current frame + + tnew = be * mm + obj->position; + tn = (size_t)tnew; + + while (tn < size) { + // Interpolation + outspeech[mm] = y[tn] + (tnew - tn) * (y[tn + 1] - y[tn]); + mm++; + + tnew = be * mm + obj->position; + tn = static_cast<int>(tnew); + } + + *size_out = mm; + obj->position += (*size_out) * be - size; + + // Shift buffer + memmove(obj->buffer, &obj->buffer[size], + (kResamplerBufferSize - size) * sizeof(obj->buffer[0])); +} + +int WebRtcAec_GetSkew(void* resampInst, int rawSkew, float* skewEst) { + AecResampler* obj = static_cast<AecResampler*>(resampInst); + int err = 0; + + if (obj->skewDataIndex < kEstimateLengthFrames) { + obj->skewData[obj->skewDataIndex] = rawSkew; + obj->skewDataIndex++; + } else if (obj->skewDataIndex == kEstimateLengthFrames) { + err = EstimateSkew(obj->skewData, kEstimateLengthFrames, + obj->deviceSampleRateHz, skewEst); + obj->skewEstimate = *skewEst; + obj->skewDataIndex++; + } else { + *skewEst = obj->skewEstimate; + } + + return err; +} + +int EstimateSkew(const int* rawSkew, + int size, + int deviceSampleRateHz, + float* skewEst) { + const int absLimitOuter = static_cast<int>(0.04f * deviceSampleRateHz); + const int absLimitInner = static_cast<int>(0.0025f * deviceSampleRateHz); + int i = 0; + int n = 0; + float rawAvg = 0; + float err = 0; + float rawAbsDev = 0; + int upperLimit = 0; + int lowerLimit = 0; + float cumSum = 0; + float x = 0; + float x2 = 0; + float y = 0; + float xy = 0; + float xAvg = 0; + float denom = 0; + float skew = 0; + + *skewEst = 0; // Set in case of error below. 
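+ // Outline of the estimate (matching the code below):
+ // 1) Average the raw samples that fall inside the coarse +/-4% gate
+ //    given by |absLimitOuter|.
+ // 2) Use their mean absolute deviation to derive data-driven bounds
+ //    |lowerLimit| / |upperLimit| for a second outlier-rejection pass.
+ // 3) Fit a least-squares line to the cumulative sum of the surviving
+ //    samples; the slope is the skew estimate:
+ //      skew = (xy - xAvg * y) / (x2 - xAvg * x).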
+ for (i = 0; i < size; i++) { + if ((rawSkew[i] < absLimitOuter && rawSkew[i] > -absLimitOuter)) { + n++; + rawAvg += rawSkew[i]; + } + } + + if (n == 0) { + return -1; + } + RTC_DCHECK_GT(n, 0); + rawAvg /= n; + + for (i = 0; i < size; i++) { + if ((rawSkew[i] < absLimitOuter && rawSkew[i] > -absLimitOuter)) { + err = rawSkew[i] - rawAvg; + rawAbsDev += err >= 0 ? err : -err; + } + } + RTC_DCHECK_GT(n, 0); + rawAbsDev /= n; + upperLimit = static_cast<int>(rawAvg + 5 * rawAbsDev + 1); // +1 for ceiling. + lowerLimit = static_cast<int>(rawAvg - 5 * rawAbsDev - 1); // -1 for floor. + + n = 0; + for (i = 0; i < size; i++) { + if ((rawSkew[i] < absLimitInner && rawSkew[i] > -absLimitInner) || + (rawSkew[i] < upperLimit && rawSkew[i] > lowerLimit)) { + n++; + cumSum += rawSkew[i]; + x += n; + x2 += n * n; + y += cumSum; + xy += n * cumSum; + } + } + + if (n == 0) { + return -1; + } + RTC_DCHECK_GT(n, 0); + xAvg = x / n; + denom = x2 - xAvg * x; + + if (denom != 0) { + skew = (xy - xAvg * y) / denom; + } + + *skewEst = skew; + return 0; +} +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec/aec_resampler.h b/third_party/libwebrtc/webrtc/modules/audio_processing/aec/aec_resampler.h new file mode 100644 index 0000000000..130f7ec7c7 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec/aec_resampler.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC_AEC_RESAMPLER_H_ +#define MODULES_AUDIO_PROCESSING_AEC_AEC_RESAMPLER_H_ + +#include "modules/audio_processing/aec/aec_core.h" + +namespace webrtc { + +enum { kResamplingDelay = 1 }; +enum { kResamplerBufferSize = FRAME_LEN * 4 }; + +// Unless otherwise specified, functions return 0 on success and -1 on error. +void* WebRtcAec_CreateResampler(); // Returns NULL on error. +int WebRtcAec_InitResampler(void* resampInst, int deviceSampleRateHz); +void WebRtcAec_FreeResampler(void* resampInst); + +// Estimates skew from raw measurement. +int WebRtcAec_GetSkew(void* resampInst, int rawSkew, float* skewEst); + +// Resamples input using linear interpolation. +void WebRtcAec_ResampleLinear(void* resampInst, + const float* inspeech, + size_t size, + float skew, + float* outspeech, + size_t* size_out); + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC_AEC_RESAMPLER_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec/echo_cancellation.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec/echo_cancellation.cc new file mode 100644 index 0000000000..eeaa909a17 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec/echo_cancellation.cc @@ -0,0 +1,868 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * Contains the API functions for the AEC. 
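+ *
+ * Typical call sequence (illustrative sketch; error checks omitted, and
+ * the variable names are not part of the API):
+ *   void* aec = WebRtcAec_Create();
+ *   WebRtcAec_Init(aec, 16000, 16000);
+ *   // Per 10 ms frame:
+ *   WebRtcAec_BufferFarend(aec, farend, 160);
+ *   WebRtcAec_Process(aec, nearend, 1, out, 160, delay_ms, 0);
+ *   WebRtcAec_Free(aec);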
+ */ +#include "modules/audio_processing/aec/echo_cancellation.h" + +#include <math.h> +#include <stdlib.h> +#include <string.h> + +extern "C" { +#include "common_audio/ring_buffer.h" +#include "common_audio/signal_processing/include/signal_processing_library.h" +} +#include "modules/audio_processing/aec/aec_core.h" +#include "modules/audio_processing/aec/aec_resampler.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "typedefs.h" // NOLINT(build/include) + +namespace webrtc { + +Aec::Aec() = default; +Aec::~Aec() = default; + +// Measured delays [ms] +// Device Chrome GTP +// MacBook Air 10 +// MacBook Retina 10 100 +// MacPro 30? +// +// Win7 Desktop 70 80? +// Win7 T430s 110 +// Win8 T420s 70 +// +// Daisy 50 +// Pixel (w/ preproc?) 240 +// Pixel (w/o preproc?) 110 110 + +// The extended filter mode gives us the flexibility to ignore the system's +// reported delays. We do this for platforms which we believe provide results +// which are incompatible with the AEC's expectations. Based on measurements +// (some provided above) we set a conservative (i.e. lower than measured) +// fixed delay. +// +// WEBRTC_UNTRUSTED_DELAY will only have an impact when |extended_filter_mode| +// is enabled. See the note along with |DelayCorrection| in +// echo_cancellation_impl.h for more details on the mode. +// +// Justification: +// Chromium/Mac: Here, the true latency is so low (~10-20 ms), that it plays +// havoc with the AEC's buffering. To avoid this, we set a fixed delay of 20 ms +// and then compensate by rewinding by 10 ms (in wideband) through +// kDelayDiffOffsetSamples. This trick does not seem to work for larger rewind +// values, but fortunately this is sufficient. +// +// Chromium/Linux(ChromeOS): The values we get on this platform don't correspond +// well to reality. The variance doesn't match the AEC's buffer changes, and the +// bulk values tend to be too low. However, the range across different hardware +// appears to be too large to choose a single value. +// +// GTP/Linux(ChromeOS): TBD, but for the moment we will trust the values. +#if defined(WEBRTC_CHROMIUM_BUILD) && defined(WEBRTC_MAC) +#define WEBRTC_UNTRUSTED_DELAY +#endif + +#if defined(WEBRTC_UNTRUSTED_DELAY) && defined(WEBRTC_MAC) +static const int kDelayDiffOffsetSamples = -160; +#else +// Not enabled for now. +static const int kDelayDiffOffsetSamples = 0; +#endif + +#if defined(WEBRTC_MAC) +static const int kFixedDelayMs = 20; +#else +static const int kFixedDelayMs = 50; +#endif +#if !defined(WEBRTC_UNTRUSTED_DELAY) +static const int kMinTrustedDelayMs = 20; +#endif +static const int kMaxTrustedDelayMs = 500; + +// Maximum length of resampled signal. 
Must be an integer multiple of frames +// (ceil(1/(1 + MIN_SKEW)*2) + 1)*FRAME_LEN +// The factor of 2 handles wb, and the + 1 is as a safety margin +// TODO(bjornv): Replace with kResamplerBufferSize +#define MAX_RESAMP_LEN (5 * FRAME_LEN) + +static const int kMaxBufSizeStart = 62; // In partitions +static const int sampMsNb = 8; // samples per ms in nb +static const int initCheck = 42; + +std::atomic<int> Aec::instance_count = 0; + +// Estimates delay to set the position of the far-end buffer read pointer +// (controlled by knownDelay) +static void EstBufDelayNormal(Aec* aecInst); +static void EstBufDelayExtended(Aec* aecInst); +static int ProcessNormal(Aec* aecInst, + const float* const* nearend, + size_t num_bands, + float* const* out, + size_t num_samples, + int16_t reported_delay_ms, + int32_t skew); +static void ProcessExtended(Aec* aecInst, + const float* const* nearend, + size_t num_bands, + float* const* out, + size_t num_samples, + int16_t reported_delay_ms, + int32_t skew); + +void* WebRtcAec_Create() { + Aec* aecpc = new Aec(); + + if (!aecpc) { + return NULL; + } + aecpc->data_dumper.reset(new ApmDataDumper(aecpc->instance_count)); + + aecpc->aec = WebRtcAec_CreateAec(aecpc->instance_count); + if (!aecpc->aec) { + WebRtcAec_Free(aecpc); + return NULL; + } + aecpc->resampler = WebRtcAec_CreateResampler(); + if (!aecpc->resampler) { + WebRtcAec_Free(aecpc); + return NULL; + } + // Create far-end pre-buffer. The buffer size has to be large enough for + // largest possible drift compensation (kResamplerBufferSize) + "almost" an + // FFT buffer (PART_LEN2 - 1). + aecpc->far_pre_buf = + WebRtc_CreateBuffer(PART_LEN2 + kResamplerBufferSize, sizeof(float)); + if (!aecpc->far_pre_buf) { + WebRtcAec_Free(aecpc); + return NULL; + } + + aecpc->initFlag = 0; + + aecpc->instance_count++; + return aecpc; +} + +void WebRtcAec_Free(void* aecInst) { + Aec* aecpc = reinterpret_cast<Aec*>(aecInst); + + if (aecpc == NULL) { + return; + } + + WebRtc_FreeBuffer(aecpc->far_pre_buf); + + WebRtcAec_FreeAec(aecpc->aec); + WebRtcAec_FreeResampler(aecpc->resampler); + delete aecpc; +} + +int32_t WebRtcAec_Init(void* aecInst, int32_t sampFreq, int32_t scSampFreq) { + Aec* aecpc = reinterpret_cast<Aec*>(aecInst); + aecpc->data_dumper->InitiateNewSetOfRecordings(); + AecConfig aecConfig; + + if (sampFreq != 8000 && sampFreq != 16000 && sampFreq != 32000 && + sampFreq != 48000) { + return AEC_BAD_PARAMETER_ERROR; + } + aecpc->sampFreq = sampFreq; + + if (scSampFreq < 1 || scSampFreq > 96000) { + return AEC_BAD_PARAMETER_ERROR; + } + aecpc->scSampFreq = scSampFreq; + + // Initialize echo canceller core + if (WebRtcAec_InitAec(aecpc->aec, aecpc->sampFreq) == -1) { + return AEC_UNSPECIFIED_ERROR; + } + + if (WebRtcAec_InitResampler(aecpc->resampler, aecpc->scSampFreq) == -1) { + return AEC_UNSPECIFIED_ERROR; + } + + WebRtc_InitBuffer(aecpc->far_pre_buf); + WebRtc_MoveReadPtr(aecpc->far_pre_buf, -PART_LEN); // Start overlap. + + aecpc->initFlag = initCheck; // indicates that initialization has been done + + if (aecpc->sampFreq == 32000 || aecpc->sampFreq == 48000) { + aecpc->splitSampFreq = 16000; + } else { + aecpc->splitSampFreq = sampFreq; + } + + aecpc->delayCtr = 0; + aecpc->sampFactor = (aecpc->scSampFreq * 1.0f) / aecpc->splitSampFreq; + // Sampling frequency multiplier (SWB is processed as 160 frame size). 
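+ // I.e. rate_factor is 1 at 8 kHz and 2 at 16 kHz (the split rate used
+ // for 32/48 kHz input), so one 10 ms frame spans FRAME_LEN * rate_factor
+ // = 80 or 160 samples.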
+ aecpc->rate_factor = aecpc->splitSampFreq / 8000; + + aecpc->sum = 0; + aecpc->counter = 0; + aecpc->checkBuffSize = 1; + aecpc->firstVal = 0; + + // We skip the startup_phase completely (setting to 0) if DA-AEC is enabled, + // but not extended_filter mode. + aecpc->startup_phase = WebRtcAec_extended_filter_enabled(aecpc->aec) || + !WebRtcAec_delay_agnostic_enabled(aecpc->aec); + aecpc->bufSizeStart = 0; + aecpc->checkBufSizeCtr = 0; + aecpc->msInSndCardBuf = 0; + aecpc->filtDelay = -1; // -1 indicates an initialized state. + aecpc->timeForDelayChange = 0; + aecpc->knownDelay = 0; + aecpc->lastDelayDiff = 0; + + aecpc->skewFrCtr = 0; + aecpc->resample = kAecFalse; + aecpc->highSkewCtr = 0; + aecpc->skew = 0; + + aecpc->farend_started = 0; + + // Default settings. + aecConfig.nlpMode = kAecNlpModerate; + aecConfig.skewMode = kAecFalse; + aecConfig.metricsMode = kAecFalse; + aecConfig.delay_logging = kAecFalse; + + if (WebRtcAec_set_config(aecpc, aecConfig) == -1) { + return AEC_UNSPECIFIED_ERROR; + } + + return 0; +} + +// Returns any error that is caused when buffering the +// far-end signal. +int32_t WebRtcAec_GetBufferFarendError(void* aecInst, + const float* farend, + size_t nrOfSamples) { + Aec* aecpc = reinterpret_cast<Aec*>(aecInst); + + if (!farend) + return AEC_NULL_POINTER_ERROR; + + if (aecpc->initFlag != initCheck) + return AEC_UNINITIALIZED_ERROR; + + // number of samples == 160 for SWB input + if (nrOfSamples != 80 && nrOfSamples != 160) + return AEC_BAD_PARAMETER_ERROR; + + return 0; +} + +// only buffer L band for farend +int32_t WebRtcAec_BufferFarend(void* aecInst, + const float* farend, + size_t nrOfSamples) { + Aec* aecpc = reinterpret_cast<Aec*>(aecInst); + size_t newNrOfSamples = nrOfSamples; + float new_farend[MAX_RESAMP_LEN]; + const float* farend_ptr = farend; + + // Get any error caused by buffering the farend signal. + int32_t error_code = + WebRtcAec_GetBufferFarendError(aecInst, farend, nrOfSamples); + + if (error_code != 0) + return error_code; + + if (aecpc->skewMode == kAecTrue && aecpc->resample == kAecTrue) { + // Resample and get a new number of samples + WebRtcAec_ResampleLinear(aecpc->resampler, farend, nrOfSamples, aecpc->skew, + new_farend, &newNrOfSamples); + farend_ptr = new_farend; + } + + aecpc->farend_started = 1; + WebRtcAec_SetSystemDelay(aecpc->aec, WebRtcAec_system_delay(aecpc->aec) + + static_cast<int>(newNrOfSamples)); + + // Write the time-domain data to |far_pre_buf|. + WebRtc_WriteBuffer(aecpc->far_pre_buf, farend_ptr, newNrOfSamples); + + // TODO(minyue): reduce to |PART_LEN| samples for each buffering. + while (WebRtc_available_read(aecpc->far_pre_buf) >= PART_LEN2) { + // We have enough data to pass to the FFT, hence read PART_LEN2 samples. + { + float* ptmp = NULL; + float tmp[PART_LEN2]; + WebRtc_ReadBuffer(aecpc->far_pre_buf, + reinterpret_cast<void**>(&ptmp), tmp, PART_LEN2); + WebRtcAec_BufferFarendBlock(aecpc->aec, &ptmp[PART_LEN]); + } + + // Rewind |far_pre_buf| PART_LEN samples for overlap before continuing. 
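+ // Each pass thus consumes PART_LEN new samples while re-reading the
+ // previous PART_LEN, giving the 50% block overlap that the core's
+ // PART_LEN2-sample windowed FFT expects.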
+ WebRtc_MoveReadPtr(aecpc->far_pre_buf, -PART_LEN); + } + + return 0; +} + +int32_t WebRtcAec_Process(void* aecInst, + const float* const* nearend, + size_t num_bands, + float* const* out, + size_t nrOfSamples, + int16_t msInSndCardBuf, + int32_t skew) { + Aec* aecpc = reinterpret_cast<Aec*>(aecInst); + int32_t retVal = 0; + + if (out == NULL) { + return AEC_NULL_POINTER_ERROR; + } + + if (aecpc->initFlag != initCheck) { + return AEC_UNINITIALIZED_ERROR; + } + + // number of samples == 160 for SWB input + if (nrOfSamples != 80 && nrOfSamples != 160) { + return AEC_BAD_PARAMETER_ERROR; + } + + if (msInSndCardBuf < 0) { + msInSndCardBuf = 0; + retVal = AEC_BAD_PARAMETER_WARNING; + } else if (msInSndCardBuf > kMaxTrustedDelayMs) { + // The clamping is now done in ProcessExtended/Normal(). + retVal = AEC_BAD_PARAMETER_WARNING; + } + + // This returns the value of aec->extended_filter_enabled. + if (WebRtcAec_extended_filter_enabled(aecpc->aec)) { + ProcessExtended(aecpc, nearend, num_bands, out, nrOfSamples, msInSndCardBuf, + skew); + } else { + retVal = ProcessNormal(aecpc, nearend, num_bands, out, nrOfSamples, + msInSndCardBuf, skew); + } + + int far_buf_size_samples = WebRtcAec_system_delay(aecpc->aec); + aecpc->data_dumper->DumpRaw("aec_system_delay", 1, &far_buf_size_samples); + aecpc->data_dumper->DumpRaw("aec_known_delay", 1, &aecpc->knownDelay); + + return retVal; +} + +int WebRtcAec_set_config(void* handle, AecConfig config) { + Aec* self = reinterpret_cast<Aec*>(handle); + if (self->initFlag != initCheck) { + return AEC_UNINITIALIZED_ERROR; + } + + if (config.skewMode != kAecFalse && config.skewMode != kAecTrue) { + return AEC_BAD_PARAMETER_ERROR; + } + self->skewMode = config.skewMode; + + if (config.nlpMode != kAecNlpConservative && + config.nlpMode != kAecNlpModerate && + config.nlpMode != kAecNlpAggressive) { + return AEC_BAD_PARAMETER_ERROR; + } + + if (config.metricsMode != kAecFalse && config.metricsMode != kAecTrue) { + return AEC_BAD_PARAMETER_ERROR; + } + + if (config.delay_logging != kAecFalse && config.delay_logging != kAecTrue) { + return AEC_BAD_PARAMETER_ERROR; + } + + WebRtcAec_SetConfigCore(self->aec, config.nlpMode, config.metricsMode, + config.delay_logging); + return 0; +} + +int WebRtcAec_get_echo_status(void* handle, int* status) { + Aec* self = reinterpret_cast<Aec*>(handle); + if (status == NULL) { + return AEC_NULL_POINTER_ERROR; + } + if (self->initFlag != initCheck) { + return AEC_UNINITIALIZED_ERROR; + } + + *status = WebRtcAec_echo_state(self->aec); + + return 0; +} + +int WebRtcAec_GetMetrics(void* handle, AecMetrics* metrics) { + const float kUpWeight = 0.7f; + float dtmp; + int stmp; + Aec* self = reinterpret_cast<Aec*>(handle); + Stats erl; + Stats erle; + Stats a_nlp; + + if (handle == NULL) { + return -1; + } + if (metrics == NULL) { + return AEC_NULL_POINTER_ERROR; + } + if (self->initFlag != initCheck) { + return AEC_UNINITIALIZED_ERROR; + } + + WebRtcAec_GetEchoStats(self->aec, &erl, &erle, &a_nlp, + &metrics->divergent_filter_fraction); + + // ERL + metrics->erl.instant = static_cast<int>(erl.instant); + + if ((erl.himean > kOffsetLevel) && (erl.average > kOffsetLevel)) { + // Use a mix between regular average and upper part average. 
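+ // That is, reported = 0.7 * himean + 0.3 * average (kUpWeight = 0.7),
+ // biasing the metric toward the upper-half mean; the same mix is
+ // applied to ERLE and A_NLP below.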
+ dtmp = kUpWeight * erl.himean + (1 - kUpWeight) * erl.average; + metrics->erl.average = static_cast<int>(dtmp); + } else { + metrics->erl.average = kOffsetLevel; + } + + metrics->erl.max = static_cast<int>(erl.max); + + if (erl.min < (kOffsetLevel * (-1))) { + metrics->erl.min = static_cast<int>(erl.min); + } else { + metrics->erl.min = kOffsetLevel; + } + + // ERLE + metrics->erle.instant = static_cast<int>(erle.instant); + + if ((erle.himean > kOffsetLevel) && (erle.average > kOffsetLevel)) { + // Use a mix between regular average and upper part average. + dtmp = kUpWeight * erle.himean + (1 - kUpWeight) * erle.average; + metrics->erle.average = static_cast<int>(dtmp); + } else { + metrics->erle.average = kOffsetLevel; + } + + metrics->erle.max = static_cast<int>(erle.max); + + if (erle.min < (kOffsetLevel * (-1))) { + metrics->erle.min = static_cast<int>(erle.min); + } else { + metrics->erle.min = kOffsetLevel; + } + + // RERL + if ((metrics->erl.average > kOffsetLevel) && + (metrics->erle.average > kOffsetLevel)) { + stmp = metrics->erl.average + metrics->erle.average; + } else { + stmp = kOffsetLevel; + } + metrics->rerl.average = stmp; + + // No other statistics needed, but returned for completeness. + metrics->rerl.instant = stmp; + metrics->rerl.max = stmp; + metrics->rerl.min = stmp; + + // A_NLP + metrics->aNlp.instant = static_cast<int>(a_nlp.instant); + + if ((a_nlp.himean > kOffsetLevel) && (a_nlp.average > kOffsetLevel)) { + // Use a mix between regular average and upper part average. + dtmp = kUpWeight * a_nlp.himean + (1 - kUpWeight) * a_nlp.average; + metrics->aNlp.average = static_cast<int>(dtmp); + } else { + metrics->aNlp.average = kOffsetLevel; + } + + metrics->aNlp.max = static_cast<int>(a_nlp.max); + + if (a_nlp.min < (kOffsetLevel * (-1))) { + metrics->aNlp.min = static_cast<int>(a_nlp.min); + } else { + metrics->aNlp.min = kOffsetLevel; + } + + return 0; +} + +int WebRtcAec_GetDelayMetrics(void* handle, + int* median, + int* std, + float* fraction_poor_delays) { + Aec* self = reinterpret_cast<Aec*>(handle); + if (median == NULL) { + return AEC_NULL_POINTER_ERROR; + } + if (std == NULL) { + return AEC_NULL_POINTER_ERROR; + } + if (self->initFlag != initCheck) { + return AEC_UNINITIALIZED_ERROR; + } + if (WebRtcAec_GetDelayMetricsCore(self->aec, median, std, + fraction_poor_delays) == -1) { + // Logging disabled. + return AEC_UNSUPPORTED_FUNCTION_ERROR; + } + + return 0; +} + +AecCore* WebRtcAec_aec_core(void* handle) { + if (!handle) { + return NULL; + } + return reinterpret_cast<Aec*>(handle)->aec; +} + +static int ProcessNormal(Aec* aecInst, + const float* const* nearend, + size_t num_bands, + float* const* out, + size_t num_samples, + int16_t reported_delay_ms, + int32_t skew) { + int retVal = 0; + size_t i; + size_t nBlocks10ms; + // Limit resampling to doubling/halving of signal + const float minSkewEst = -0.5f; + const float maxSkewEst = 1.0f; + + reported_delay_ms = + reported_delay_ms > kMaxTrustedDelayMs ? kMaxTrustedDelayMs : + reported_delay_ms; + // TODO(andrew): we need to investigate if this +10 is really wanted. 
+ reported_delay_ms += 10; + aecInst->msInSndCardBuf = reported_delay_ms; + + if (aecInst->skewMode == kAecTrue) { + if (aecInst->skewFrCtr < 25) { + aecInst->skewFrCtr++; + } else { + retVal = WebRtcAec_GetSkew(aecInst->resampler, skew, &aecInst->skew); + if (retVal == -1) { + aecInst->skew = 0; + retVal = AEC_BAD_PARAMETER_WARNING; + } + + aecInst->skew /= aecInst->sampFactor * num_samples; + + if (aecInst->skew < 1.0e-3 && aecInst->skew > -1.0e-3) { + aecInst->resample = kAecFalse; + } else { + aecInst->resample = kAecTrue; + } + + if (aecInst->skew < minSkewEst) { + aecInst->skew = minSkewEst; + } else if (aecInst->skew > maxSkewEst) { + aecInst->skew = maxSkewEst; + } + + aecInst->data_dumper->DumpRaw("aec_skew", 1, &aecInst->skew); + } + } + + nBlocks10ms = num_samples / (FRAME_LEN * aecInst->rate_factor); + + if (aecInst->startup_phase) { + for (i = 0; i < num_bands; ++i) { + // Only needed if they don't already point to the same place. + if (nearend[i] != out[i]) { + memcpy(out[i], nearend[i], sizeof(nearend[i][0]) * num_samples); + } + } + + // The AEC is in the start up mode + // AEC is disabled until the system delay is OK + + // Mechanism to ensure that the system delay is reasonably stable. + if (aecInst->checkBuffSize) { + aecInst->checkBufSizeCtr++; + // Before we fill up the far-end buffer we require the system delay + // to be stable (+/-8 ms) compared to the first value. This + // comparison is made during the following 6 consecutive 10 ms + // blocks. If it seems to be stable then we start to fill up the + // far-end buffer. + if (aecInst->counter == 0) { + aecInst->firstVal = aecInst->msInSndCardBuf; + aecInst->sum = 0; + } + + if (abs(aecInst->firstVal - aecInst->msInSndCardBuf) < + WEBRTC_SPL_MAX(0.2 * aecInst->msInSndCardBuf, sampMsNb)) { + aecInst->sum += aecInst->msInSndCardBuf; + aecInst->counter++; + } else { + aecInst->counter = 0; + } + + if (aecInst->counter * nBlocks10ms >= 6) { + // The far-end buffer size is determined in partitions of + // PART_LEN samples. Use 75% of the average value of the system + // delay as buffer size to start with. + aecInst->bufSizeStart = + WEBRTC_SPL_MIN((3 * aecInst->sum * aecInst->rate_factor * 8) / + (4 * aecInst->counter * PART_LEN), + kMaxBufSizeStart); + // Buffer size has now been determined. + aecInst->checkBuffSize = 0; + } + + if (aecInst->checkBufSizeCtr * nBlocks10ms > 50) { + // For really bad systems, don't disable the echo canceller for + // more than 0.5 sec. + aecInst->bufSizeStart = WEBRTC_SPL_MIN( + (aecInst->msInSndCardBuf * aecInst->rate_factor * 3) / 40, + kMaxBufSizeStart); + aecInst->checkBuffSize = 0; + } + } + + // If |checkBuffSize| changed in the if-statement above. + if (!aecInst->checkBuffSize) { + // The system delay is now reasonably stable (or has been unstable + // for too long). When the far-end buffer is filled with + // approximately the same amount of data as reported by the system + // we end the startup phase. + int overhead_elements = + WebRtcAec_system_delay(aecInst->aec) / PART_LEN - + aecInst->bufSizeStart; + if (overhead_elements == 0) { + // Enable the AEC + aecInst->startup_phase = 0; + } else if (overhead_elements > 0) { + // TODO(bjornv): Do we need a check on how much we actually + // moved the read pointer? It should always be possible to move + // the pointer |overhead_elements| since we have only added data + // to the buffer and no delay compensation nor AEC processing + // has been done. 
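+ // Worked example (hypothetical numbers): with a system delay of
+ // 1024 samples, PART_LEN = 64 and bufSizeStart = 12 partitions,
+ // overhead_elements = 1024 / 64 - 12 = 4, so four whole partitions
+ // are dropped from the far-end buffer before the AEC is enabled.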
+ WebRtcAec_AdjustFarendBufferSizeAndSystemDelay(aecInst->aec, + overhead_elements); + + // Enable the AEC + aecInst->startup_phase = 0; + } + } + } else { + // AEC is enabled. + EstBufDelayNormal(aecInst); + + // Call the AEC. + // TODO(bjornv): Re-structure such that we don't have to pass + // |aecInst->knownDelay| as input. Change name to something like + // |system_buffer_diff|. + WebRtcAec_ProcessFrames(aecInst->aec, nearend, num_bands, num_samples, + aecInst->knownDelay, out); + } + + return retVal; +} + +static void ProcessExtended(Aec* self, + const float* const* near, + size_t num_bands, + float* const* out, + size_t num_samples, + int16_t reported_delay_ms, + int32_t skew) { + size_t i; + const int delay_diff_offset = kDelayDiffOffsetSamples; + RTC_DCHECK(num_samples == 80 || num_samples == 160); +#if defined(WEBRTC_UNTRUSTED_DELAY) + reported_delay_ms = kFixedDelayMs; +#else + // This is the usual mode where we trust the reported system delay values. + // Due to the longer filter, we no longer add 10 ms to the reported delay + // to reduce chance of non-causality. Instead we apply a minimum here to avoid + // issues with the read pointer jumping around needlessly. + reported_delay_ms = reported_delay_ms < kMinTrustedDelayMs + ? kMinTrustedDelayMs + : reported_delay_ms; + // If the reported delay appears to be bogus, we attempt to recover by using + // the measured fixed delay values. We use >= here because higher layers + // may already clamp to this maximum value, and we would otherwise not + // detect it here. + reported_delay_ms = reported_delay_ms >= kMaxTrustedDelayMs + ? kFixedDelayMs + : reported_delay_ms; +#endif + self->msInSndCardBuf = reported_delay_ms; + + if (!self->farend_started) { + for (i = 0; i < num_bands; ++i) { + // Only needed if they don't already point to the same place. + if (near[i] != out[i]) { + memcpy(out[i], near[i], sizeof(near[i][0]) * num_samples); + } + } + return; + } + if (self->startup_phase) { + // In the extended mode, there isn't a startup "phase", just a special + // action on the first frame. In the trusted delay case, we'll take the + // current reported delay, unless it's less then our conservative + // measurement. + int startup_size_ms = + reported_delay_ms < kFixedDelayMs ? kFixedDelayMs : reported_delay_ms; +#if defined(WEBRTC_ANDROID) + int target_delay = startup_size_ms * self->rate_factor * 8; +#else + // To avoid putting the AEC in a non-causal state we're being slightly + // conservative and scale by 2. On Android we use a fixed delay and + // therefore there is no need to scale the target_delay. + int target_delay = startup_size_ms * self->rate_factor * 8 / 2; +#endif + int overhead_elements = + (WebRtcAec_system_delay(self->aec) - target_delay) / PART_LEN; + WebRtcAec_AdjustFarendBufferSizeAndSystemDelay(self->aec, + overhead_elements); + self->startup_phase = 0; + } + + EstBufDelayExtended(self); + + { + // |delay_diff_offset| gives us the option to manually rewind the delay on + // very low delay platforms which can't be expressed purely through + // |reported_delay_ms|. 
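+ // With kDelayDiffOffsetSamples = -160 (the untrusted-delay Mac case)
+ // this rewinds the estimate by 10 ms at the 16 kHz split rate; on all
+ // other builds the offset is 0 and the clamp below is effectively a
+ // no-op.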
+ const int adjusted_known_delay = + WEBRTC_SPL_MAX(0, self->knownDelay + delay_diff_offset); + + WebRtcAec_ProcessFrames(self->aec, near, num_bands, num_samples, + adjusted_known_delay, out); + } +} + +static void EstBufDelayNormal(Aec* aecInst) { + int nSampSndCard = aecInst->msInSndCardBuf * sampMsNb * aecInst->rate_factor; + int current_delay = nSampSndCard - WebRtcAec_system_delay(aecInst->aec); + int delay_difference = 0; + + // Before we proceed with the delay estimate filtering we: + // 1) Compensate for the frame that will be read. + // 2) Compensate for drift resampling. + // 3) Compensate for non-causality if needed, since the estimated delay can't + // be negative. + + // 1) Compensating for the frame(s) that will be read/processed. + current_delay += FRAME_LEN * aecInst->rate_factor; + + // 2) Account for resampling frame delay. + if (aecInst->skewMode == kAecTrue && aecInst->resample == kAecTrue) { + current_delay -= kResamplingDelay; + } + + // 3) Compensate for non-causality, if needed, by flushing one block. + if (current_delay < PART_LEN) { + current_delay += + WebRtcAec_AdjustFarendBufferSizeAndSystemDelay(aecInst->aec, 1) * + PART_LEN; + } + + // We use -1 to signal an initialized state in the "extended" implementation; + // compensate for that. + aecInst->filtDelay = aecInst->filtDelay < 0 ? 0 : aecInst->filtDelay; + aecInst->filtDelay = + WEBRTC_SPL_MAX(0, static_cast<int16_t>(0.8 * + aecInst->filtDelay + + 0.2 * current_delay)); + + delay_difference = aecInst->filtDelay - aecInst->knownDelay; + if (delay_difference > 224) { + if (aecInst->lastDelayDiff < 96) { + aecInst->timeForDelayChange = 0; + } else { + aecInst->timeForDelayChange++; + } + } else if (delay_difference < 96 && aecInst->knownDelay > 0) { + if (aecInst->lastDelayDiff > 224) { + aecInst->timeForDelayChange = 0; + } else { + aecInst->timeForDelayChange++; + } + } else { + aecInst->timeForDelayChange = 0; + } + aecInst->lastDelayDiff = delay_difference; + + if (aecInst->timeForDelayChange > 25) { + aecInst->knownDelay = WEBRTC_SPL_MAX((int)aecInst->filtDelay - 160, 0); + } +} + +static void EstBufDelayExtended(Aec* aecInst) { + int reported_delay = aecInst->msInSndCardBuf * sampMsNb * + aecInst->rate_factor; + int current_delay = reported_delay - WebRtcAec_system_delay(aecInst->aec); + int delay_difference = 0; + + // Before we proceed with the delay estimate filtering we: + // 1) Compensate for the frame that will be read. + // 2) Compensate for drift resampling. + // 3) Compensate for non-causality if needed, since the estimated delay can't + // be negative. + + // 1) Compensating for the frame(s) that will be read/processed. + current_delay += FRAME_LEN * aecInst->rate_factor; + + // 2) Account for resampling frame delay. + if (aecInst->skewMode == kAecTrue && aecInst->resample == kAecTrue) { + current_delay -= kResamplingDelay; + } + + // 3) Compensate for non-causality, if needed, by flushing two blocks. 
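+ // (Each flushed block advances the far-end read pointer by PART_LEN
+ // samples; the normal path in EstBufDelayNormal() flushes one block at
+ // the corresponding step, the extended path two.)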
+ if (current_delay < PART_LEN) { + current_delay += + WebRtcAec_AdjustFarendBufferSizeAndSystemDelay(aecInst->aec, 2) * + PART_LEN; + } + + if (aecInst->filtDelay == -1) { + aecInst->filtDelay = WEBRTC_SPL_MAX(0, 0.5 * current_delay); + } else { + aecInst->filtDelay = WEBRTC_SPL_MAX( + 0, static_cast<int16_t>(0.95 * aecInst->filtDelay + 0.05 * + current_delay)); + } + + delay_difference = aecInst->filtDelay - aecInst->knownDelay; + if (delay_difference > 384) { + if (aecInst->lastDelayDiff < 128) { + aecInst->timeForDelayChange = 0; + } else { + aecInst->timeForDelayChange++; + } + } else if (delay_difference < 128 && aecInst->knownDelay > 0) { + if (aecInst->lastDelayDiff > 384) { + aecInst->timeForDelayChange = 0; + } else { + aecInst->timeForDelayChange++; + } + } else { + aecInst->timeForDelayChange = 0; + } + aecInst->lastDelayDiff = delay_difference; + + if (aecInst->timeForDelayChange > 25) { + aecInst->knownDelay = WEBRTC_SPL_MAX((int)aecInst->filtDelay - 256, 0); + } +} +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec/echo_cancellation.h b/third_party/libwebrtc/webrtc/modules/audio_processing/aec/echo_cancellation.h new file mode 100644 index 0000000000..96132ee721 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec/echo_cancellation.h @@ -0,0 +1,300 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC_ECHO_CANCELLATION_H_ +#define MODULES_AUDIO_PROCESSING_AEC_ECHO_CANCELLATION_H_ + +#include <memory> +#include <atomic> + +#include <stddef.h> + +extern "C" { +#include "common_audio/ring_buffer.h" +} +#include "modules/audio_processing/aec/aec_core.h" +#include "typedefs.h" // NOLINT(build/include) + +namespace webrtc { + +// Errors +#define AEC_UNSPECIFIED_ERROR 12000 +#define AEC_UNSUPPORTED_FUNCTION_ERROR 12001 +#define AEC_UNINITIALIZED_ERROR 12002 +#define AEC_NULL_POINTER_ERROR 12003 +#define AEC_BAD_PARAMETER_ERROR 12004 + +// Warnings +#define AEC_BAD_PARAMETER_WARNING 12050 + +enum { kAecNlpConservative = 0, kAecNlpModerate, kAecNlpAggressive }; + +enum { kAecFalse = 0, kAecTrue }; + +typedef struct { + int16_t nlpMode; // default kAecNlpModerate + int16_t skewMode; // default kAecFalse + int16_t metricsMode; // default kAecFalse + int delay_logging; // default kAecFalse + // float realSkew; +} AecConfig; + +typedef struct { + int instant; + int average; + int max; + int min; +} AecLevel; + +typedef struct { + AecLevel rerl; + AecLevel erl; + AecLevel erle; + AecLevel aNlp; + float divergent_filter_fraction; +} AecMetrics; + +struct AecCore; + +class ApmDataDumper; + +typedef struct Aec { + Aec(); + ~Aec(); + + std::unique_ptr<ApmDataDumper> data_dumper; + + int delayCtr; + int sampFreq; + int splitSampFreq; + int scSampFreq; + float sampFactor; // scSampRate / sampFreq + short skewMode; + int bufSizeStart; + int knownDelay; + int rate_factor; + + short initFlag; // indicates if AEC has been initialized + + // Variables used for averaging far end buffer size + short counter; + int sum; + short firstVal; + short checkBufSizeCtr; + + // Variables used for delay shifts + short msInSndCardBuf; + short 
filtDelay; // Filtered delay estimate. + int timeForDelayChange; + int startup_phase; + int checkBuffSize; + short lastDelayDiff; + + // Structures + void* resampler; + + int skewFrCtr; + int resample; // if the skew is small enough we don't resample + int highSkewCtr; + float skew; + + RingBuffer* far_pre_buf; // Time domain far-end pre-buffer. + + int farend_started; + + // Aec instance counter. + static std::atomic<int> instance_count; + AecCore* aec; +} Aec; + +/* + * Allocates the memory needed by the AEC. The memory needs to be initialized + * separately using the WebRtcAec_Init() function. Returns a pointer to the + * object or NULL on error. + */ +void* WebRtcAec_Create(); + +/* + * This function releases the memory allocated by WebRtcAec_Create(). + * + * Inputs Description + * ------------------------------------------------------------------- + * void* aecInst Pointer to the AEC instance + */ +void WebRtcAec_Free(void* aecInst); + +/* + * Initializes an AEC instance. + * + * Inputs Description + * ------------------------------------------------------------------- + * void* aecInst Pointer to the AEC instance + * int32_t sampFreq Sampling frequency of data + * int32_t scSampFreq Soundcard sampling frequency + * + * Outputs Description + * ------------------------------------------------------------------- + * int32_t return 0: OK + * -1: error + */ +int32_t WebRtcAec_Init(void* aecInst, int32_t sampFreq, int32_t scSampFreq); + +/* + * Inserts an 80 or 160 sample block of data into the farend buffer. + * + * Inputs Description + * ------------------------------------------------------------------- + * void* aecInst Pointer to the AEC instance + * const float* farend In buffer containing one frame of + * farend signal for L band + * int16_t nrOfSamples Number of samples in farend buffer + * + * Outputs Description + * ------------------------------------------------------------------- + * int32_t return 0: OK + * 12000-12050: error code + */ +int32_t WebRtcAec_BufferFarend(void* aecInst, + const float* farend, + size_t nrOfSamples); + +/* + * Reports any errors that would arise if buffering a farend buffer + * + * Inputs Description + * ------------------------------------------------------------------- + * void* aecInst Pointer to the AEC instance + * const float* farend In buffer containing one frame of + * farend signal for L band + * int16_t nrOfSamples Number of samples in farend buffer + * + * Outputs Description + * ------------------------------------------------------------------- + * int32_t return 0: OK + * 12000-12050: error code + */ +int32_t WebRtcAec_GetBufferFarendError(void* aecInst, + const float* farend, + size_t nrOfSamples); + +/* + * Runs the echo canceller on an 80 or 160 sample blocks of data. 
+ * + * Inputs Description + * ------------------------------------------------------------------- + * void* aecInst Pointer to the AEC instance + * float* const* nearend In buffer containing one frame of + * nearend+echo signal for each band + * int num_bands Number of bands in nearend buffer + * int16_t nrOfSamples Number of samples in nearend buffer + * int16_t msInSndCardBuf Delay estimate for sound card and + * system buffers + * int16_t skew Difference between number of samples played + * and recorded at the soundcard (for clock skew + * compensation) + * + * Outputs Description + * ------------------------------------------------------------------- + * float* const* out Out buffer, one frame of processed nearend + * for each band + * int32_t return 0: OK + * 12000-12050: error code + */ +int32_t WebRtcAec_Process(void* aecInst, + const float* const* nearend, + size_t num_bands, + float* const* out, + size_t nrOfSamples, + int16_t msInSndCardBuf, + int32_t skew); + +/* + * This function enables the user to set certain parameters on-the-fly. + * + * Inputs Description + * ------------------------------------------------------------------- + * void* handle Pointer to the AEC instance + * AecConfig config Config instance that contains all + * properties to be set + * + * Outputs Description + * ------------------------------------------------------------------- + * int return 0: OK + * 12000-12050: error code + */ +int WebRtcAec_set_config(void* handle, AecConfig config); + +/* + * Gets the current echo status of the nearend signal. + * + * Inputs Description + * ------------------------------------------------------------------- + * void* handle Pointer to the AEC instance + * + * Outputs Description + * ------------------------------------------------------------------- + * int* status 0: Almost certainly nearend single-talk + * 1: Might not be neared single-talk + * int return 0: OK + * 12000-12050: error code + */ +int WebRtcAec_get_echo_status(void* handle, int* status); + +/* + * Gets the current echo metrics for the session. + * + * Inputs Description + * ------------------------------------------------------------------- + * void* handle Pointer to the AEC instance + * + * Outputs Description + * ------------------------------------------------------------------- + * AecMetrics* metrics Struct which will be filled out with the + * current echo metrics. + * int return 0: OK + * 12000-12050: error code + */ +int WebRtcAec_GetMetrics(void* handle, AecMetrics* metrics); + +/* + * Gets the current delay metrics for the session. + * + * Inputs Description + * ------------------------------------------------------------------- + * void* handle Pointer to the AEC instance + * + * Outputs Description + * ------------------------------------------------------------------- + * int* median Delay median value. + * int* std Delay standard deviation. + * float* fraction_poor_delays Fraction of the delay estimates that may + * cause the AEC to perform poorly. + * + * int return 0: OK + * 12000-12050: error code + */ +int WebRtcAec_GetDelayMetrics(void* handle, + int* median, + int* std, + float* fraction_poor_delays); + +// Returns a pointer to the low level AEC handle. +// +// Input: +// - handle : Pointer to the AEC instance. +// +// Return value: +// - AecCore pointer : NULL for error. 
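+//
+// Usage sketch (mirroring the unit test in echo_cancellation_unittest.cc):
+//   AecCore* core = WebRtcAec_aec_core(handle);
+//   if (core != NULL) WebRtcAec_SetSystemDelay(core, delay_in_samples);
+// |delay_in_samples| here is an illustrative name, not part of the API.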
+// +struct AecCore* WebRtcAec_aec_core(void* handle); + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC_ECHO_CANCELLATION_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec/echo_cancellation_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec/echo_cancellation_unittest.cc new file mode 100644 index 0000000000..b9c89fd9e9 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec/echo_cancellation_unittest.cc @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// TODO(bjornv): Make this a comprehensive test. + +#include "modules/audio_processing/aec/echo_cancellation.h" + +#include <stdlib.h> +#include <time.h> + +#include "modules/audio_processing/aec/aec_core.h" +#include "rtc_base/checks.h" +#include "test/gtest.h" + +namespace webrtc { + +TEST(EchoCancellationTest, CreateAndFreeHasExpectedBehavior) { + void* handle = WebRtcAec_Create(); + ASSERT_TRUE(handle); + WebRtcAec_Free(nullptr); + WebRtcAec_Free(handle); +} + +TEST(EchoCancellationTest, ApplyAecCoreHandle) { + void* handle = WebRtcAec_Create(); + ASSERT_TRUE(handle); + EXPECT_TRUE(WebRtcAec_aec_core(NULL) == NULL); + AecCore* aec_core = WebRtcAec_aec_core(handle); + EXPECT_TRUE(aec_core != NULL); + // A simple test to verify that we can set and get a value from the lower + // level |aec_core| handle. + int delay = 111; + WebRtcAec_SetSystemDelay(aec_core, delay); + EXPECT_EQ(delay, WebRtcAec_system_delay(aec_core)); + WebRtcAec_Free(handle); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec/system_delay_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec/system_delay_unittest.cc new file mode 100644 index 0000000000..fc57af8b87 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec/system_delay_unittest.cc @@ -0,0 +1,601 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec/aec_core.h" +#include "modules/audio_processing/aec/echo_cancellation.h" +#include "rtc_base/numerics/safe_conversions.h" +#include "test/gtest.h" +#include "typedefs.h" // NOLINT(build/include) +namespace webrtc { +namespace { + +class SystemDelayTest : public ::testing::Test { + protected: + SystemDelayTest(); + virtual void SetUp(); + virtual void TearDown(); + + // Initialization of AEC handle with respect to |sample_rate_hz|. Since the + // device sample rate is unimportant we set that value to 48000 Hz. + void Init(int sample_rate_hz); + + // Makes one render call and one capture call in that specific order. + void RenderAndCapture(int device_buffer_ms); + + // Fills up the far-end buffer with respect to the default device buffer size. 
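+ // Returns the number of far-end samples written.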
+ size_t BufferFillUp(); + + // Runs and verifies the behavior in a stable startup procedure. + void RunStableStartup(); + + // Maps buffer size in ms into samples, taking the unprocessed frame into + // account. + int MapBufferSizeToSamples(int size_in_ms, bool extended_filter); + + void* handle_; + Aec* self_; + size_t samples_per_frame_; + // Dummy input/output speech data. + static const int kSamplesPerChunk = 160; + float far_[kSamplesPerChunk]; + float near_[kSamplesPerChunk]; + float out_[kSamplesPerChunk]; + const float* near_ptr_; + float* out_ptr_; +}; + +SystemDelayTest::SystemDelayTest() + : handle_(NULL), self_(NULL), samples_per_frame_(0) { + // Dummy input data are set with more or less arbitrary non-zero values. + for (int i = 0; i < kSamplesPerChunk; i++) { + far_[i] = 257.0; + near_[i] = 514.0; + } + memset(out_, 0, sizeof(out_)); + near_ptr_ = near_; + out_ptr_ = out_; +} + +void SystemDelayTest::SetUp() { + handle_ = WebRtcAec_Create(); + ASSERT_TRUE(handle_); + self_ = reinterpret_cast<Aec*>(handle_); +} + +void SystemDelayTest::TearDown() { + // Free AEC + WebRtcAec_Free(handle_); + handle_ = NULL; +} + +// In SWB mode nothing is added to the buffer handling with respect to +// functionality compared to WB. We therefore only verify behavior in NB and WB. +static const int kSampleRateHz[] = {8000, 16000}; +static const size_t kNumSampleRates = + sizeof(kSampleRateHz) / sizeof(*kSampleRateHz); + +// Default audio device buffer size used. +static const int kDeviceBufMs = 100; + +// Requirement for a stable device convergence time in ms. Should converge in +// less than |kStableConvergenceMs|. +static const int kStableConvergenceMs = 100; + +// Maximum convergence time in ms. This means that we should leave the startup +// phase after |kMaxConvergenceMs| independent of device buffer stability +// conditions. +static const int kMaxConvergenceMs = 500; + +void SystemDelayTest::Init(int sample_rate_hz) { + // Initialize AEC + EXPECT_EQ(0, WebRtcAec_Init(handle_, sample_rate_hz, 48000)); + EXPECT_EQ(0, WebRtcAec_system_delay(self_->aec)); + + // One frame equals 10 ms of data. + samples_per_frame_ = static_cast<size_t>(sample_rate_hz / 100); +} + +void SystemDelayTest::RenderAndCapture(int device_buffer_ms) { + EXPECT_EQ(0, WebRtcAec_BufferFarend(handle_, far_, samples_per_frame_)); + EXPECT_EQ(0, + WebRtcAec_Process(handle_, + &near_ptr_, + 1, + &out_ptr_, + samples_per_frame_, + device_buffer_ms, + 0)); +} + +size_t SystemDelayTest::BufferFillUp() { + // To make sure we have a full buffer when we verify stability we first fill + // up the far-end buffer with the same amount as we will report in through + // Process(). + size_t buffer_size = 0; + for (int i = 0; i < kDeviceBufMs / 10; i++) { + EXPECT_EQ(0, WebRtcAec_BufferFarend(handle_, far_, samples_per_frame_)); + buffer_size += samples_per_frame_; + EXPECT_EQ(static_cast<int>(buffer_size), + WebRtcAec_system_delay(self_->aec)); + } + return buffer_size; +} + +void SystemDelayTest::RunStableStartup() { + // To make sure we have a full buffer when we verify stability we first fill + // up the far-end buffer with the same amount as we will report in through + // Process(). + size_t buffer_size = BufferFillUp(); + + if (WebRtcAec_delay_agnostic_enabled(self_->aec) == 1) { + // In extended_filter mode we set the buffer size after the first processed + // 10 ms chunk. Hence, we don't need to wait for the reported system delay + // values to become stable. 
+ RenderAndCapture(kDeviceBufMs); + buffer_size += samples_per_frame_; + EXPECT_EQ(0, self_->startup_phase); + } else { + // A stable device should be accepted and put in a regular process mode + // within |kStableConvergenceMs|. + int process_time_ms = 0; + for (; process_time_ms < kStableConvergenceMs; process_time_ms += 10) { + RenderAndCapture(kDeviceBufMs); + buffer_size += samples_per_frame_; + if (self_->startup_phase == 0) { + // We have left the startup phase. + break; + } + } + // Verify convergence time. + EXPECT_GT(kStableConvergenceMs, process_time_ms); + } + // Verify that the buffer has been flushed. + EXPECT_GE(static_cast<int>(buffer_size), + WebRtcAec_system_delay(self_->aec)); +} + + int SystemDelayTest::MapBufferSizeToSamples(int size_in_ms, + bool extended_filter) { + // If extended_filter is disabled we add an extra 10 ms for the unprocessed + // frame. That is simply how the algorithm is constructed. + return static_cast<int>( + (size_in_ms + (extended_filter ? 0 : 10)) * samples_per_frame_ / 10); +} + +// The tests should meet basic requirements and not be adjusted to what is +// actually implemented. If we don't get good code coverage this way we either +// lack in tests or have unnecessary code. +// General requirements: +// 1) If we add far-end data the system delay should be increased with the same +// amount we add. +// 2) If the far-end buffer is full we should flush the oldest data to make room +// for the new. In this case the system delay is unaffected. +// 3) There should exist a startup phase in which the buffer size is to be +// determined. In this phase no cancellation should be performed. +// 4) Under stable conditions (small variations in device buffer sizes) the AEC +// should determine an appropriate local buffer size within +// |kStableConvergenceMs| ms. +// 5) Under unstable conditions the AEC should make a decision within +// |kMaxConvergenceMs| ms. +// 6) If the local buffer runs out of data we should stuff the buffer with older +// frames. +// 7) The system delay should within |kMaxConvergenceMs| ms heal from +// disturbances like drift, data glitches, toggling events and outliers. +// 8) The system delay should never become negative. + +TEST_F(SystemDelayTest, CorrectIncreaseWhenBufferFarend) { + // When we add data to the AEC buffer the internal system delay should be + // incremented with the same amount as the size of data. + // This process should be independent of DA-AEC and extended_filter mode. + for (int extended_filter = 0; extended_filter <= 1; ++extended_filter) { + WebRtcAec_enable_extended_filter(self_->aec, extended_filter); + EXPECT_EQ(extended_filter, WebRtcAec_extended_filter_enabled(self_->aec)); + for (int da_aec = 0; da_aec <= 1; ++da_aec) { + WebRtcAec_enable_delay_agnostic(self_->aec, da_aec); + EXPECT_EQ(da_aec, WebRtcAec_delay_agnostic_enabled(self_->aec)); + for (size_t i = 0; i < kNumSampleRates; i++) { + Init(kSampleRateHz[i]); + // Loop through a couple of calls to make sure the system delay + // increments correctly. + for (int j = 1; j <= 5; j++) { + EXPECT_EQ(0, + WebRtcAec_BufferFarend(handle_, far_, samples_per_frame_)); + EXPECT_EQ(static_cast<int>(j * samples_per_frame_), + WebRtcAec_system_delay(self_->aec)); + } + } + } + } +} + +// TODO(bjornv): Add a test to verify behavior if the far-end buffer is full +// when adding new data. + +TEST_F(SystemDelayTest, CorrectDelayAfterStableStartup) { + // We run the system in a stable startup. After that we verify that the system + // delay meets the requirements. 
+  // This process should be independent of DA-AEC and extended_filter mode.
+  for (int extended_filter = 0; extended_filter <= 1; ++extended_filter) {
+    WebRtcAec_enable_extended_filter(self_->aec, extended_filter);
+    EXPECT_EQ(extended_filter, WebRtcAec_extended_filter_enabled(self_->aec));
+    for (int da_aec = 0; da_aec <= 1; ++da_aec) {
+      WebRtcAec_enable_delay_agnostic(self_->aec, da_aec);
+      EXPECT_EQ(da_aec, WebRtcAec_delay_agnostic_enabled(self_->aec));
+      for (size_t i = 0; i < kNumSampleRates; i++) {
+        Init(kSampleRateHz[i]);
+        RunStableStartup();
+
+        // Verify the system delay with respect to the requirements, i.e.,
+        // |system_delay| is in the interval [75%, 100%] of what's reported on
+        // average.
+        // In extended_filter mode we target 50% and measure after one
+        // processed 10 ms chunk.
+        int average_reported_delay =
+            static_cast<int>(kDeviceBufMs * samples_per_frame_ / 10);
+        EXPECT_GE(average_reported_delay, WebRtcAec_system_delay(self_->aec));
+        int lower_bound = WebRtcAec_extended_filter_enabled(self_->aec)
+                              ? (average_reported_delay / 2 -
+                                 rtc::checked_cast<int>(samples_per_frame_))
+                              : average_reported_delay * 3 / 4;
+        EXPECT_LE(lower_bound, WebRtcAec_system_delay(self_->aec));
+      }
+    }
+  }
+}
+
+TEST_F(SystemDelayTest, CorrectDelayAfterUnstableStartup) {
+  // This test does not apply in extended_filter mode, since we only use the
+  // first 10 ms chunk to determine a reasonable buffer size. Neither does it
+  // apply if DA-AEC is on because that overrides the startup procedure.
+  WebRtcAec_enable_extended_filter(self_->aec, 0);
+  EXPECT_EQ(0, WebRtcAec_extended_filter_enabled(self_->aec));
+  WebRtcAec_enable_delay_agnostic(self_->aec, 0);
+  EXPECT_EQ(0, WebRtcAec_delay_agnostic_enabled(self_->aec));
+
+  // In an unstable system we would start processing after |kMaxConvergenceMs|.
+  // On the last frame the AEC buffer is adjusted to 60% of the last reported
+  // device buffer size.
+  // We construct an unstable system by alternating the device buffer size
+  // between the two values |kDeviceBufMs| +- 25 ms.
+  for (size_t i = 0; i < kNumSampleRates; i++) {
+    Init(kSampleRateHz[i]);
+
+    // To make sure we have a full buffer when we verify stability, we first
+    // fill up the far-end buffer with the same amount as we will report in,
+    // on average, through Process().
+    size_t buffer_size = BufferFillUp();
+
+    int buffer_offset_ms = 25;
+    int reported_delay_ms = 0;
+    int process_time_ms = 0;
+    for (; process_time_ms <= kMaxConvergenceMs; process_time_ms += 10) {
+      reported_delay_ms = kDeviceBufMs + buffer_offset_ms;
+      RenderAndCapture(reported_delay_ms);
+      buffer_size += samples_per_frame_;
+      buffer_offset_ms = -buffer_offset_ms;
+      if (self_->startup_phase == 0) {
+        // We have left the startup phase.
+        break;
+      }
+    }
+    // Verify the convergence time.
+    EXPECT_GE(kMaxConvergenceMs, process_time_ms);
+    // Verify that the buffer has been flushed.
+    EXPECT_GE(static_cast<int>(buffer_size),
+              WebRtcAec_system_delay(self_->aec));
+
+    // Verify the system delay with respect to the requirements, i.e.,
+    // |system_delay| is in the interval [60%, 100%] of what's last reported.
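+    // For example, at 16 kHz (samples_per_frame_ == 160) a last reported
+    // delay of 125 ms maps to 125 * 160 / 10 = 2000 samples, so the checks
+    // below require 1200 <= |system_delay| <= 2000.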
+    EXPECT_GE(static_cast<int>(reported_delay_ms * samples_per_frame_ / 10),
+              WebRtcAec_system_delay(self_->aec));
+    EXPECT_LE(
+        static_cast<int>(reported_delay_ms * samples_per_frame_ / 10 * 3 / 5),
+        WebRtcAec_system_delay(self_->aec));
+  }
+}
+
+TEST_F(SystemDelayTest, CorrectDelayAfterStableBufferBuildUp) {
+  // This test does not apply in extended_filter mode, since we only use the
+  // first 10 ms chunk to determine a reasonable buffer size. Neither does it
+  // apply if DA-AEC is on because that overrides the startup procedure.
+  WebRtcAec_enable_extended_filter(self_->aec, 0);
+  EXPECT_EQ(0, WebRtcAec_extended_filter_enabled(self_->aec));
+  WebRtcAec_enable_delay_agnostic(self_->aec, 0);
+  EXPECT_EQ(0, WebRtcAec_delay_agnostic_enabled(self_->aec));
+
+  // In this test we start by establishing the device buffer size under stable
+  // conditions, but with an empty internal far-end buffer. Once that is done,
+  // we verify that the system delay is increased correctly until we have
+  // reached an internal buffer size of 75% of what's been reported.
+  for (size_t i = 0; i < kNumSampleRates; i++) {
+    Init(kSampleRateHz[i]);
+
+    // We assume that running for |kStableConvergenceMs| ms will put the
+    // algorithm in a state where the device buffer size has been determined.
+    // We can make that assumption since we have a separate stability test.
+    int process_time_ms = 0;
+    for (; process_time_ms < kStableConvergenceMs; process_time_ms += 10) {
+      EXPECT_EQ(0,
+                WebRtcAec_Process(handle_,
+                                  &near_ptr_,
+                                  1,
+                                  &out_ptr_,
+                                  samples_per_frame_,
+                                  kDeviceBufMs,
+                                  0));
+    }
+    // Verify that a buffer size has been established.
+    EXPECT_EQ(0, self_->checkBuffSize);
+
+    // We have now established the required buffer size. Let us verify that we
+    // fill up before leaving the startup phase for normal processing.
+    size_t buffer_size = 0;
+    size_t target_buffer_size = kDeviceBufMs * samples_per_frame_ / 10 * 3 / 4;
+    process_time_ms = 0;
+    for (; process_time_ms <= kMaxConvergenceMs; process_time_ms += 10) {
+      RenderAndCapture(kDeviceBufMs);
+      buffer_size += samples_per_frame_;
+      if (self_->startup_phase == 0) {
+        // We have left the startup phase.
+        break;
+      }
+    }
+    // Verify the convergence time.
+    EXPECT_GT(kMaxConvergenceMs, process_time_ms);
+    // Verify that the buffer has reached the desired size.
+    EXPECT_LE(static_cast<int>(target_buffer_size),
+              WebRtcAec_system_delay(self_->aec));
+
+    // Verify normal behavior (the system delay is kept constant) after startup
+    // by running a couple of calls to BufferFarend() and Process().
+    for (int j = 0; j < 6; j++) {
+      int system_delay_before_calls = WebRtcAec_system_delay(self_->aec);
+      RenderAndCapture(kDeviceBufMs);
+      EXPECT_EQ(system_delay_before_calls, WebRtcAec_system_delay(self_->aec));
+    }
+  }
+}
+
+TEST_F(SystemDelayTest, CorrectDelayWhenBufferUnderrun) {
+  // Here we test a buffer underrun scenario. If we keep on calling
+  // WebRtcAec_Process() we will eventually run out of data, but the AEC should
+  // automatically stuff the buffer. We verify this behavior by checking that
+  // the system delay never goes negative.
+  // This process should be independent of DA-AEC and extended_filter mode.
+ for (int extended_filter = 0; extended_filter <= 1; ++extended_filter) { + WebRtcAec_enable_extended_filter(self_->aec, extended_filter); + EXPECT_EQ(extended_filter, WebRtcAec_extended_filter_enabled(self_->aec)); + for (int da_aec = 0; da_aec <= 1; ++da_aec) { + WebRtcAec_enable_delay_agnostic(self_->aec, da_aec); + EXPECT_EQ(da_aec, WebRtcAec_delay_agnostic_enabled(self_->aec)); + for (size_t i = 0; i < kNumSampleRates; i++) { + Init(kSampleRateHz[i]); + RunStableStartup(); + + // The AEC has now left the Startup phase. We now have at most + // |kStableConvergenceMs| in the buffer. Keep on calling Process() until + // we run out of data and verify that the system delay is non-negative. + for (int j = 0; j <= kStableConvergenceMs; j += 10) { + EXPECT_EQ(0, WebRtcAec_Process(handle_, &near_ptr_, 1, &out_ptr_, + samples_per_frame_, kDeviceBufMs, 0)); + EXPECT_LE(0, WebRtcAec_system_delay(self_->aec)); + } + } + } + } +} + +TEST_F(SystemDelayTest, CorrectDelayDuringDrift) { + // This drift test should verify that the system delay is never exceeding the + // device buffer. The drift is simulated by decreasing the reported device + // buffer size by 1 ms every 100 ms. If the device buffer size goes below 30 + // ms we jump (add) 10 ms to give a repeated pattern. + + // This process should be independent of DA-AEC and extended_filter mode. + for (int extended_filter = 0; extended_filter <= 1; ++extended_filter) { + WebRtcAec_enable_extended_filter(self_->aec, extended_filter); + EXPECT_EQ(extended_filter, WebRtcAec_extended_filter_enabled(self_->aec)); + for (int da_aec = 0; da_aec <= 1; ++da_aec) { + WebRtcAec_enable_delay_agnostic(self_->aec, da_aec); + EXPECT_EQ(da_aec, WebRtcAec_delay_agnostic_enabled(self_->aec)); + for (size_t i = 0; i < kNumSampleRates; i++) { + Init(kSampleRateHz[i]); + RunStableStartup(); + + // We have left the startup phase and proceed with normal processing. + int jump = 0; + for (int j = 0; j < 1000; j++) { + // Drift = -1 ms per 100 ms of data. + int device_buf_ms = kDeviceBufMs - (j / 10) + jump; + int device_buf = MapBufferSizeToSamples(device_buf_ms, + extended_filter == 1); + + if (device_buf_ms < 30) { + // Add 10 ms data, taking affect next frame. + jump += 10; + } + RenderAndCapture(device_buf_ms); + + // Verify that the system delay does not exceed the device buffer. + EXPECT_GE(device_buf, WebRtcAec_system_delay(self_->aec)); + + // Verify that the system delay is non-negative. + EXPECT_LE(0, WebRtcAec_system_delay(self_->aec)); + } + } + } + } +} + +TEST_F(SystemDelayTest, ShouldRecoverAfterGlitch) { + // This glitch test should verify that the system delay recovers if there is + // a glitch in data. The data glitch is constructed as 200 ms of buffering + // after which the stable procedure continues. The glitch is never reported by + // the device. + // The system is said to be in a non-causal state if the difference between + // the device buffer and system delay is less than a block (64 samples). + + // This process should be independent of DA-AEC and extended_filter mode. 
+ for (int extended_filter = 0; extended_filter <= 1; ++extended_filter) { + WebRtcAec_enable_extended_filter(self_->aec, extended_filter); + EXPECT_EQ(extended_filter, WebRtcAec_extended_filter_enabled(self_->aec)); + for (int da_aec = 0; da_aec <= 1; ++da_aec) { + WebRtcAec_enable_delay_agnostic(self_->aec, da_aec); + EXPECT_EQ(da_aec, WebRtcAec_delay_agnostic_enabled(self_->aec)); + for (size_t i = 0; i < kNumSampleRates; i++) { + Init(kSampleRateHz[i]); + RunStableStartup(); + int device_buf = MapBufferSizeToSamples(kDeviceBufMs, + extended_filter == 1); + // Glitch state. + for (int j = 0; j < 20; j++) { + EXPECT_EQ(0, + WebRtcAec_BufferFarend(handle_, far_, samples_per_frame_)); + // No need to verify system delay, since that is done in a separate + // test. + } + // Verify that we are in a non-causal state, i.e., + // |system_delay| > |device_buf|. + EXPECT_LT(device_buf, WebRtcAec_system_delay(self_->aec)); + + // Recover state. Should recover at least 4 ms of data per 10 ms, hence + // a glitch of 200 ms will take at most 200 * 10 / 4 = 500 ms to recover + // from. + bool non_causal = true; // We are currently in a non-causal state. + for (int j = 0; j < 50; j++) { + int system_delay_before = WebRtcAec_system_delay(self_->aec); + RenderAndCapture(kDeviceBufMs); + int system_delay_after = WebRtcAec_system_delay(self_->aec); + // We have recovered if + // |device_buf| - |system_delay_after| >= PART_LEN (1 block). + // During recovery, |system_delay_after| < |system_delay_before|, + // otherwise they are equal. + if (non_causal) { + EXPECT_LT(system_delay_after, system_delay_before); + if (device_buf - system_delay_after >= PART_LEN) { + non_causal = false; + } + } else { + EXPECT_EQ(system_delay_before, system_delay_after); + } + // Verify that the system delay is non-negative. + EXPECT_LE(0, WebRtcAec_system_delay(self_->aec)); + } + // Check that we have recovered. + EXPECT_FALSE(non_causal); + } + } + } +} + +TEST_F(SystemDelayTest, UnaffectedWhenSpuriousDeviceBufferValues) { + // This test does not apply in extended_filter mode, since we only use the + // the first 10 ms chunk to determine a reasonable buffer size. + const int extended_filter = 0; + WebRtcAec_enable_extended_filter(self_->aec, extended_filter); + EXPECT_EQ(extended_filter, WebRtcAec_extended_filter_enabled(self_->aec)); + + // Should be DA-AEC independent. + for (int da_aec = 0; da_aec <= 1; ++da_aec) { + WebRtcAec_enable_delay_agnostic(self_->aec, da_aec); + EXPECT_EQ(da_aec, WebRtcAec_delay_agnostic_enabled(self_->aec)); + // This spurious device buffer data test aims at verifying that the system + // delay is unaffected by large outliers. + // The system is said to be in a non-causal state if the difference between + // the device buffer and system delay is less than a block (64 samples). + for (size_t i = 0; i < kNumSampleRates; i++) { + Init(kSampleRateHz[i]); + RunStableStartup(); + int device_buf = MapBufferSizeToSamples(kDeviceBufMs, + extended_filter == 1); + + // Normal state. We are currently not in a non-causal state. + bool non_causal = false; + + // Run 1 s and replace device buffer size with 500 ms every 100 ms. + for (int j = 0; j < 100; j++) { + int system_delay_before_calls = WebRtcAec_system_delay(self_->aec); + int device_buf_ms = j % 10 == 0 ? 500 : kDeviceBufMs; + RenderAndCapture(device_buf_ms); + + // Check for non-causality. 
+ if (device_buf - WebRtcAec_system_delay(self_->aec) < PART_LEN) { + non_causal = true; + } + EXPECT_FALSE(non_causal); + EXPECT_EQ(system_delay_before_calls, + WebRtcAec_system_delay(self_->aec)); + + // Verify that the system delay is non-negative. + EXPECT_LE(0, WebRtcAec_system_delay(self_->aec)); + } + } + } +} + +TEST_F(SystemDelayTest, CorrectImpactWhenTogglingDeviceBufferValues) { + // This test aims at verifying that the system delay is "unaffected" by + // toggling values reported by the device. + // The test is constructed such that every other device buffer value is zero + // and then 2 * |kDeviceBufMs|, hence the size is constant on the average. The + // zero values will force us into a non-causal state and thereby lowering the + // system delay until we basically run out of data. Once that happens the + // buffer will be stuffed. + // TODO(bjornv): This test will have a better impact if we verified that the + // delay estimate goes up when the system delay goes down to meet the average + // device buffer size. + + // This test does not apply if DA-AEC is enabled and extended_filter mode + // disabled. + for (int extended_filter = 0; extended_filter <= 1; ++extended_filter) { + WebRtcAec_enable_extended_filter(self_->aec, extended_filter); + EXPECT_EQ(extended_filter, WebRtcAec_extended_filter_enabled(self_->aec)); + for (int da_aec = 0; da_aec <= 1; ++da_aec) { + WebRtcAec_enable_delay_agnostic(self_->aec, da_aec); + EXPECT_EQ(da_aec, WebRtcAec_delay_agnostic_enabled(self_->aec)); + if (extended_filter == 0 && da_aec == 1) { + continue; + } + for (size_t i = 0; i < kNumSampleRates; i++) { + Init(kSampleRateHz[i]); + RunStableStartup(); + const int device_buf = MapBufferSizeToSamples(kDeviceBufMs, + extended_filter == 1); + + // Normal state. We are currently not in a non-causal state. + bool non_causal = false; + + // Loop through 100 frames (both render and capture), which equals 1 s + // of data. Every odd frame we set the device buffer size to + // 2 * |kDeviceBufMs| and even frames we set the device buffer size to + // zero. + for (int j = 0; j < 100; j++) { + int system_delay_before_calls = WebRtcAec_system_delay(self_->aec); + int device_buf_ms = 2 * (j % 2) * kDeviceBufMs; + RenderAndCapture(device_buf_ms); + + // Check for non-causality, compared with the average device buffer + // size. + non_causal |= (device_buf - WebRtcAec_system_delay(self_->aec) < 64); + EXPECT_GE(system_delay_before_calls, + WebRtcAec_system_delay(self_->aec)); + + // Verify that the system delay is non-negative. + EXPECT_LE(0, WebRtcAec_system_delay(self_->aec)); + } + // Verify we are not in a non-causal state. + EXPECT_FALSE(non_causal); + } + } + } +} + +} // namespace +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/adaptive_fir_filter.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/adaptive_fir_filter.cc new file mode 100644 index 0000000000..fb3f48a918 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/adaptive_fir_filter.cc @@ -0,0 +1,525 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/aec3/adaptive_fir_filter.h" + +#if defined(WEBRTC_HAS_NEON) +#include <arm_neon.h> +#endif +#include "typedefs.h" // NOLINT(build/include) +#if defined(WEBRTC_ARCH_X86_FAMILY) +#include <emmintrin.h> +#endif +#include <algorithm> +#include <functional> + +#include "modules/audio_processing/aec3/fft_data.h" +#include "rtc_base/checks.h" + +namespace webrtc { + +namespace aec3 { + +// Computes and stores the frequency response of the filter. +void UpdateFrequencyResponse( + rtc::ArrayView<const FftData> H, + std::vector<std::array<float, kFftLengthBy2Plus1>>* H2) { + RTC_DCHECK_EQ(H.size(), H2->size()); + for (size_t k = 0; k < H.size(); ++k) { + std::transform(H[k].re.begin(), H[k].re.end(), H[k].im.begin(), + (*H2)[k].begin(), + [](float a, float b) { return a * a + b * b; }); + } +} + +#if defined(WEBRTC_HAS_NEON) +// Computes and stores the frequency response of the filter. +void UpdateFrequencyResponse_NEON( + rtc::ArrayView<const FftData> H, + std::vector<std::array<float, kFftLengthBy2Plus1>>* H2) { + RTC_DCHECK_EQ(H.size(), H2->size()); + for (size_t k = 0; k < H.size(); ++k) { + for (size_t j = 0; j < kFftLengthBy2; j += 4) { + const float32x4_t re = vld1q_f32(&H[k].re[j]); + const float32x4_t im = vld1q_f32(&H[k].im[j]); + float32x4_t H2_k_j = vmulq_f32(re, re); + H2_k_j = vmlaq_f32(H2_k_j, im, im); + vst1q_f32(&(*H2)[k][j], H2_k_j); + } + (*H2)[k][kFftLengthBy2] = H[k].re[kFftLengthBy2] * H[k].re[kFftLengthBy2] + + H[k].im[kFftLengthBy2] * H[k].im[kFftLengthBy2]; + } +} +#endif + +#if defined(WEBRTC_ARCH_X86_FAMILY) +// Computes and stores the frequency response of the filter. +void UpdateFrequencyResponse_SSE2( + rtc::ArrayView<const FftData> H, + std::vector<std::array<float, kFftLengthBy2Plus1>>* H2) { + RTC_DCHECK_EQ(H.size(), H2->size()); + for (size_t k = 0; k < H.size(); ++k) { + for (size_t j = 0; j < kFftLengthBy2; j += 4) { + const __m128 re = _mm_loadu_ps(&H[k].re[j]); + const __m128 re2 = _mm_mul_ps(re, re); + const __m128 im = _mm_loadu_ps(&H[k].im[j]); + const __m128 im2 = _mm_mul_ps(im, im); + const __m128 H2_k_j = _mm_add_ps(re2, im2); + _mm_storeu_ps(&(*H2)[k][j], H2_k_j); + } + (*H2)[k][kFftLengthBy2] = H[k].re[kFftLengthBy2] * H[k].re[kFftLengthBy2] + + H[k].im[kFftLengthBy2] * H[k].im[kFftLengthBy2]; + } +} +#endif + +// Computes and stores the echo return loss estimate of the filter, which is the +// sum of the partition frequency responses. +void UpdateErlEstimator( + const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2, + std::array<float, kFftLengthBy2Plus1>* erl) { + erl->fill(0.f); + for (auto& H2_j : H2) { + std::transform(H2_j.begin(), H2_j.end(), erl->begin(), erl->begin(), + std::plus<float>()); + } +} + +#if defined(WEBRTC_HAS_NEON) +// Computes and stores the echo return loss estimate of the filter, which is the +// sum of the partition frequency responses. +void UpdateErlEstimator_NEON( + const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2, + std::array<float, kFftLengthBy2Plus1>* erl) { + erl->fill(0.f); + for (auto& H2_j : H2) { + for (size_t k = 0; k < kFftLengthBy2; k += 4) { + const float32x4_t H2_j_k = vld1q_f32(&H2_j[k]); + float32x4_t erl_k = vld1q_f32(&(*erl)[k]); + erl_k = vaddq_f32(erl_k, H2_j_k); + vst1q_f32(&(*erl)[k], erl_k); + } + (*erl)[kFftLengthBy2] += H2_j[kFftLengthBy2]; + } +} +#endif + +#if defined(WEBRTC_ARCH_X86_FAMILY) +// Computes and stores the echo return loss estimate of the filter, which is the +// sum of the partition frequency responses. 
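+// As in the NEON variant above, erl[k] = sum_j H2[j][k] is accumulated four
+// bins at a time, with the last (Nyquist) bin handled separately.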
+void UpdateErlEstimator_SSE2( + const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2, + std::array<float, kFftLengthBy2Plus1>* erl) { + erl->fill(0.f); + for (auto& H2_j : H2) { + for (size_t k = 0; k < kFftLengthBy2; k += 4) { + const __m128 H2_j_k = _mm_loadu_ps(&H2_j[k]); + __m128 erl_k = _mm_loadu_ps(&(*erl)[k]); + erl_k = _mm_add_ps(erl_k, H2_j_k); + _mm_storeu_ps(&(*erl)[k], erl_k); + } + (*erl)[kFftLengthBy2] += H2_j[kFftLengthBy2]; + } +} +#endif + +// Adapts the filter partitions as H(t+1)=H(t)+G(t)*conj(X(t)). +void AdaptPartitions(const RenderBuffer& render_buffer, + const FftData& G, + rtc::ArrayView<FftData> H) { + rtc::ArrayView<const FftData> render_buffer_data = render_buffer.Buffer(); + size_t index = render_buffer.Position(); + for (auto& H_j : H) { + const FftData& X = render_buffer_data[index]; + for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { + H_j.re[k] += X.re[k] * G.re[k] + X.im[k] * G.im[k]; + H_j.im[k] += X.re[k] * G.im[k] - X.im[k] * G.re[k]; + } + + index = index < (render_buffer_data.size() - 1) ? index + 1 : 0; + } +} + +#if defined(WEBRTC_HAS_NEON) +// Adapts the filter partitions. (NEON variant) +void AdaptPartitions_NEON(const RenderBuffer& render_buffer, + const FftData& G, + rtc::ArrayView<FftData> H) { + rtc::ArrayView<const FftData> render_buffer_data = render_buffer.Buffer(); + const int lim1 = + std::min(render_buffer_data.size() - render_buffer.Position(), H.size()); + const int lim2 = H.size(); + constexpr int kNumFourBinBands = kFftLengthBy2 / 4; + FftData* H_j = &H[0]; + const FftData* X = &render_buffer_data[render_buffer.Position()]; + int limit = lim1; + int j = 0; + do { + for (; j < limit; ++j, ++H_j, ++X) { + for (int k = 0, n = 0; n < kNumFourBinBands; ++n, k += 4) { + const float32x4_t G_re = vld1q_f32(&G.re[k]); + const float32x4_t G_im = vld1q_f32(&G.im[k]); + const float32x4_t X_re = vld1q_f32(&X->re[k]); + const float32x4_t X_im = vld1q_f32(&X->im[k]); + const float32x4_t H_re = vld1q_f32(&H_j->re[k]); + const float32x4_t H_im = vld1q_f32(&H_j->im[k]); + const float32x4_t a = vmulq_f32(X_re, G_re); + const float32x4_t e = vmlaq_f32(a, X_im, G_im); + const float32x4_t c = vmulq_f32(X_re, G_im); + const float32x4_t f = vmlsq_f32(c, X_im, G_re); + const float32x4_t g = vaddq_f32(H_re, e); + const float32x4_t h = vaddq_f32(H_im, f); + + vst1q_f32(&H_j->re[k], g); + vst1q_f32(&H_j->im[k], h); + } + } + + X = &render_buffer_data[0]; + limit = lim2; + } while (j < lim2); + + H_j = &H[0]; + X = &render_buffer_data[render_buffer.Position()]; + limit = lim1; + j = 0; + do { + for (; j < limit; ++j, ++H_j, ++X) { + H_j->re[kFftLengthBy2] += X->re[kFftLengthBy2] * G.re[kFftLengthBy2] + + X->im[kFftLengthBy2] * G.im[kFftLengthBy2]; + H_j->im[kFftLengthBy2] += X->re[kFftLengthBy2] * G.im[kFftLengthBy2] - + X->im[kFftLengthBy2] * G.re[kFftLengthBy2]; + } + + X = &render_buffer_data[0]; + limit = lim2; + } while (j < lim2); +} +#endif + +#if defined(WEBRTC_ARCH_X86_FAMILY) +// Adapts the filter partitions. 
(SSE2 variant) +void AdaptPartitions_SSE2(const RenderBuffer& render_buffer, + const FftData& G, + rtc::ArrayView<FftData> H) { + rtc::ArrayView<const FftData> render_buffer_data = render_buffer.Buffer(); + const int lim1 = + std::min(render_buffer_data.size() - render_buffer.Position(), H.size()); + const int lim2 = H.size(); + constexpr int kNumFourBinBands = kFftLengthBy2 / 4; + FftData* H_j; + const FftData* X; + int limit; + int j; + for (int k = 0, n = 0; n < kNumFourBinBands; ++n, k += 4) { + const __m128 G_re = _mm_loadu_ps(&G.re[k]); + const __m128 G_im = _mm_loadu_ps(&G.im[k]); + + H_j = &H[0]; + X = &render_buffer_data[render_buffer.Position()]; + limit = lim1; + j = 0; + do { + for (; j < limit; ++j, ++H_j, ++X) { + const __m128 X_re = _mm_loadu_ps(&X->re[k]); + const __m128 X_im = _mm_loadu_ps(&X->im[k]); + const __m128 H_re = _mm_loadu_ps(&H_j->re[k]); + const __m128 H_im = _mm_loadu_ps(&H_j->im[k]); + const __m128 a = _mm_mul_ps(X_re, G_re); + const __m128 b = _mm_mul_ps(X_im, G_im); + const __m128 c = _mm_mul_ps(X_re, G_im); + const __m128 d = _mm_mul_ps(X_im, G_re); + const __m128 e = _mm_add_ps(a, b); + const __m128 f = _mm_sub_ps(c, d); + const __m128 g = _mm_add_ps(H_re, e); + const __m128 h = _mm_add_ps(H_im, f); + _mm_storeu_ps(&H_j->re[k], g); + _mm_storeu_ps(&H_j->im[k], h); + } + + X = &render_buffer_data[0]; + limit = lim2; + } while (j < lim2); + } + + H_j = &H[0]; + X = &render_buffer_data[render_buffer.Position()]; + limit = lim1; + j = 0; + do { + for (; j < limit; ++j, ++H_j, ++X) { + H_j->re[kFftLengthBy2] += X->re[kFftLengthBy2] * G.re[kFftLengthBy2] + + X->im[kFftLengthBy2] * G.im[kFftLengthBy2]; + H_j->im[kFftLengthBy2] += X->re[kFftLengthBy2] * G.im[kFftLengthBy2] - + X->im[kFftLengthBy2] * G.re[kFftLengthBy2]; + } + + X = &render_buffer_data[0]; + limit = lim2; + } while (j < lim2); +} +#endif + +// Produces the filter output. +void ApplyFilter(const RenderBuffer& render_buffer, + rtc::ArrayView<const FftData> H, + FftData* S) { + S->re.fill(0.f); + S->im.fill(0.f); + + rtc::ArrayView<const FftData> render_buffer_data = render_buffer.Buffer(); + size_t index = render_buffer.Position(); + for (auto& H_j : H) { + const FftData& X = render_buffer_data[index]; + for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { + S->re[k] += X.re[k] * H_j.re[k] - X.im[k] * H_j.im[k]; + S->im[k] += X.re[k] * H_j.im[k] + X.im[k] * H_j.re[k]; + } + index = index < (render_buffer_data.size() - 1) ? index + 1 : 0; + } +} + +#if defined(WEBRTC_HAS_NEON) +// Produces the filter output (NEON variant). 
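+// Per bin this accumulates the complex product S += X * H_j, i.e.
+//   S.re[k] += X.re[k] * H_j.re[k] - X.im[k] * H_j.im[k];
+//   S.im[k] += X.re[k] * H_j.im[k] + X.im[k] * H_j.re[k];
+// vectorized over four bins per iteration using vmlaq/vmlsq.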
+void ApplyFilter_NEON(const RenderBuffer& render_buffer, + rtc::ArrayView<const FftData> H, + FftData* S) { + RTC_DCHECK_GE(H.size(), H.size() - 1); + S->re.fill(0.f); + S->im.fill(0.f); + + rtc::ArrayView<const FftData> render_buffer_data = render_buffer.Buffer(); + const int lim1 = + std::min(render_buffer_data.size() - render_buffer.Position(), H.size()); + const int lim2 = H.size(); + constexpr int kNumFourBinBands = kFftLengthBy2 / 4; + const FftData* H_j = &H[0]; + const FftData* X = &render_buffer_data[render_buffer.Position()]; + + int j = 0; + int limit = lim1; + do { + for (; j < limit; ++j, ++H_j, ++X) { + for (int k = 0, n = 0; n < kNumFourBinBands; ++n, k += 4) { + const float32x4_t X_re = vld1q_f32(&X->re[k]); + const float32x4_t X_im = vld1q_f32(&X->im[k]); + const float32x4_t H_re = vld1q_f32(&H_j->re[k]); + const float32x4_t H_im = vld1q_f32(&H_j->im[k]); + const float32x4_t S_re = vld1q_f32(&S->re[k]); + const float32x4_t S_im = vld1q_f32(&S->im[k]); + const float32x4_t a = vmulq_f32(X_re, H_re); + const float32x4_t e = vmlsq_f32(a, X_im, H_im); + const float32x4_t c = vmulq_f32(X_re, H_im); + const float32x4_t f = vmlaq_f32(c, X_im, H_re); + const float32x4_t g = vaddq_f32(S_re, e); + const float32x4_t h = vaddq_f32(S_im, f); + vst1q_f32(&S->re[k], g); + vst1q_f32(&S->im[k], h); + } + } + limit = lim2; + X = &render_buffer_data[0]; + } while (j < lim2); + + H_j = &H[0]; + X = &render_buffer_data[render_buffer.Position()]; + j = 0; + limit = lim1; + do { + for (; j < limit; ++j, ++H_j, ++X) { + S->re[kFftLengthBy2] += X->re[kFftLengthBy2] * H_j->re[kFftLengthBy2] - + X->im[kFftLengthBy2] * H_j->im[kFftLengthBy2]; + S->im[kFftLengthBy2] += X->re[kFftLengthBy2] * H_j->im[kFftLengthBy2] + + X->im[kFftLengthBy2] * H_j->re[kFftLengthBy2]; + } + limit = lim2; + X = &render_buffer_data[0]; + } while (j < lim2); +} +#endif + +#if defined(WEBRTC_ARCH_X86_FAMILY) +// Produces the filter output (SSE2 variant). 
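+// The two do/while passes below walk the circular render buffer: partitions
+// [0, lim1) read from Position() towards the end of the buffer, after which
+// X wraps to index 0 for the remaining partitions up to lim2 == H.size().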
+void ApplyFilter_SSE2(const RenderBuffer& render_buffer, + rtc::ArrayView<const FftData> H, + FftData* S) { + RTC_DCHECK_GE(H.size(), H.size() - 1); + S->re.fill(0.f); + S->im.fill(0.f); + + rtc::ArrayView<const FftData> render_buffer_data = render_buffer.Buffer(); + const int lim1 = + std::min(render_buffer_data.size() - render_buffer.Position(), H.size()); + const int lim2 = H.size(); + constexpr int kNumFourBinBands = kFftLengthBy2 / 4; + const FftData* H_j = &H[0]; + const FftData* X = &render_buffer_data[render_buffer.Position()]; + + int j = 0; + int limit = lim1; + do { + for (; j < limit; ++j, ++H_j, ++X) { + for (int k = 0, n = 0; n < kNumFourBinBands; ++n, k += 4) { + const __m128 X_re = _mm_loadu_ps(&X->re[k]); + const __m128 X_im = _mm_loadu_ps(&X->im[k]); + const __m128 H_re = _mm_loadu_ps(&H_j->re[k]); + const __m128 H_im = _mm_loadu_ps(&H_j->im[k]); + const __m128 S_re = _mm_loadu_ps(&S->re[k]); + const __m128 S_im = _mm_loadu_ps(&S->im[k]); + const __m128 a = _mm_mul_ps(X_re, H_re); + const __m128 b = _mm_mul_ps(X_im, H_im); + const __m128 c = _mm_mul_ps(X_re, H_im); + const __m128 d = _mm_mul_ps(X_im, H_re); + const __m128 e = _mm_sub_ps(a, b); + const __m128 f = _mm_add_ps(c, d); + const __m128 g = _mm_add_ps(S_re, e); + const __m128 h = _mm_add_ps(S_im, f); + _mm_storeu_ps(&S->re[k], g); + _mm_storeu_ps(&S->im[k], h); + } + } + limit = lim2; + X = &render_buffer_data[0]; + } while (j < lim2); + + H_j = &H[0]; + X = &render_buffer_data[render_buffer.Position()]; + j = 0; + limit = lim1; + do { + for (; j < limit; ++j, ++H_j, ++X) { + S->re[kFftLengthBy2] += X->re[kFftLengthBy2] * H_j->re[kFftLengthBy2] - + X->im[kFftLengthBy2] * H_j->im[kFftLengthBy2]; + S->im[kFftLengthBy2] += X->re[kFftLengthBy2] * H_j->im[kFftLengthBy2] + + X->im[kFftLengthBy2] * H_j->re[kFftLengthBy2]; + } + limit = lim2; + X = &render_buffer_data[0]; + } while (j < lim2); +} +#endif + +} // namespace aec3 + +AdaptiveFirFilter::AdaptiveFirFilter(size_t size_partitions, + Aec3Optimization optimization, + ApmDataDumper* data_dumper) + : data_dumper_(data_dumper), + fft_(), + optimization_(optimization), + H_(size_partitions), + H2_(size_partitions, std::array<float, kFftLengthBy2Plus1>()) { + RTC_DCHECK(data_dumper_); + + h_.fill(0.f); + for (auto& H_j : H_) { + H_j.Clear(); + } + for (auto& H2_k : H2_) { + H2_k.fill(0.f); + } + erl_.fill(0.f); +} + +AdaptiveFirFilter::~AdaptiveFirFilter() = default; + +void AdaptiveFirFilter::HandleEchoPathChange() { + h_.fill(0.f); + for (auto& H_j : H_) { + H_j.Clear(); + } + for (auto& H2_k : H2_) { + H2_k.fill(0.f); + } + erl_.fill(0.f); +} + +void AdaptiveFirFilter::Filter(const RenderBuffer& render_buffer, + FftData* S) const { + RTC_DCHECK(S); + switch (optimization_) { +#if defined(WEBRTC_ARCH_X86_FAMILY) + case Aec3Optimization::kSse2: + aec3::ApplyFilter_SSE2(render_buffer, H_, S); + break; +#endif +#if defined(WEBRTC_HAS_NEON) + case Aec3Optimization::kNeon: + aec3::ApplyFilter_NEON(render_buffer, H_, S); + break; +#endif + default: + aec3::ApplyFilter(render_buffer, H_, S); + } +} + +void AdaptiveFirFilter::Adapt(const RenderBuffer& render_buffer, + const FftData& G) { + // Adapt the filter. 
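+  // The update runs in three steps: adapt the partitions with the gain G,
+  // constrain one partition's time-domain response to its first
+  // kFftLengthBy2 taps, and refresh the cached |H2_| and |erl_| estimates;
+  // the vectorized steps dispatch on |optimization_|.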
+  switch (optimization_) {
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+    case Aec3Optimization::kSse2:
+      aec3::AdaptPartitions_SSE2(render_buffer, G, H_);
+      break;
+#endif
+#if defined(WEBRTC_HAS_NEON)
+    case Aec3Optimization::kNeon:
+      aec3::AdaptPartitions_NEON(render_buffer, G, H_);
+      break;
+#endif
+    default:
+      aec3::AdaptPartitions(render_buffer, G, H_);
+  }
+
+  // Constrain the filter partitions in a cyclic manner.
+  Constrain();
+
+  // Update the frequency response and echo return loss for the filter.
+  switch (optimization_) {
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+    case Aec3Optimization::kSse2:
+      aec3::UpdateFrequencyResponse_SSE2(H_, &H2_);
+      aec3::UpdateErlEstimator_SSE2(H2_, &erl_);
+      break;
+#endif
+#if defined(WEBRTC_HAS_NEON)
+    case Aec3Optimization::kNeon:
+      aec3::UpdateFrequencyResponse_NEON(H_, &H2_);
+      aec3::UpdateErlEstimator_NEON(H2_, &erl_);
+      break;
+#endif
+    default:
+      aec3::UpdateFrequencyResponse(H_, &H2_);
+      aec3::UpdateErlEstimator(H2_, &erl_);
+  }
+}
+
+// Constrains a partition of the frequency-domain filter to be limited in
+// time via setting the relevant time-domain coefficients to zero.
+void AdaptiveFirFilter::Constrain() {
+  std::array<float, kFftLength> h;
+  fft_.Ifft(H_[partition_to_constrain_], &h);
+
+  static constexpr float kScale = 1.0f / kFftLengthBy2;
+  std::for_each(h.begin(), h.begin() + kFftLengthBy2,
+                [](float& a) { a *= kScale; });
+  std::fill(h.begin() + kFftLengthBy2, h.end(), 0.f);
+
+  std::copy(h.begin(), h.begin() + kFftLengthBy2,
+            h_.begin() + partition_to_constrain_ * kFftLengthBy2);
+
+  fft_.Fft(&h, &H_[partition_to_constrain_]);
+
+  partition_to_constrain_ = partition_to_constrain_ < (H_.size() - 1)
+                                ? partition_to_constrain_ + 1
+                                : 0;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/adaptive_fir_filter.h b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/adaptive_fir_filter.h
new file mode 100644
index 0000000000..7872869351
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/adaptive_fir_filter.h
@@ -0,0 +1,153 @@
+/*
+ *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AEC3_ADAPTIVE_FIR_FILTER_H_
+#define MODULES_AUDIO_PROCESSING_AEC3_ADAPTIVE_FIR_FILTER_H_
+
+#include <array>
+#include <memory>
+#include <vector>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "modules/audio_processing/aec3/aec3_fft.h"
+#include "modules/audio_processing/aec3/fft_data.h"
+#include "modules/audio_processing/aec3/render_buffer.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "rtc_base/constructormagic.h"
+
+namespace webrtc {
+namespace aec3 {
+// Computes and stores the frequency response of the filter.
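+// For partition j and bin k, H2[j][k] = |H[j][k]|^2 = re^2 + im^2. The
+// _NEON/_SSE2 declarations below are vectorized variants of the same
+// computation.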
+void UpdateFrequencyResponse( + rtc::ArrayView<const FftData> H, + std::vector<std::array<float, kFftLengthBy2Plus1>>* H2); +#if defined(WEBRTC_HAS_NEON) +void UpdateFrequencyResponse_NEON( + rtc::ArrayView<const FftData> H, + std::vector<std::array<float, kFftLengthBy2Plus1>>* H2); +#endif +#if defined(WEBRTC_ARCH_X86_FAMILY) +void UpdateFrequencyResponse_SSE2( + rtc::ArrayView<const FftData> H, + std::vector<std::array<float, kFftLengthBy2Plus1>>* H2); +#endif + +// Computes and stores the echo return loss estimate of the filter, which is the +// sum of the partition frequency responses. +void UpdateErlEstimator( + const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2, + std::array<float, kFftLengthBy2Plus1>* erl); +#if defined(WEBRTC_HAS_NEON) +void UpdateErlEstimator_NEON( + const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2, + std::array<float, kFftLengthBy2Plus1>* erl); +#endif +#if defined(WEBRTC_ARCH_X86_FAMILY) +void UpdateErlEstimator_SSE2( + const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2, + std::array<float, kFftLengthBy2Plus1>* erl); +#endif + +// Adapts the filter partitions. +void AdaptPartitions(const RenderBuffer& render_buffer, + const FftData& G, + rtc::ArrayView<FftData> H); +#if defined(WEBRTC_HAS_NEON) +void AdaptPartitions_NEON(const RenderBuffer& render_buffer, + const FftData& G, + rtc::ArrayView<FftData> H); +#endif +#if defined(WEBRTC_ARCH_X86_FAMILY) +void AdaptPartitions_SSE2(const RenderBuffer& render_buffer, + const FftData& G, + rtc::ArrayView<FftData> H); +#endif + +// Produces the filter output. +void ApplyFilter(const RenderBuffer& render_buffer, + rtc::ArrayView<const FftData> H, + FftData* S); +#if defined(WEBRTC_HAS_NEON) +void ApplyFilter_NEON(const RenderBuffer& render_buffer, + rtc::ArrayView<const FftData> H, + FftData* S); +#endif +#if defined(WEBRTC_ARCH_X86_FAMILY) +void ApplyFilter_SSE2(const RenderBuffer& render_buffer, + rtc::ArrayView<const FftData> H, + FftData* S); +#endif + +} // namespace aec3 + +// Provides a frequency domain adaptive filter functionality. +class AdaptiveFirFilter { + public: + AdaptiveFirFilter(size_t size_partitions, + Aec3Optimization optimization, + ApmDataDumper* data_dumper); + + ~AdaptiveFirFilter(); + + // Produces the output of the filter. + void Filter(const RenderBuffer& render_buffer, FftData* S) const; + + // Adapts the filter. + void Adapt(const RenderBuffer& render_buffer, const FftData& G); + + // Receives reports that known echo path changes have occured and adjusts + // the filter adaptation accordingly. + void HandleEchoPathChange(); + + // Returns the filter size. + size_t SizePartitions() const { return H_.size(); } + + // Returns the filter based echo return loss. + const std::array<float, kFftLengthBy2Plus1>& Erl() const { return erl_; } + + // Returns the frequency responses for the filter partitions. + const std::vector<std::array<float, kFftLengthBy2Plus1>>& + FilterFrequencyResponse() const { + return H2_; + } + + // Returns the estimate of the impulse response. + const std::array<float, kAdaptiveFilterTimeDomainLength>& + FilterImpulseResponse() const { + return h_; + } + + void DumpFilter(const char* name) { + for (auto& H : H_) { + data_dumper_->DumpRaw(name, H.re); + data_dumper_->DumpRaw(name, H.im); + } + } + + private: + // Constrain the filter partitions in a cyclic manner. 
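+  // Each call enforces the partitioned-block constraint on a single
+  // partition: IFFT, keep only the first kFftLengthBy2 time-domain taps
+  // (mirroring them into |h_|), zero the rest, and FFT back. Handling one
+  // partition per Adapt() call amortizes the cost of the two transforms.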
+ void Constrain(); + + ApmDataDumper* const data_dumper_; + const Aec3Fft fft_; + const Aec3Optimization optimization_; + std::vector<FftData> H_; + std::vector<std::array<float, kFftLengthBy2Plus1>> H2_; + std::array<float, kAdaptiveFilterTimeDomainLength> h_; + std::array<float, kFftLengthBy2Plus1> erl_; + size_t partition_to_constrain_ = 0; + + RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(AdaptiveFirFilter); +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_ADAPTIVE_FIR_FILTER_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc new file mode 100644 index 0000000000..11d7e02b41 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc @@ -0,0 +1,379 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/adaptive_fir_filter.h" + +#include <math.h> +#include <algorithm> +#include <numeric> +#include <string> +#include "typedefs.h" // NOLINT(build/include) +#if defined(WEBRTC_ARCH_X86_FAMILY) +#include <emmintrin.h> +#endif +#include "modules/audio_processing/aec3/aec3_fft.h" +#include "modules/audio_processing/aec3/aec_state.h" +#include "modules/audio_processing/aec3/cascaded_biquad_filter.h" +#include "modules/audio_processing/aec3/render_signal_analyzer.h" +#include "modules/audio_processing/aec3/shadow_filter_update_gain.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "modules/audio_processing/test/echo_canceller_test_tools.h" +#include "rtc_base/arraysize.h" +#include "rtc_base/numerics/safe_minmax.h" +#include "rtc_base/random.h" +#include "system_wrappers/include/cpu_features_wrapper.h" +#include "test/gtest.h" + +namespace webrtc { +namespace aec3 { +namespace { + +std::string ProduceDebugText(size_t delay) { + std::ostringstream ss; + ss << ", Delay: " << delay; + return ss.str(); +} + +} // namespace + +#if defined(WEBRTC_HAS_NEON) +// Verifies that the optimized methods for filter adaptation are similar to +// their reference counterparts. 
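+// "Similar" rather than bitexact: the NEON multiply-accumulate path may round
+// differently from the scalar code, so the comparisons below use a relative
+// tolerance of 1e-5 instead of exact equality.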
+TEST(AdaptiveFirFilter, FilterAdaptationNeonOptimizations) { + RenderBuffer render_buffer(Aec3Optimization::kNone, 3, 12, + std::vector<size_t>(1, 12)); + Random random_generator(42U); + std::vector<std::vector<float>> x(3, std::vector<float>(kBlockSize, 0.f)); + FftData S_C; + FftData S_NEON; + FftData G; + Aec3Fft fft; + std::vector<FftData> H_C(10); + std::vector<FftData> H_NEON(10); + for (auto& H_j : H_C) { + H_j.Clear(); + } + for (auto& H_j : H_NEON) { + H_j.Clear(); + } + + for (size_t k = 0; k < 30; ++k) { + RandomizeSampleVector(&random_generator, x[0]); + render_buffer.Insert(x); + } + + for (size_t j = 0; j < G.re.size(); ++j) { + G.re[j] = j / 10001.f; + } + for (size_t j = 1; j < G.im.size() - 1; ++j) { + G.im[j] = j / 20001.f; + } + G.im[0] = 0.f; + G.im[G.im.size() - 1] = 0.f; + + AdaptPartitions_NEON(render_buffer, G, H_NEON); + AdaptPartitions(render_buffer, G, H_C); + AdaptPartitions_NEON(render_buffer, G, H_NEON); + AdaptPartitions(render_buffer, G, H_C); + + for (size_t l = 0; l < H_C.size(); ++l) { + for (size_t j = 0; j < H_C[l].im.size(); ++j) { + EXPECT_NEAR(H_C[l].re[j], H_NEON[l].re[j], fabs(H_C[l].re[j] * 0.00001f)); + EXPECT_NEAR(H_C[l].im[j], H_NEON[l].im[j], fabs(H_C[l].im[j] * 0.00001f)); + } + } + + ApplyFilter_NEON(render_buffer, H_NEON, &S_NEON); + ApplyFilter(render_buffer, H_C, &S_C); + for (size_t j = 0; j < S_C.re.size(); ++j) { + EXPECT_NEAR(S_C.re[j], S_NEON.re[j], fabs(S_C.re[j] * 0.00001f)); + EXPECT_NEAR(S_C.im[j], S_NEON.im[j], fabs(S_C.re[j] * 0.00001f)); + } +} + +// Verifies that the optimized method for frequency response computation is +// bitexact to the reference counterpart. +TEST(AdaptiveFirFilter, UpdateFrequencyResponseNeonOptimization) { + const size_t kNumPartitions = 12; + std::vector<FftData> H(kNumPartitions); + std::vector<std::array<float, kFftLengthBy2Plus1>> H2(kNumPartitions); + std::vector<std::array<float, kFftLengthBy2Plus1>> H2_NEON(kNumPartitions); + + for (size_t j = 0; j < H.size(); ++j) { + for (size_t k = 0; k < H[j].re.size(); ++k) { + H[j].re[k] = k + j / 3.f; + H[j].im[k] = j + k / 7.f; + } + } + + UpdateFrequencyResponse(H, &H2); + UpdateFrequencyResponse_NEON(H, &H2_NEON); + + for (size_t j = 0; j < H2.size(); ++j) { + for (size_t k = 0; k < H[j].re.size(); ++k) { + EXPECT_FLOAT_EQ(H2[j][k], H2_NEON[j][k]); + } + } +} + +// Verifies that the optimized method for echo return loss computation is +// bitexact to the reference counterpart. +TEST(AdaptiveFirFilter, UpdateErlNeonOptimization) { + const size_t kNumPartitions = 12; + std::vector<std::array<float, kFftLengthBy2Plus1>> H2(kNumPartitions); + std::array<float, kFftLengthBy2Plus1> erl; + std::array<float, kFftLengthBy2Plus1> erl_NEON; + + for (size_t j = 0; j < H2.size(); ++j) { + for (size_t k = 0; k < H2[j].size(); ++k) { + H2[j][k] = k + j / 3.f; + } + } + + UpdateErlEstimator(H2, &erl); + UpdateErlEstimator_NEON(H2, &erl_NEON); + + for (size_t j = 0; j < erl.size(); ++j) { + EXPECT_FLOAT_EQ(erl[j], erl_NEON[j]); + } +} + +#endif + +#if defined(WEBRTC_ARCH_X86_FAMILY) +// Verifies that the optimized methods for filter adaptation are bitexact to +// their reference counterparts. 
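+// Here bitexactness is expected: the SSE2 path performs the same multiplies
+// and additions in the same order as the scalar code, so EXPECT_FLOAT_EQ is
+// used throughout.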
+TEST(AdaptiveFirFilter, FilterAdaptationSse2Optimizations) { + bool use_sse2 = (WebRtc_GetCPUInfo(kSSE2) != 0); + if (use_sse2) { + RenderBuffer render_buffer(Aec3Optimization::kNone, 3, 12, + std::vector<size_t>(1, 12)); + Random random_generator(42U); + std::vector<std::vector<float>> x(3, std::vector<float>(kBlockSize, 0.f)); + FftData S_C; + FftData S_SSE2; + FftData G; + Aec3Fft fft; + std::vector<FftData> H_C(10); + std::vector<FftData> H_SSE2(10); + for (auto& H_j : H_C) { + H_j.Clear(); + } + for (auto& H_j : H_SSE2) { + H_j.Clear(); + } + + for (size_t k = 0; k < 500; ++k) { + RandomizeSampleVector(&random_generator, x[0]); + render_buffer.Insert(x); + + ApplyFilter_SSE2(render_buffer, H_SSE2, &S_SSE2); + ApplyFilter(render_buffer, H_C, &S_C); + for (size_t j = 0; j < S_C.re.size(); ++j) { + EXPECT_FLOAT_EQ(S_C.re[j], S_SSE2.re[j]); + EXPECT_FLOAT_EQ(S_C.im[j], S_SSE2.im[j]); + } + + std::for_each(G.re.begin(), G.re.end(), + [&](float& a) { a = random_generator.Rand<float>(); }); + std::for_each(G.im.begin(), G.im.end(), + [&](float& a) { a = random_generator.Rand<float>(); }); + + AdaptPartitions_SSE2(render_buffer, G, H_SSE2); + AdaptPartitions(render_buffer, G, H_C); + + for (size_t k = 0; k < H_C.size(); ++k) { + for (size_t j = 0; j < H_C[k].re.size(); ++j) { + EXPECT_FLOAT_EQ(H_C[k].re[j], H_SSE2[k].re[j]); + EXPECT_FLOAT_EQ(H_C[k].im[j], H_SSE2[k].im[j]); + } + } + } + } +} + +// Verifies that the optimized method for frequency response computation is +// bitexact to the reference counterpart. +TEST(AdaptiveFirFilter, UpdateFrequencyResponseSse2Optimization) { + bool use_sse2 = (WebRtc_GetCPUInfo(kSSE2) != 0); + if (use_sse2) { + const size_t kNumPartitions = 12; + std::vector<FftData> H(kNumPartitions); + std::vector<std::array<float, kFftLengthBy2Plus1>> H2(kNumPartitions); + std::vector<std::array<float, kFftLengthBy2Plus1>> H2_SSE2(kNumPartitions); + + for (size_t j = 0; j < H.size(); ++j) { + for (size_t k = 0; k < H[j].re.size(); ++k) { + H[j].re[k] = k + j / 3.f; + H[j].im[k] = j + k / 7.f; + } + } + + UpdateFrequencyResponse(H, &H2); + UpdateFrequencyResponse_SSE2(H, &H2_SSE2); + + for (size_t j = 0; j < H2.size(); ++j) { + for (size_t k = 0; k < H[j].re.size(); ++k) { + EXPECT_FLOAT_EQ(H2[j][k], H2_SSE2[j][k]); + } + } + } +} + +// Verifies that the optimized method for echo return loss computation is +// bitexact to the reference counterpart. +TEST(AdaptiveFirFilter, UpdateErlSse2Optimization) { + bool use_sse2 = (WebRtc_GetCPUInfo(kSSE2) != 0); + if (use_sse2) { + const size_t kNumPartitions = 12; + std::vector<std::array<float, kFftLengthBy2Plus1>> H2(kNumPartitions); + std::array<float, kFftLengthBy2Plus1> erl; + std::array<float, kFftLengthBy2Plus1> erl_SSE2; + + for (size_t j = 0; j < H2.size(); ++j) { + for (size_t k = 0; k < H2[j].size(); ++k) { + H2[j][k] = k + j / 3.f; + } + } + + UpdateErlEstimator(H2, &erl); + UpdateErlEstimator_SSE2(H2, &erl_SSE2); + + for (size_t j = 0; j < erl.size(); ++j) { + EXPECT_FLOAT_EQ(erl[j], erl_SSE2[j]); + } + } +} + +#endif + +#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) +// Verifies that the check for non-null data dumper works. +TEST(AdaptiveFirFilter, NullDataDumper) { + EXPECT_DEATH(AdaptiveFirFilter(9, DetectOptimization(), nullptr), ""); +} + +// Verifies that the check for non-null filter output works. 
+TEST(AdaptiveFirFilter, NullFilterOutput) {
+  ApmDataDumper data_dumper(42);
+  AdaptiveFirFilter filter(9, DetectOptimization(), &data_dumper);
+  RenderBuffer render_buffer(Aec3Optimization::kNone, 3,
+                             filter.SizePartitions(),
+                             std::vector<size_t>(1, filter.SizePartitions()));
+  EXPECT_DEATH(filter.Filter(render_buffer, nullptr), "");
+}
+
+#endif
+
+// Verifies that the filter statistics can be accessed when filter statistics
+// are turned on.
+TEST(AdaptiveFirFilter, FilterStatisticsAccess) {
+  ApmDataDumper data_dumper(42);
+  AdaptiveFirFilter filter(9, DetectOptimization(), &data_dumper);
+  filter.Erl();
+  filter.FilterFrequencyResponse();
+}
+
+// Verifies that the filter size is correctly reported.
+TEST(AdaptiveFirFilter, FilterSize) {
+  ApmDataDumper data_dumper(42);
+  for (size_t filter_size = 1; filter_size < 5; ++filter_size) {
+    AdaptiveFirFilter filter(filter_size, DetectOptimization(), &data_dumper);
+    EXPECT_EQ(filter_size, filter.SizePartitions());
+  }
+}
+
+// Verifies that the filter is able to properly filter a signal and to adapt
+// its coefficients.
+TEST(AdaptiveFirFilter, FilterAndAdapt) {
+  constexpr size_t kNumBlocksToProcess = 500;
+  ApmDataDumper data_dumper(42);
+  AdaptiveFirFilter filter(9, DetectOptimization(), &data_dumper);
+  Aec3Fft fft;
+  RenderBuffer render_buffer(Aec3Optimization::kNone, 3,
+                             filter.SizePartitions(),
+                             std::vector<size_t>(1, filter.SizePartitions()));
+  ShadowFilterUpdateGain gain;
+  Random random_generator(42U);
+  std::vector<std::vector<float>> x(3, std::vector<float>(kBlockSize, 0.f));
+  std::vector<float> n(kBlockSize, 0.f);
+  std::vector<float> y(kBlockSize, 0.f);
+  AecState aec_state(EchoCanceller3Config{});
+  RenderSignalAnalyzer render_signal_analyzer;
+  std::vector<float> e(kBlockSize, 0.f);
+  std::array<float, kFftLength> s_scratch;
+  std::array<float, kBlockSize> s;
+  FftData S;
+  FftData G;
+  FftData E;
+  std::array<float, kFftLengthBy2Plus1> Y2;
+  std::array<float, kFftLengthBy2Plus1> E2_main;
+  std::array<float, kFftLengthBy2Plus1> E2_shadow;
+  // [B,A] = butter(2,100/8000,'high')
+  constexpr CascadedBiQuadFilter::BiQuadCoefficients
+      kHighPassFilterCoefficients = {{0.97261f, -1.94523f, 0.97261f},
+                                     {-1.94448f, 0.94598f}};
+  Y2.fill(0.f);
+  E2_main.fill(0.f);
+  E2_shadow.fill(0.f);
+
+  constexpr float kScale = 1.0f / kFftLengthBy2;
+
+  for (size_t delay_samples : {0, 64, 150, 200, 301}) {
+    DelayBuffer<float> delay_buffer(delay_samples);
+    CascadedBiQuadFilter x_hp_filter(kHighPassFilterCoefficients, 1);
+    CascadedBiQuadFilter y_hp_filter(kHighPassFilterCoefficients, 1);
+
+    SCOPED_TRACE(ProduceDebugText(delay_samples));
+    for (size_t k = 0; k < kNumBlocksToProcess; ++k) {
+      RandomizeSampleVector(&random_generator, x[0]);
+      delay_buffer.Delay(x[0], y);
+
+      RandomizeSampleVector(&random_generator, n);
+      static constexpr float kNoiseScaling = 1.f / 100.f;
+      std::transform(y.begin(), y.end(), n.begin(), y.begin(),
+                     [](float a, float b) { return a + b * kNoiseScaling; });
+
+      x_hp_filter.Process(x[0]);
+      y_hp_filter.Process(y);
+
+      render_buffer.Insert(x);
+      render_signal_analyzer.Update(render_buffer, aec_state.FilterDelay());
+
+      filter.Filter(render_buffer, &S);
+      fft.Ifft(S, &s_scratch);
+      std::transform(y.begin(), y.end(), s_scratch.begin() + kFftLengthBy2,
+                     e.begin(),
+                     [&](float a, float b) { return a - b * kScale; });
+      std::for_each(e.begin(), e.end(),
+                    [](float& a) { a = rtc::SafeClamp(a, -32768.f, 32767.f); });
+      fft.ZeroPaddedFft(e, &E);
+      for (size_t k = 0; k < kBlockSize; ++k) {
+        s[k] = kScale *
s_scratch[k + kFftLengthBy2]; + } + + gain.Compute(render_buffer, render_signal_analyzer, E, + filter.SizePartitions(), false, &G); + filter.Adapt(render_buffer, G); + aec_state.HandleEchoPathChange(EchoPathVariability(false, false)); + aec_state.Update(filter.FilterFrequencyResponse(), + filter.FilterImpulseResponse(), true, rtc::nullopt, + render_buffer, E2_main, Y2, x[0], s, false); + } + // Verify that the filter is able to perform well. + EXPECT_LT(1000 * std::inner_product(e.begin(), e.end(), e.begin(), 0.f), + std::inner_product(y.begin(), y.end(), y.begin(), 0.f)); + ASSERT_TRUE(aec_state.FilterDelay()); + EXPECT_EQ(delay_samples / kBlockSize, *aec_state.FilterDelay()); + } +} +} // namespace aec3 +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/aec3_common.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/aec3_common.cc new file mode 100644 index 0000000000..7becce49f1 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/aec3_common.cc @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/aec3_common.h" + +#include "typedefs.h" // NOLINT(build/include) +#include "system_wrappers/include/cpu_features_wrapper.h" + +namespace webrtc { + +Aec3Optimization DetectOptimization() { +#if defined(WEBRTC_ARCH_X86_FAMILY) + if (WebRtc_GetCPUInfo(kSSE2) != 0) { + return Aec3Optimization::kSse2; + } +#endif + +#if defined(WEBRTC_HAS_NEON) + return Aec3Optimization::kNeon; +#endif + + return Aec3Optimization::kNone; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/aec3_common.h b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/aec3_common.h new file mode 100644 index 0000000000..a85f3ace21 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/aec3_common.h @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_AEC3_COMMON_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_AEC3_COMMON_H_ + +#include <stddef.h> +#include "typedefs.h" // NOLINT(build/include) + +namespace webrtc { + +#ifdef _MSC_VER /* visual c++ */ +#define ALIGN16_BEG __declspec(align(16)) +#define ALIGN16_END +#else /* gcc or icc */ +#define ALIGN16_BEG +#define ALIGN16_END __attribute__((aligned(16))) +#endif + +enum class Aec3Optimization { kNone, kSse2, kNeon }; + +constexpr int kNumBlocksPerSecond = 250; + +constexpr int kMetricsReportingIntervalBlocks = 10 * kNumBlocksPerSecond; +constexpr int kMetricsComputationBlocks = 9; +constexpr int kMetricsCollectionBlocks = + kMetricsReportingIntervalBlocks - kMetricsComputationBlocks; + +constexpr size_t kFftLengthBy2 = 64; +constexpr size_t kFftLengthBy2Plus1 = kFftLengthBy2 + 1; +constexpr size_t kFftLengthBy2Minus1 = kFftLengthBy2 - 1; +constexpr size_t kFftLength = 2 * kFftLengthBy2; + +constexpr int kAdaptiveFilterLength = 12; +constexpr int kUnknownDelayRenderWindowSize = 12; +constexpr int kAdaptiveFilterTimeDomainLength = + kAdaptiveFilterLength * kFftLengthBy2; + +constexpr size_t kMaxNumBands = 3; +constexpr size_t kSubFrameLength = 80; + +constexpr size_t kBlockSize = kFftLengthBy2; +constexpr size_t kExtendedBlockSize = 2 * kFftLengthBy2; +constexpr size_t kMatchedFilterWindowSizeSubBlocks = 32; +constexpr size_t kMatchedFilterAlignmentShiftSizeSubBlocks = + kMatchedFilterWindowSizeSubBlocks * 3 / 4; + +constexpr size_t kMinEchoPathDelayBlocks = 5; +constexpr size_t kMaxApiCallsJitterBlocks = 26; +constexpr size_t kRenderTransferQueueSize = kMaxApiCallsJitterBlocks / 2; +static_assert(2 * kRenderTransferQueueSize >= kMaxApiCallsJitterBlocks, + "Requirement to ensure buffer overflow detection"); + +constexpr size_t kEchoPathChangeConvergenceBlocks = 2 * kNumBlocksPerSecond; + +// TODO(peah): Integrate this with how it is done inside audio_processing_impl. +constexpr size_t NumBandsForRate(int sample_rate_hz) { + return static_cast<size_t>(sample_rate_hz == 8000 ? 1 + : sample_rate_hz / 16000); +} +constexpr int LowestBandRate(int sample_rate_hz) { + return sample_rate_hz == 8000 ? sample_rate_hz : 16000; +} + +constexpr bool ValidFullBandRate(int sample_rate_hz) { + return sample_rate_hz == 8000 || sample_rate_hz == 16000 || + sample_rate_hz == 32000 || sample_rate_hz == 48000; +} + +constexpr size_t GetDownSampledBufferSize(size_t down_sampling_factor, + size_t num_matched_filters) { + return kBlockSize / down_sampling_factor * + (kMatchedFilterAlignmentShiftSizeSubBlocks * num_matched_filters + + kMatchedFilterWindowSizeSubBlocks + 1); +} + +constexpr size_t GetRenderDelayBufferSize(size_t down_sampling_factor, + size_t num_matched_filters) { + return (3 * + GetDownSampledBufferSize(down_sampling_factor, num_matched_filters)) / + (4 * kBlockSize / down_sampling_factor); +} + +// Detects what kind of optimizations to use for the code. 
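+// SSE2 is selected via a runtime CPU check on x86 (see aec3_common.cc above);
+// NEON is a compile-time choice (WEBRTC_HAS_NEON); otherwise the plain C
+// implementations are used.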
+Aec3Optimization DetectOptimization(); + +static_assert(1 == NumBandsForRate(8000), "Number of bands for 8 kHz"); +static_assert(1 == NumBandsForRate(16000), "Number of bands for 16 kHz"); +static_assert(2 == NumBandsForRate(32000), "Number of bands for 32 kHz"); +static_assert(3 == NumBandsForRate(48000), "Number of bands for 48 kHz"); + +static_assert(8000 == LowestBandRate(8000), "Sample rate of band 0 for 8 kHz"); +static_assert(16000 == LowestBandRate(16000), + "Sample rate of band 0 for 16 kHz"); +static_assert(16000 == LowestBandRate(32000), + "Sample rate of band 0 for 32 kHz"); +static_assert(16000 == LowestBandRate(48000), + "Sample rate of band 0 for 48 kHz"); + +static_assert(ValidFullBandRate(8000), + "Test that 8 kHz is a valid sample rate"); +static_assert(ValidFullBandRate(16000), + "Test that 16 kHz is a valid sample rate"); +static_assert(ValidFullBandRate(32000), + "Test that 32 kHz is a valid sample rate"); +static_assert(ValidFullBandRate(48000), + "Test that 48 kHz is a valid sample rate"); +static_assert(!ValidFullBandRate(8001), + "Test that 8001 Hz is not a valid sample rate"); + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_AEC3_COMMON_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/aec3_fft.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/aec3_fft.cc new file mode 100644 index 0000000000..c8120cb864 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/aec3_fft.cc @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/aec3_fft.h" + +#include <algorithm> + +#include "rtc_base/checks.h" + +namespace webrtc { + +// TODO(peah): Change x to be std::array once the rest of the code allows this. +void Aec3Fft::ZeroPaddedFft(rtc::ArrayView<const float> x, FftData* X) const { + RTC_DCHECK(X); + RTC_DCHECK_EQ(kFftLengthBy2, x.size()); + std::array<float, kFftLength> fft; + std::fill(fft.begin(), fft.begin() + kFftLengthBy2, 0.f); + std::copy(x.begin(), x.end(), fft.begin() + kFftLengthBy2); + Fft(&fft, X); +} + +void Aec3Fft::PaddedFft(rtc::ArrayView<const float> x, + rtc::ArrayView<float> x_old, + FftData* X) const { + RTC_DCHECK(X); + RTC_DCHECK_EQ(kFftLengthBy2, x.size()); + RTC_DCHECK_EQ(kFftLengthBy2, x_old.size()); + std::array<float, kFftLength> fft; + std::copy(x_old.begin(), x_old.end(), fft.begin()); + std::copy(x.begin(), x.end(), fft.begin() + x_old.size()); + std::copy(x.begin(), x.end(), x_old.begin()); + Fft(&fft, X); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/aec3_fft.h b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/aec3_fft.h new file mode 100644 index 0000000000..2a5dfef47f --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/aec3_fft.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_AEC3_FFT_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_AEC3_FFT_H_ + +#include <array> + +#include "api/array_view.h" +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/fft_data.h" +#include "modules/audio_processing/utility/ooura_fft.h" +#include "rtc_base/constructormagic.h" + +namespace webrtc { + +// Wrapper class that provides 128 point real valued FFT functionality with the +// FftData type. +class Aec3Fft { + public: + Aec3Fft() = default; + // Computes the FFT. Note that both the input and output are modified. + void Fft(std::array<float, kFftLength>* x, FftData* X) const { + RTC_DCHECK(x); + RTC_DCHECK(X); + ooura_fft_.Fft(x->data()); + X->CopyFromPackedArray(*x); + } + // Computes the inverse Fft. + void Ifft(const FftData& X, std::array<float, kFftLength>* x) const { + RTC_DCHECK(x); + X.CopyToPackedArray(x); + ooura_fft_.InverseFft(x->data()); + } + + // Pads the input with kFftLengthBy2 initial zeros before computing the Fft. + void ZeroPaddedFft(rtc::ArrayView<const float> x, FftData* X) const; + + // Concatenates the kFftLengthBy2 values long x and x_old before computing the + // Fft. After that, x is copied to x_old. + void PaddedFft(rtc::ArrayView<const float> x, + rtc::ArrayView<float> x_old, + FftData* X) const; + + private: + const OouraFft ooura_fft_; + + RTC_DISALLOW_COPY_AND_ASSIGN(Aec3Fft); +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_AEC3_FFT_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/aec3_fft_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/aec3_fft_unittest.cc new file mode 100644 index 0000000000..46831e021a --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/aec3_fft_unittest.cc @@ -0,0 +1,211 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/aec3_fft.h" + +#include <algorithm> + +#include "test/gmock.h" +#include "test/gtest.h" + +namespace webrtc { + +#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) + +// Verifies that the check for non-null input in Fft works. +TEST(Aec3Fft, NullFftInput) { + Aec3Fft fft; + FftData X; + EXPECT_DEATH(fft.Fft(nullptr, &X), ""); +} + +// Verifies that the check for non-null input in Fft works. +TEST(Aec3Fft, NullFftOutput) { + Aec3Fft fft; + std::array<float, kFftLength> x; + EXPECT_DEATH(fft.Fft(&x, nullptr), ""); +} + +// Verifies that the check for non-null output in Ifft works. +TEST(Aec3Fft, NullIfftOutput) { + Aec3Fft fft; + FftData X; + EXPECT_DEATH(fft.Ifft(X, nullptr), ""); +} + +// Verifies that the check for non-null output in ZeroPaddedFft works. +TEST(Aec3Fft, NullZeroPaddedFftOutput) { + Aec3Fft fft; + std::array<float, kFftLengthBy2> x; + EXPECT_DEATH(fft.ZeroPaddedFft(x, nullptr), ""); +} + +// Verifies that the check for input length in ZeroPaddedFft works. 
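A note on the convention the functional tests further down rely on: the Ooura-based transform pair is unnormalized, so an Fft followed by an Ifft returns the input scaled by kFftLength / 2 = 64 (the FftAndIfft test checks exactly this). A usage sketch, assuming the WebRTC tree is on the include path:

```cpp
// Round-trip sketch for the unnormalized Aec3Fft pair; callers must undo the
// gain of kFftLengthBy2 = 64 themselves.
#include <array>

#include "modules/audio_processing/aec3/aec3_fft.h"

void RoundTrip(std::array<float, webrtc::kFftLength>* x) {
  webrtc::Aec3Fft fft;
  webrtc::FftData X;
  fft.Fft(x, &X);  // Forward transform; also overwrites *x as scratch space.
  fft.Ifft(X, x);  // Inverse transform; *x now holds 64 times the input.
  for (float& v : *x) {
    v *= 1.f / webrtc::kFftLengthBy2;  // Undo the scaling of the pair.
  }
}
```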
+TEST(Aec3Fft, ZeroPaddedFftWrongInputLength) {
+  Aec3Fft fft;
+  FftData X;
+  std::array<float, kFftLengthBy2 - 1> x;
+  EXPECT_DEATH(fft.ZeroPaddedFft(x, &X), "");
+}
+
+// Verifies that the check for non-null output in PaddedFft works.
+TEST(Aec3Fft, NullPaddedFftOutput) {
+  Aec3Fft fft;
+  std::array<float, kFftLengthBy2> x;
+  std::array<float, kFftLengthBy2> x_old;
+  EXPECT_DEATH(fft.PaddedFft(x, x_old, nullptr), "");
+}
+
+// Verifies that the check for input length in PaddedFft works.
+TEST(Aec3Fft, PaddedFftWrongInputLength) {
+  Aec3Fft fft;
+  FftData X;
+  std::array<float, kFftLengthBy2 - 1> x;
+  std::array<float, kFftLengthBy2> x_old;
+  EXPECT_DEATH(fft.PaddedFft(x, x_old, &X), "");
+}
+
+// Verifies that the check for length in the old value in PaddedFft works.
+TEST(Aec3Fft, PaddedFftWrongOldValuesLength) {
+  Aec3Fft fft;
+  FftData X;
+  std::array<float, kFftLengthBy2> x;
+  std::array<float, kFftLengthBy2 - 1> x_old;
+  EXPECT_DEATH(fft.PaddedFft(x, x_old, &X), "");
+}
+
+#endif
+
+// Verifies that Fft works as intended.
+TEST(Aec3Fft, Fft) {
+  Aec3Fft fft;
+  FftData X;
+  std::array<float, kFftLength> x;
+  x.fill(0.f);
+  fft.Fft(&x, &X);
+  EXPECT_THAT(X.re, ::testing::Each(0.f));
+  EXPECT_THAT(X.im, ::testing::Each(0.f));
+
+  x.fill(0.f);
+  x[0] = 1.f;
+  fft.Fft(&x, &X);
+  EXPECT_THAT(X.re, ::testing::Each(1.f));
+  EXPECT_THAT(X.im, ::testing::Each(0.f));
+
+  x.fill(1.f);
+  fft.Fft(&x, &X);
+  EXPECT_EQ(128.f, X.re[0]);
+  std::for_each(X.re.begin() + 1, X.re.end(),
+                [](float a) { EXPECT_EQ(0.f, a); });
+  EXPECT_THAT(X.im, ::testing::Each(0.f));
+}
+
+// Verifies that InverseFft works as intended.
+TEST(Aec3Fft, Ifft) {
+  Aec3Fft fft;
+  FftData X;
+  std::array<float, kFftLength> x;
+
+  X.re.fill(0.f);
+  X.im.fill(0.f);
+  fft.Ifft(X, &x);
+  EXPECT_THAT(x, ::testing::Each(0.f));
+
+  X.re.fill(1.f);
+  X.im.fill(0.f);
+  fft.Ifft(X, &x);
+  EXPECT_EQ(64.f, x[0]);
+  std::for_each(x.begin() + 1, x.end(), [](float a) { EXPECT_EQ(0.f, a); });
+
+  X.re.fill(0.f);
+  X.re[0] = 128;
+  X.im.fill(0.f);
+  fft.Ifft(X, &x);
+  EXPECT_THAT(x, ::testing::Each(64.f));
+}
+
+// Verifies that InverseFft and Fft work as intended.
+TEST(Aec3Fft, FftAndIfft) {
+  Aec3Fft fft;
+  FftData X;
+  std::array<float, kFftLength> x;
+  std::array<float, kFftLength> x_ref;
+
+  int v = 0;
+  for (int k = 0; k < 20; ++k) {
+    for (size_t j = 0; j < x.size(); ++j) {
+      x[j] = v++;
+      x_ref[j] = x[j] * 64.f;
+    }
+    fft.Fft(&x, &X);
+    fft.Ifft(X, &x);
+    for (size_t j = 0; j < x.size(); ++j) {
+      EXPECT_NEAR(x_ref[j], x[j], 0.001f);
+    }
+  }
+}
+
+// Verifies that ZeroPaddedFft works as intended.
+TEST(Aec3Fft, ZeroPaddedFft) {
+  Aec3Fft fft;
+  FftData X;
+  std::array<float, kFftLengthBy2> x_in;
+  std::array<float, kFftLength> x_ref;
+  std::array<float, kFftLength> x_out;
+
+  int v = 0;
+  x_ref.fill(0.f);
+  for (int k = 0; k < 20; ++k) {
+    for (size_t j = 0; j < x_in.size(); ++j) {
+      x_in[j] = v++;
+      x_ref[j + kFftLengthBy2] = x_in[j] * 64.f;
+    }
+    fft.ZeroPaddedFft(x_in, &X);
+    fft.Ifft(X, &x_out);
+    for (size_t j = 0; j < x_out.size(); ++j) {
+      EXPECT_NEAR(x_ref[j], x_out[j], 0.1f);
+    }
+  }
+}
+
+// Verifies that PaddedFft works as intended.
+TEST(Aec3Fft, PaddedFft) { + Aec3Fft fft; + FftData X; + std::array<float, kFftLengthBy2> x_in; + std::array<float, kFftLength> x_out; + std::array<float, kFftLengthBy2> x_old; + std::array<float, kFftLengthBy2> x_old_ref; + std::array<float, kFftLength> x_ref; + + int v = 0; + x_old.fill(0.f); + for (int k = 0; k < 20; ++k) { + for (size_t j = 0; j < x_in.size(); ++j) { + x_in[j] = v++; + } + + std::copy(x_old.begin(), x_old.end(), x_ref.begin()); + std::copy(x_in.begin(), x_in.end(), x_ref.begin() + kFftLengthBy2); + std::copy(x_in.begin(), x_in.end(), x_old_ref.begin()); + std::for_each(x_ref.begin(), x_ref.end(), [](float& a) { a *= 64.f; }); + + fft.PaddedFft(x_in, x_old, &X); + fft.Ifft(X, &x_out); + + for (size_t j = 0; j < x_out.size(); ++j) { + EXPECT_NEAR(x_ref[j], x_out[j], 0.1f); + } + + EXPECT_EQ(x_old_ref, x_old); + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/aec_state.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/aec_state.cc new file mode 100644 index 0000000000..0318b04ef4 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/aec_state.cc @@ -0,0 +1,321 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/aec_state.h" + +#include <math.h> + +#include <numeric> +#include <vector> + +#include "api/array_view.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/atomicops.h" +#include "rtc_base/checks.h" + +namespace webrtc { +namespace { + +// Computes delay of the adaptive filter. +int EstimateFilterDelay( + const std::vector<std::array<float, kFftLengthBy2Plus1>>& + adaptive_filter_frequency_response) { + const auto& H2 = adaptive_filter_frequency_response; + constexpr size_t kUpperBin = kFftLengthBy2 - 5; + RTC_DCHECK_GE(kAdaptiveFilterLength, H2.size()); + std::array<int, kAdaptiveFilterLength> delays; + delays.fill(0); + for (size_t k = 1; k < kUpperBin; ++k) { + // Find the maximum of H2[j]. 
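+    // (Each frequency bin effectively votes for the filter partition whose
+    // frequency response carries the most energy in that bin; the most-voted
+    // partition below becomes the filter delay estimate, in blocks.)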
+ size_t peak = 0; + for (size_t j = 0; j < H2.size(); ++j) { + if (H2[j][k] > H2[peak][k]) { + peak = j; + } + } + ++delays[peak]; + } + + return std::distance(delays.begin(), + std::max_element(delays.begin(), delays.end())); +} + +} // namespace + +int AecState::instance_count_ = 0; + +AecState::AecState(const EchoCanceller3Config& config) + : data_dumper_( + new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))), + erle_estimator_(config.erle.min, config.erle.max_l, config.erle.max_h), + config_(config), + reverb_decay_(config_.ep_strength.default_len) { + max_render_.fill(0.f); +} + +AecState::~AecState() = default; + +void AecState::HandleEchoPathChange( + const EchoPathVariability& echo_path_variability) { + if (echo_path_variability.AudioPathChanged()) { + blocks_since_last_saturation_ = 0; + usable_linear_estimate_ = false; + echo_leakage_detected_ = false; + capture_signal_saturation_ = false; + echo_saturation_ = false; + previous_max_sample_ = 0.f; + max_render_.fill(0.f); + + if (echo_path_variability.delay_change) { + force_zero_gain_counter_ = 0; + blocks_with_filter_adaptation_ = 0; + blocks_with_strong_render_ = 0; + initial_state_ = true; + linear_echo_estimate_ = false; + sufficient_filter_updates_ = false; + render_received_ = false; + force_zero_gain_ = true; + capture_block_counter_ = 0; + } + if (echo_path_variability.gain_change) { + capture_block_counter_ = kNumBlocksPerSecond; + } + } +} + +void AecState::Update(const std::vector<std::array<float, kFftLengthBy2Plus1>>& + adaptive_filter_frequency_response, + const std::array<float, kAdaptiveFilterTimeDomainLength>& + adaptive_filter_impulse_response, + bool converged_filter, + const rtc::Optional<size_t>& external_delay_samples, + const RenderBuffer& render_buffer, + const std::array<float, kFftLengthBy2Plus1>& E2_main, + const std::array<float, kFftLengthBy2Plus1>& Y2, + rtc::ArrayView<const float> x, + const std::array<float, kBlockSize>& s, + bool echo_leakage_detected) { + // Store input parameters. + echo_leakage_detected_ = echo_leakage_detected; + + // Update counters. + ++capture_block_counter_; + + // Force zero echo suppression gain after an echo path change to allow at + // least some render data to be collected in order to avoid an initial echo + // burst. + force_zero_gain_ = (++force_zero_gain_counter_) < kNumBlocksPerSecond / 5; + + // Estimate delays. + filter_delay_ = EstimateFilterDelay(adaptive_filter_frequency_response); + external_delay_ = + external_delay_samples + ? rtc::Optional<size_t>(*external_delay_samples / kBlockSize) + : rtc::nullopt; + + // Update the ERL and ERLE measures. + if (converged_filter && capture_block_counter_ >= 2 * kNumBlocksPerSecond) { + const auto& X2 = render_buffer.Spectrum(*filter_delay_); + erle_estimator_.Update(X2, Y2, E2_main); + erl_estimator_.Update(X2, Y2); + } + + // Update the echo audibility evaluator. + echo_audibility_.Update(x, s, converged_filter); + + // Detect and flag echo saturation. + // TODO(peah): Add the delay in this computation to ensure that the render and + // capture signals are properly aligned. + RTC_DCHECK_LT(0, x.size()); + const float max_sample = fabs(*std::max_element( + x.begin(), x.end(), [](float a, float b) { return a * a < b * b; })); + + if (config_.ep_strength.echo_can_saturate) { + const bool saturated_echo = + (previous_max_sample_ > 200.f) && SaturatedCapture(); + + // Counts the blocks since saturation. 
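+    // Saturation is held for kSaturationLeakageBlocks blocks after the last
+    // saturated capture so that the tail of a saturated echo is also flagged.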
+    constexpr size_t kSaturationLeakageBlocks = 20;
+
+    // Set the flag for potential presence of saturated echo.
+    blocks_since_last_saturation_ =
+        saturated_echo ? 0 : blocks_since_last_saturation_ + 1;
+
+    echo_saturation_ =
+        blocks_since_last_saturation_ < kSaturationLeakageBlocks;
+  } else {
+    echo_saturation_ = false;
+  }
+  previous_max_sample_ = max_sample;
+
+  // TODO(peah): Move?
+  sufficient_filter_updates_ =
+      blocks_with_filter_adaptation_ >= kEchoPathChangeConvergenceBlocks;
+  initial_state_ = capture_block_counter_ < 3 * kNumBlocksPerSecond;
+
+  // Flag whether the linear filter estimate is usable.
+  usable_linear_estimate_ =
+      (!echo_saturation_) && (converged_filter || SufficientFilterUpdates()) &&
+      capture_block_counter_ >= 2 * kNumBlocksPerSecond && external_delay_;
+
+  linear_echo_estimate_ = UsableLinearEstimate() && !TransparentMode();
+
+  // After a sufficient number of active render samples for which an echo
+  // should have been detected in the capture signal if the ERL was not
+  // infinite, flag that transparent mode should be entered.
+  const float x_energy = std::inner_product(x.begin(), x.end(), x.begin(), 0.f);
+  const bool active_render_block =
+      x_energy > (config_.render_levels.active_render_limit *
+                  config_.render_levels.active_render_limit) *
+                     kFftLengthBy2;
+
+  if (active_render_block) {
+    render_received_ = true;
+  }
+
+  // Update counters.
+  blocks_with_filter_adaptation_ +=
+      (active_render_block && (!SaturatedCapture()) ? 1 : 0);
+
+  transparent_mode_ = !converged_filter &&
+                      (!render_received_ || blocks_with_filter_adaptation_ >=
+                                                5 * kNumBlocksPerSecond);
+
+  // Update the room reverb estimate.
+  UpdateReverb(adaptive_filter_impulse_response);
+}
+
+void AecState::UpdateReverb(
+    const std::array<float, kAdaptiveFilterTimeDomainLength>&
+        impulse_response) {
+  if ((!(filter_delay_ && usable_linear_estimate_)) ||
+      (*filter_delay_ > kAdaptiveFilterLength - 4)) {
+    return;
+  }
+
+  // Form the data to match against by squaring the impulse response
+  // coefficients.
+  std::array<float, kAdaptiveFilterTimeDomainLength> matching_data;
+  std::transform(impulse_response.begin(), impulse_response.end(),
+                 matching_data.begin(), [](float a) { return a * a; });
+
+  // Avoid matching against noise in the model by subtracting an estimate of
+  // the model noise power.
+  constexpr size_t kTailLength = 64;
+  constexpr size_t tail_index = kAdaptiveFilterTimeDomainLength - kTailLength;
+  const float tail_power = *std::max_element(
+      matching_data.begin() + tail_index, matching_data.end());
+  std::for_each(matching_data.begin(), matching_data.begin() + tail_index,
+                [tail_power](float& a) { a = std::max(0.f, a - tail_power); });
+
+  // Identify the peak index of the impulse response.
+  const size_t peak_index = std::distance(
+      matching_data.begin(),
+      std::max_element(matching_data.begin(),
+                       matching_data.begin() + tail_index));
+
+  if (peak_index + 128 < tail_index) {
+    size_t start_index = peak_index + 64;
+    // Compute the matching residual error for the current candidate to match.
+    float residual_sqr_sum = 0.f;
+    float d_k = reverb_decay_to_test_;
+    for (size_t k = start_index; k < tail_index; ++k) {
+      if (matching_data[start_index + 1] == 0.f) {
+        break;
+      }
+
+      float residual = matching_data[k] - matching_data[peak_index] * d_k;
+      residual_sqr_sum += residual * residual;
+      d_k *= reverb_decay_to_test_;
+    }
+
+    // If needed, update the best candidate for the reverb decay.
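+    // (A smaller summed squared residual means the tested decay explains the
+    // measured tail better; the winning candidate is committed only once the
+    // sweep over reverb_decay_to_test_ completes below.)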
+ if (reverb_decay_candidate_residual_ < 0.f || + residual_sqr_sum < reverb_decay_candidate_residual_) { + reverb_decay_candidate_residual_ = residual_sqr_sum; + reverb_decay_candidate_ = reverb_decay_to_test_; + } + } + + // Compute the next reverb candidate to evaluate such that all candidates will + // be evaluated within one second. + reverb_decay_to_test_ += (0.9965f - 0.9f) / (5 * kNumBlocksPerSecond); + + // If all reverb candidates have been evaluated, choose the best one as the + // reverb decay. + if (reverb_decay_to_test_ >= 0.9965f) { + if (reverb_decay_candidate_residual_ < 0.f) { + // Transform the decay to be in the unit of blocks. + reverb_decay_ = powf(reverb_decay_candidate_, kFftLengthBy2); + + // Limit the estimated reverb_decay_ to the maximum one needed in practice + // to minimize the impact of incorrect estimates. + reverb_decay_ = std::min(config_.ep_strength.default_len, reverb_decay_); + } + reverb_decay_to_test_ = 0.9f; + reverb_decay_candidate_residual_ = -1.f; + } + + // For noisy impulse responses, assume a fixed tail length. + if (tail_power > 0.0005f) { + reverb_decay_ = config_.ep_strength.default_len; + } + data_dumper_->DumpRaw("aec3_reverb_decay", reverb_decay_); + data_dumper_->DumpRaw("aec3_tail_power", tail_power); +} + +void AecState::EchoAudibility::Update(rtc::ArrayView<const float> x, + const std::array<float, kBlockSize>& s, + bool converged_filter) { + auto result_x = std::minmax_element(x.begin(), x.end()); + auto result_s = std::minmax_element(s.begin(), s.end()); + const float x_abs = + std::max(fabsf(*result_x.first), fabsf(*result_x.second)); + const float s_abs = + std::max(fabsf(*result_s.first), fabsf(*result_s.second)); + + if (converged_filter) { + if (x_abs < 20.f) { + ++low_farend_counter_; + } else { + low_farend_counter_ = 0; + } + } else { + if (x_abs < 100.f) { + ++low_farend_counter_; + } else { + low_farend_counter_ = 0; + } + } + + // The echo is deemed as not audible if the echo estimate is on the level of + // the quantization noise in the FFTs and the nearend level is sufficiently + // strong to mask that by ensuring that the playout and AGC gains do not boost + // any residual echo that is below the quantization noise level. Furthermore, + // cases where the render signal is very close to zero are also identified as + // not producing audible echo. + inaudible_echo_ = (max_nearend_ > 500 && s_abs < 30.f) || + (!converged_filter && x_abs < 500); + inaudible_echo_ = inaudible_echo_ || low_farend_counter_ > 20; +} + +void AecState::EchoAudibility::UpdateWithOutput(rtc::ArrayView<const float> e) { + const float e_max = *std::max_element(e.begin(), e.end()); + const float e_min = *std::min_element(e.begin(), e.end()); + const float e_abs = std::max(fabsf(e_max), fabsf(e_min)); + + if (max_nearend_ < e_abs) { + max_nearend_ = e_abs; + max_nearend_counter_ = 0; + } else { + if (++max_nearend_counter_ > 5 * kNumBlocksPerSecond) { + max_nearend_ *= 0.995f; + } + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/aec_state.h b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/aec_state.h new file mode 100644 index 0000000000..b8c1523a00 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/aec_state.h @@ -0,0 +1,183 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_AEC_STATE_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_AEC_STATE_H_ + +#include <algorithm> +#include <memory> +#include <vector> + +#include "api/array_view.h" +#include "api/optional.h" +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/echo_path_variability.h" +#include "modules/audio_processing/aec3/erl_estimator.h" +#include "modules/audio_processing/aec3/erle_estimator.h" +#include "modules/audio_processing/aec3/render_buffer.h" +#include "modules/audio_processing/include/audio_processing.h" +#include "rtc_base/constructormagic.h" + +namespace webrtc { + +class ApmDataDumper; + +// Handles the state and the conditions for the echo removal functionality. +class AecState { + public: + explicit AecState(const EchoCanceller3Config& config); + ~AecState(); + + // Returns whether the linear filter estimate is usable. + bool UsableLinearEstimate() const { return usable_linear_estimate_; } + + // Returns whether there has been echo leakage detected. + bool EchoLeakageDetected() const { return echo_leakage_detected_; } + + // Returns whether the render signal is currently active. + // TODO(peah): Deprecate this in an upcoming CL. + bool ActiveRender() const { return blocks_with_filter_adaptation_ > 200; } + + // Returns the ERLE. + const std::array<float, kFftLengthBy2Plus1>& Erle() const { + return erle_estimator_.Erle(); + } + + // Returns the time-domain ERLE. + float ErleTimeDomain() const { return erle_estimator_.ErleTimeDomain(); } + + // Returns the ERL. + const std::array<float, kFftLengthBy2Plus1>& Erl() const { + return erl_estimator_.Erl(); + } + + // Returns the time-domain ERL. + float ErlTimeDomain() const { return erl_estimator_.ErlTimeDomain(); } + + // Returns the delay estimate based on the linear filter. + rtc::Optional<size_t> FilterDelay() const { return filter_delay_; } + + // Returns the externally provided delay. + rtc::Optional<size_t> ExternalDelay() const { return external_delay_; } + + // Returns whether the capture signal is saturated. + bool SaturatedCapture() const { return capture_signal_saturation_; } + + // Returns whether the echo signal is saturated. + bool SaturatedEcho() const { return echo_saturation_; } + + // Returns whether the echo path can saturate. + bool SaturatingEchoPath() const { return saturating_echo_path_; } + + // Updates the capture signal saturation. + void UpdateCaptureSaturation(bool capture_signal_saturation) { + capture_signal_saturation_ = capture_signal_saturation; + } + + // Returns whether the transparent mode is active + bool TransparentMode() const { return transparent_mode_; } + + // Takes appropriate action at an echo path change. + void HandleEchoPathChange(const EchoPathVariability& echo_path_variability); + + // Returns the decay factor for the echo reverberation. + float ReverbDecay() const { return reverb_decay_; } + + // Returns whether the echo suppression gain should be forced to zero. + bool ForcedZeroGain() const { return force_zero_gain_; } + + // Returns whether the echo in the capture signal is audible. + bool InaudibleEcho() const { return echo_audibility_.InaudibleEcho(); } + + // Updates the aec state with the AEC output signal. 
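+  // (Feeds the EchoAudibility tracker of the maximum nearend level, which is
+  // used when judging whether the echo estimate is masked.)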
+ void UpdateWithOutput(rtc::ArrayView<const float> e) { + echo_audibility_.UpdateWithOutput(e); + } + + // Returns whether the linear filter should have been able to adapt properly. + bool SufficientFilterUpdates() const { return sufficient_filter_updates_; } + + // Returns whether the echo subtractor can be used to determine the residual + // echo. + bool LinearEchoEstimate() const { return linear_echo_estimate_; } + + // Returns whether the AEC is in an initial state. + bool InitialState() const { return initial_state_; } + + // Updates the aec state. + void Update(const std::vector<std::array<float, kFftLengthBy2Plus1>>& + adaptive_filter_frequency_response, + const std::array<float, kAdaptiveFilterTimeDomainLength>& + adaptive_filter_impulse_response, + bool converged_filter, + const rtc::Optional<size_t>& external_delay_samples, + const RenderBuffer& render_buffer, + const std::array<float, kFftLengthBy2Plus1>& E2_main, + const std::array<float, kFftLengthBy2Plus1>& Y2, + rtc::ArrayView<const float> x, + const std::array<float, kBlockSize>& s_main, + bool echo_leakage_detected); + + private: + class EchoAudibility { + public: + void Update(rtc::ArrayView<const float> x, + const std::array<float, kBlockSize>& s, + bool converged_filter); + void UpdateWithOutput(rtc::ArrayView<const float> e); + bool InaudibleEcho() const { return inaudible_echo_; } + + private: + float max_nearend_ = 0.f; + size_t max_nearend_counter_ = 0; + size_t low_farend_counter_ = 0; + bool inaudible_echo_ = false; + }; + + void UpdateReverb(const std::array<float, kAdaptiveFilterTimeDomainLength>& + impulse_response); + + static int instance_count_; + std::unique_ptr<ApmDataDumper> data_dumper_; + ErlEstimator erl_estimator_; + ErleEstimator erle_estimator_; + size_t capture_block_counter_ = 0; + size_t blocks_with_filter_adaptation_ = 0; + size_t blocks_with_strong_render_ = 0; + bool usable_linear_estimate_ = false; + bool echo_leakage_detected_ = false; + bool capture_signal_saturation_ = false; + bool echo_saturation_ = false; + bool transparent_mode_ = false; + float previous_max_sample_ = 0.f; + std::array<float, kAdaptiveFilterLength> max_render_; + bool force_zero_gain_ = false; + bool render_received_ = false; + size_t force_zero_gain_counter_ = 0; + rtc::Optional<size_t> filter_delay_; + rtc::Optional<size_t> external_delay_; + size_t blocks_since_last_saturation_ = 1000; + float reverb_decay_to_test_ = 0.9f; + float reverb_decay_candidate_ = 0.f; + float reverb_decay_candidate_residual_ = -1.f; + EchoAudibility echo_audibility_; + const EchoCanceller3Config config_; + float reverb_decay_; + bool saturating_echo_path_ = false; + bool initial_state_ = true; + bool linear_echo_estimate_ = false; + bool sufficient_filter_updates_ = false; + + RTC_DISALLOW_COPY_AND_ASSIGN(AecState); +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_AEC_STATE_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/aec_state_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/aec_state_unittest.cc new file mode 100644 index 0000000000..34b877b4f9 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/aec_state_unittest.cc @@ -0,0 +1,231 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/aec_state.h" + +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "test/gtest.h" + +namespace webrtc { + +// Verify the general functionality of AecState +TEST(AecState, NormalUsage) { + ApmDataDumper data_dumper(42); + AecState state(EchoCanceller3Config{}); + RenderBuffer render_buffer(Aec3Optimization::kNone, 3, 30, + std::vector<size_t>(1, 30)); + std::array<float, kFftLengthBy2Plus1> E2_main = {}; + std::array<float, kFftLengthBy2Plus1> Y2 = {}; + std::vector<std::vector<float>> x(3, std::vector<float>(kBlockSize, 0.f)); + EchoPathVariability echo_path_variability(false, false); + std::array<float, kBlockSize> s; + s.fill(100.f); + + std::vector<std::array<float, kFftLengthBy2Plus1>> + converged_filter_frequency_response(10); + for (auto& v : converged_filter_frequency_response) { + v.fill(0.01f); + } + std::vector<std::array<float, kFftLengthBy2Plus1>> + diverged_filter_frequency_response = converged_filter_frequency_response; + converged_filter_frequency_response[2].fill(100.f); + converged_filter_frequency_response[2][0] = 1.f; + + std::array<float, kAdaptiveFilterTimeDomainLength> impulse_response; + impulse_response.fill(0.f); + + // Verify that linear AEC usability is false when the filter is diverged and + // there is no external delay reported. + state.Update(diverged_filter_frequency_response, impulse_response, true, + rtc::nullopt, render_buffer, E2_main, Y2, x[0], s, false); + EXPECT_FALSE(state.UsableLinearEstimate()); + + // Verify that linear AEC usability is true when the filter is converged + std::fill(x[0].begin(), x[0].end(), 101.f); + for (int k = 0; k < 3000; ++k) { + state.Update(converged_filter_frequency_response, impulse_response, true, 2, + render_buffer, E2_main, Y2, x[0], s, false); + } + EXPECT_TRUE(state.UsableLinearEstimate()); + + // Verify that linear AEC usability becomes false after an echo path change is + // reported + state.HandleEchoPathChange(EchoPathVariability(true, false)); + state.Update(converged_filter_frequency_response, impulse_response, true, 2, + render_buffer, E2_main, Y2, x[0], s, false); + EXPECT_FALSE(state.UsableLinearEstimate()); + + // Verify that the active render detection works as intended. + std::fill(x[0].begin(), x[0].end(), 101.f); + state.HandleEchoPathChange(EchoPathVariability(true, true)); + state.Update(converged_filter_frequency_response, impulse_response, true, 2, + render_buffer, E2_main, Y2, x[0], s, false); + EXPECT_FALSE(state.ActiveRender()); + + for (int k = 0; k < 1000; ++k) { + state.Update(converged_filter_frequency_response, impulse_response, true, 2, + render_buffer, E2_main, Y2, x[0], s, false); + } + EXPECT_TRUE(state.ActiveRender()); + + // Verify that echo leakage is properly reported. 
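+  // (The last Update() argument is the leakage flag; it should be passed
+  // through unchanged to EchoLeakageDetected().)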
+ state.Update(converged_filter_frequency_response, impulse_response, true, 2, + render_buffer, E2_main, Y2, x[0], s, false); + EXPECT_FALSE(state.EchoLeakageDetected()); + + state.Update(converged_filter_frequency_response, impulse_response, true, 2, + render_buffer, E2_main, Y2, x[0], s, true); + EXPECT_TRUE(state.EchoLeakageDetected()); + + // Verify that the ERL is properly estimated + for (auto& x_k : x) { + x_k = std::vector<float>(kBlockSize, 0.f); + } + + x[0][0] = 5000.f; + for (size_t k = 0; k < render_buffer.Buffer().size(); ++k) { + render_buffer.Insert(x); + } + + Y2.fill(10.f * 10000.f * 10000.f); + for (size_t k = 0; k < 1000; ++k) { + state.Update(converged_filter_frequency_response, impulse_response, true, 2, + render_buffer, E2_main, Y2, x[0], s, false); + } + + ASSERT_TRUE(state.UsableLinearEstimate()); + const std::array<float, kFftLengthBy2Plus1>& erl = state.Erl(); + EXPECT_EQ(erl[0], erl[1]); + for (size_t k = 1; k < erl.size() - 1; ++k) { + EXPECT_NEAR(k % 2 == 0 ? 10.f : 1000.f, erl[k], 0.1); + } + EXPECT_EQ(erl[erl.size() - 2], erl[erl.size() - 1]); + + // Verify that the ERLE is properly estimated + E2_main.fill(1.f * 10000.f * 10000.f); + Y2.fill(10.f * E2_main[0]); + for (size_t k = 0; k < 1000; ++k) { + state.Update(converged_filter_frequency_response, impulse_response, true, 2, + render_buffer, E2_main, Y2, x[0], s, false); + } + ASSERT_TRUE(state.UsableLinearEstimate()); + { + const auto& erle = state.Erle(); + EXPECT_EQ(erle[0], erle[1]); + constexpr size_t kLowFrequencyLimit = 32; + for (size_t k = 1; k < kLowFrequencyLimit; ++k) { + EXPECT_NEAR(k % 2 == 0 ? 8.f : 1.f, erle[k], 0.1); + } + for (size_t k = kLowFrequencyLimit; k < erle.size() - 1; ++k) { + EXPECT_NEAR(k % 2 == 0 ? 1.5f : 1.f, erle[k], 0.1); + } + EXPECT_EQ(erle[erle.size() - 2], erle[erle.size() - 1]); + } + + E2_main.fill(1.f * 10000.f * 10000.f); + Y2.fill(5.f * E2_main[0]); + for (size_t k = 0; k < 1000; ++k) { + state.Update(converged_filter_frequency_response, impulse_response, true, 2, + render_buffer, E2_main, Y2, x[0], s, false); + } + + ASSERT_TRUE(state.UsableLinearEstimate()); + { + const auto& erle = state.Erle(); + EXPECT_EQ(erle[0], erle[1]); + constexpr size_t kLowFrequencyLimit = 32; + for (size_t k = 1; k < kLowFrequencyLimit; ++k) { + EXPECT_NEAR(k % 2 == 0 ? 5.f : 1.f, erle[k], 0.1); + } + for (size_t k = kLowFrequencyLimit; k < erle.size() - 1; ++k) { + EXPECT_NEAR(k % 2 == 0 ? 1.5f : 1.f, erle[k], 0.1); + } + EXPECT_EQ(erle[erle.size() - 2], erle[erle.size() - 1]); + } +} + +// Verifies the delay for a converged filter is correctly identified. +TEST(AecState, ConvergedFilterDelay) { + constexpr int kFilterLength = 10; + AecState state(EchoCanceller3Config{}); + RenderBuffer render_buffer(Aec3Optimization::kNone, 3, 30, + std::vector<size_t>(1, 30)); + std::array<float, kFftLengthBy2Plus1> E2_main; + std::array<float, kFftLengthBy2Plus1> Y2; + std::array<float, kBlockSize> x; + EchoPathVariability echo_path_variability(false, false); + std::array<float, kBlockSize> s; + s.fill(100.f); + x.fill(0.f); + + std::vector<std::array<float, kFftLengthBy2Plus1>> frequency_response( + kFilterLength); + + std::array<float, kAdaptiveFilterTimeDomainLength> impulse_response; + impulse_response.fill(0.f); + + // Verify that the filter delay for a converged filter is properly identified. 
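+  // (Each iteration moves the dominant partition of the frequency response to
+  // index k and expects the estimated delay to track it.)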
+ for (int k = 0; k < kFilterLength; ++k) { + for (auto& v : frequency_response) { + v.fill(0.01f); + } + frequency_response[k].fill(100.f); + frequency_response[k][0] = 0.f; + state.HandleEchoPathChange(echo_path_variability); + state.Update(frequency_response, impulse_response, true, rtc::nullopt, + render_buffer, E2_main, Y2, x, s, false); + EXPECT_TRUE(k == (kFilterLength - 1) || state.FilterDelay()); + if (k != (kFilterLength - 1)) { + EXPECT_EQ(k, state.FilterDelay()); + } + } +} + +// Verify that the externally reported delay is properly reported and converted. +TEST(AecState, ExternalDelay) { + AecState state(EchoCanceller3Config{}); + std::array<float, kFftLengthBy2Plus1> E2_main; + std::array<float, kFftLengthBy2Plus1> E2_shadow; + std::array<float, kFftLengthBy2Plus1> Y2; + std::array<float, kBlockSize> x; + std::array<float, kBlockSize> s; + s.fill(100.f); + E2_main.fill(0.f); + E2_shadow.fill(0.f); + Y2.fill(0.f); + x.fill(0.f); + RenderBuffer render_buffer(Aec3Optimization::kNone, 3, 30, + std::vector<size_t>(1, 30)); + std::vector<std::array<float, kFftLengthBy2Plus1>> frequency_response( + kAdaptiveFilterLength); + for (auto& v : frequency_response) { + v.fill(0.01f); + } + + std::array<float, kAdaptiveFilterTimeDomainLength> impulse_response; + impulse_response.fill(0.f); + + for (size_t k = 0; k < frequency_response.size() - 1; ++k) { + state.HandleEchoPathChange(EchoPathVariability(false, false)); + state.Update(frequency_response, impulse_response, true, k * kBlockSize + 5, + render_buffer, E2_main, Y2, x, s, false); + EXPECT_TRUE(state.ExternalDelay()); + EXPECT_EQ(k, state.ExternalDelay()); + } + + // Verify that the externally reported delay is properly unset when it is no + // longer present. + state.HandleEchoPathChange(EchoPathVariability(false, false)); + state.Update(frequency_response, impulse_response, true, rtc::nullopt, + render_buffer, E2_main, Y2, x, s, false); + EXPECT_FALSE(state.ExternalDelay()); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/block_framer.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/block_framer.cc new file mode 100644 index 0000000000..3160624515 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/block_framer.cc @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/block_framer.h" + +#include <algorithm> + +#include "rtc_base/checks.h" + +namespace webrtc { + +BlockFramer::BlockFramer(size_t num_bands) + : num_bands_(num_bands), + buffer_(num_bands_, std::vector<float>(kBlockSize, 0.f)) {} + +BlockFramer::~BlockFramer() = default; + +// All the constants are chosen so that the buffer is either empty or has enough +// samples for InsertBlockAndExtractSubFrame to produce a frame. In order to +// achieve this, the InsertBlockAndExtractSubFrame and InsertBlock methods need +// to be called in the correct order. 
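Concretely: a sub-frame is 80 samples but a block is 64, so four extractions consume five blocks (4 × 80 = 5 × 64 = 320 samples), and the buffer occupancy after each extraction cycles 48 → 32 → 16 → 0. The fifth block of each cycle is the one delivered through InsertBlock. A standalone sketch of that bookkeeping, independent of the class below:

```cpp
// Standalone model of the BlockFramer buffer occupancy: each extraction
// consumes kSubFrameLength samples, each inserted block adds kBlockSize.
#include <cassert>
#include <cstddef>

int main() {
  constexpr size_t kBlockSize = 64;
  constexpr size_t kSubFrameLength = 80;
  size_t buffered = kBlockSize;  // The framer starts with one block of zeros.
  for (int sub_frame = 0; sub_frame < 8; ++sub_frame) {
    buffered += kBlockSize;       // InsertBlockAndExtractSubFrame: add a block
    buffered -= kSubFrameLength;  // ... and emit one 80-sample sub-frame.
    if (buffered == 0) {
      // Occupancy hits zero after every fourth sub-frame; the extra fifth
      // block must then be pushed with InsertBlock before the next extraction.
      buffered += kBlockSize;
      assert((sub_frame + 1) % 4 == 0);
    }
  }
  return 0;
}
```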
+void BlockFramer::InsertBlock(const std::vector<std::vector<float>>& block) {
+  RTC_DCHECK_EQ(num_bands_, block.size());
+  for (size_t i = 0; i < num_bands_; ++i) {
+    RTC_DCHECK_EQ(kBlockSize, block[i].size());
+    RTC_DCHECK_EQ(0, buffer_[i].size());
+    buffer_[i].insert(buffer_[i].begin(), block[i].begin(), block[i].end());
+  }
+}
+
+void BlockFramer::InsertBlockAndExtractSubFrame(
+    const std::vector<std::vector<float>>& block,
+    std::vector<rtc::ArrayView<float>>* sub_frame) {
+  RTC_DCHECK(sub_frame);
+  RTC_DCHECK_EQ(num_bands_, block.size());
+  RTC_DCHECK_EQ(num_bands_, sub_frame->size());
+  for (size_t i = 0; i < num_bands_; ++i) {
+    RTC_DCHECK_LE(kSubFrameLength, buffer_[i].size() + kBlockSize);
+    RTC_DCHECK_EQ(kBlockSize, block[i].size());
+    RTC_DCHECK_GE(kBlockSize, buffer_[i].size());
+    RTC_DCHECK_EQ(kSubFrameLength, (*sub_frame)[i].size());
+    const int samples_to_frame = kSubFrameLength - buffer_[i].size();
+    std::copy(buffer_[i].begin(), buffer_[i].end(), (*sub_frame)[i].begin());
+    std::copy(block[i].begin(), block[i].begin() + samples_to_frame,
+              (*sub_frame)[i].begin() + buffer_[i].size());
+    buffer_[i].clear();
+    buffer_[i].insert(buffer_[i].begin(), block[i].begin() + samples_to_frame,
+                      block[i].end());
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/block_framer.h b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/block_framer.h
new file mode 100644
index 0000000000..923e4cfb53
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/block_framer.h
@@ -0,0 +1,47 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AEC3_BLOCK_FRAMER_H_
+#define MODULES_AUDIO_PROCESSING_AEC3_BLOCK_FRAMER_H_
+
+#include <vector>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "rtc_base/constructormagic.h"
+
+namespace webrtc {
+
+// Class for producing frames consisting of 1 or 2 subframes of 80 samples each
+// from 64 sample blocks. The class is designed to work together with the
+// FrameBlocker class which performs the reverse conversion. Used together with
+// that, this class produces output frames at the same rate as frames are
+// received by the FrameBlocker class. Note that the internal buffers will
+// overrun if any other rate of packet insertion is used.
+class BlockFramer {
+ public:
+  explicit BlockFramer(size_t num_bands);
+  ~BlockFramer();
+  // Adds a 64 sample block into the data that will form the next output frame.
+  void InsertBlock(const std::vector<std::vector<float>>& block);
+  // Adds a 64 sample block and extracts an 80 sample subframe.
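+  // (This is the regular per-sub-frame call; InsertBlock above is only valid
+  // when the internal buffer is empty, which happens after every fourth
+  // extraction.)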
+ void InsertBlockAndExtractSubFrame( + const std::vector<std::vector<float>>& block, + std::vector<rtc::ArrayView<float>>* sub_frame); + + private: + const size_t num_bands_; + std::vector<std::vector<float>> buffer_; + + RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(BlockFramer); +}; +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_BLOCK_FRAMER_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/block_framer_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/block_framer_unittest.cc new file mode 100644 index 0000000000..16d3944e13 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/block_framer_unittest.cc @@ -0,0 +1,261 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/block_framer.h" + +#include <sstream> +#include <string> +#include <vector> + +#include "modules/audio_processing/aec3/aec3_common.h" +#include "test/gtest.h" + +namespace webrtc { +namespace { + +void SetupSubFrameView(std::vector<std::vector<float>>* sub_frame, + std::vector<rtc::ArrayView<float>>* sub_frame_view) { + for (size_t k = 0; k < sub_frame_view->size(); ++k) { + (*sub_frame_view)[k] = + rtc::ArrayView<float>((*sub_frame)[k].data(), (*sub_frame)[k].size()); + } +} + +float ComputeSampleValue(size_t chunk_counter, + size_t chunk_size, + size_t band, + size_t sample_index, + int offset) { + float value = + static_cast<int>(chunk_counter * chunk_size + sample_index) + offset; + return value > 0 ? 5000 * band + value : 0; +} + +bool VerifySubFrame(size_t sub_frame_counter, + int offset, + const std::vector<rtc::ArrayView<float>>& sub_frame_view) { + for (size_t k = 0; k < sub_frame_view.size(); ++k) { + for (size_t i = 0; i < sub_frame_view[k].size(); ++i) { + const float reference_value = + ComputeSampleValue(sub_frame_counter, kSubFrameLength, k, i, offset); + if (reference_value != sub_frame_view[k][i]) { + return false; + } + } + } + return true; +} + +void FillBlock(size_t block_counter, std::vector<std::vector<float>>* block) { + for (size_t k = 0; k < block->size(); ++k) { + for (size_t i = 0; i < (*block)[0].size(); ++i) { + (*block)[k][i] = ComputeSampleValue(block_counter, kBlockSize, k, i, 0); + } + } +} + +// Verifies that the BlockFramer is able to produce the expected frame content. 
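+// (The offset of -64 passed to VerifySubFrame below accounts for the framer's
+// initial buffer of one block of zeros: the output stream is the input stream
+// delayed by kBlockSize samples, and ComputeSampleValue clamps the resulting
+// negative values of that lead-in to zero.)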
+void RunFramerTest(int sample_rate_hz) { + constexpr size_t kNumSubFramesToProcess = 2; + const size_t num_bands = NumBandsForRate(sample_rate_hz); + + std::vector<std::vector<float>> block(num_bands, + std::vector<float>(kBlockSize, 0.f)); + std::vector<std::vector<float>> output_sub_frame( + num_bands, std::vector<float>(kSubFrameLength, 0.f)); + std::vector<rtc::ArrayView<float>> output_sub_frame_view(num_bands); + SetupSubFrameView(&output_sub_frame, &output_sub_frame_view); + BlockFramer framer(num_bands); + + size_t block_index = 0; + for (size_t sub_frame_index = 0; sub_frame_index < kNumSubFramesToProcess; + ++sub_frame_index) { + FillBlock(block_index++, &block); + framer.InsertBlockAndExtractSubFrame(block, &output_sub_frame_view); + EXPECT_TRUE(VerifySubFrame(sub_frame_index, -64, output_sub_frame_view)); + + if ((sub_frame_index + 1) % 4 == 0) { + FillBlock(block_index++, &block); + framer.InsertBlock(block); + } + } +} + +#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) +// Verifies that the BlockFramer crashes if the InsertBlockAndExtractSubFrame +// method is called for inputs with the wrong number of bands or band lengths. +void RunWronglySizedInsertAndExtractParametersTest(int sample_rate_hz, + size_t num_block_bands, + size_t block_length, + size_t num_sub_frame_bands, + size_t sub_frame_length) { + const size_t correct_num_bands = NumBandsForRate(sample_rate_hz); + + std::vector<std::vector<float>> block(num_block_bands, + std::vector<float>(block_length, 0.f)); + std::vector<std::vector<float>> output_sub_frame( + num_sub_frame_bands, std::vector<float>(sub_frame_length, 0.f)); + std::vector<rtc::ArrayView<float>> output_sub_frame_view( + output_sub_frame.size()); + SetupSubFrameView(&output_sub_frame, &output_sub_frame_view); + BlockFramer framer(correct_num_bands); + EXPECT_DEATH( + framer.InsertBlockAndExtractSubFrame(block, &output_sub_frame_view), ""); +} + +// Verifies that the BlockFramer crashes if the InsertBlock method is called for +// inputs with the wrong number of bands or band lengths. +void RunWronglySizedInsertParameterTest(int sample_rate_hz, + size_t num_block_bands, + size_t block_length) { + const size_t correct_num_bands = NumBandsForRate(sample_rate_hz); + + std::vector<std::vector<float>> correct_block( + correct_num_bands, std::vector<float>(kBlockSize, 0.f)); + std::vector<std::vector<float>> wrong_block( + num_block_bands, std::vector<float>(block_length, 0.f)); + std::vector<std::vector<float>> output_sub_frame( + correct_num_bands, std::vector<float>(kSubFrameLength, 0.f)); + std::vector<rtc::ArrayView<float>> output_sub_frame_view( + output_sub_frame.size()); + SetupSubFrameView(&output_sub_frame, &output_sub_frame_view); + BlockFramer framer(correct_num_bands); + framer.InsertBlockAndExtractSubFrame(correct_block, &output_sub_frame_view); + framer.InsertBlockAndExtractSubFrame(correct_block, &output_sub_frame_view); + framer.InsertBlockAndExtractSubFrame(correct_block, &output_sub_frame_view); + framer.InsertBlockAndExtractSubFrame(correct_block, &output_sub_frame_view); + + EXPECT_DEATH(framer.InsertBlock(wrong_block), ""); +} + +// Verifies that the BlockFramer crashes if the InsertBlock method is called +// after a wrong number of previous InsertBlockAndExtractSubFrame method calls +// have been made. 
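+// (The internal buffer is only empty after every fourth
+// InsertBlockAndExtractSubFrame call, so for the zero to three preceding
+// calls used below, InsertBlock must trip the buffer-size DCHECK.)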
+void RunWronglyInsertOrderTest(int sample_rate_hz, + size_t num_preceeding_api_calls) { + const size_t correct_num_bands = NumBandsForRate(sample_rate_hz); + + std::vector<std::vector<float>> block(correct_num_bands, + std::vector<float>(kBlockSize, 0.f)); + std::vector<std::vector<float>> output_sub_frame( + correct_num_bands, std::vector<float>(kSubFrameLength, 0.f)); + std::vector<rtc::ArrayView<float>> output_sub_frame_view( + output_sub_frame.size()); + SetupSubFrameView(&output_sub_frame, &output_sub_frame_view); + BlockFramer framer(correct_num_bands); + for (size_t k = 0; k < num_preceeding_api_calls; ++k) { + framer.InsertBlockAndExtractSubFrame(block, &output_sub_frame_view); + } + + EXPECT_DEATH(framer.InsertBlock(block), ""); +} +#endif + +std::string ProduceDebugText(int sample_rate_hz) { + std::ostringstream ss; + ss << "Sample rate: " << sample_rate_hz; + return ss.str(); +} + +} // namespace + +#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) +TEST(BlockFramer, WrongNumberOfBandsInBlockForInsertBlockAndExtractSubFrame) { + for (auto rate : {8000, 16000, 32000, 48000}) { + SCOPED_TRACE(ProduceDebugText(rate)); + const size_t correct_num_bands = NumBandsForRate(rate); + const size_t wrong_num_bands = (correct_num_bands % 3) + 1; + RunWronglySizedInsertAndExtractParametersTest( + rate, wrong_num_bands, kBlockSize, correct_num_bands, kSubFrameLength); + } +} + +TEST(BlockFramer, + WrongNumberOfBandsInSubFrameForInsertBlockAndExtractSubFrame) { + for (auto rate : {8000, 16000, 32000, 48000}) { + SCOPED_TRACE(ProduceDebugText(rate)); + const size_t correct_num_bands = NumBandsForRate(rate); + const size_t wrong_num_bands = (correct_num_bands % 3) + 1; + RunWronglySizedInsertAndExtractParametersTest( + rate, correct_num_bands, kBlockSize, wrong_num_bands, kSubFrameLength); + } +} + +TEST(BlockFramer, WrongNumberOfSamplesInBlockForInsertBlockAndExtractSubFrame) { + for (auto rate : {8000, 16000, 32000, 48000}) { + SCOPED_TRACE(ProduceDebugText(rate)); + const size_t correct_num_bands = NumBandsForRate(rate); + RunWronglySizedInsertAndExtractParametersTest( + rate, correct_num_bands, kBlockSize - 1, correct_num_bands, + kSubFrameLength); + } +} + +TEST(BlockFramer, + WrongNumberOfSamplesInSubFrameForInsertBlockAndExtractSubFrame) { + for (auto rate : {8000, 16000, 32000, 48000}) { + SCOPED_TRACE(ProduceDebugText(rate)); + const size_t correct_num_bands = NumBandsForRate(rate); + RunWronglySizedInsertAndExtractParametersTest(rate, correct_num_bands, + kBlockSize, correct_num_bands, + kSubFrameLength - 1); + } +} + +TEST(BlockFramer, WrongNumberOfBandsInBlockForInsertBlock) { + for (auto rate : {8000, 16000, 32000, 48000}) { + SCOPED_TRACE(ProduceDebugText(rate)); + const size_t correct_num_bands = NumBandsForRate(rate); + const size_t wrong_num_bands = (correct_num_bands % 3) + 1; + RunWronglySizedInsertParameterTest(rate, wrong_num_bands, kBlockSize); + } +} + +TEST(BlockFramer, WrongNumberOfSamplesInBlockForInsertBlock) { + for (auto rate : {8000, 16000, 32000, 48000}) { + SCOPED_TRACE(ProduceDebugText(rate)); + const size_t correct_num_bands = NumBandsForRate(rate); + RunWronglySizedInsertParameterTest(rate, correct_num_bands, kBlockSize - 1); + } +} + +TEST(BlockFramer, WrongNumberOfPreceedingApiCallsForInsertBlock) { + for (auto rate : {8000, 16000, 32000, 48000}) { + for (size_t num_calls = 0; num_calls < 4; ++num_calls) { + std::ostringstream ss; + ss << "Sample rate: " << rate; + ss << ", Num preceeding InsertBlockAndExtractSubFrame calls: " + << 
num_calls; + + SCOPED_TRACE(ss.str()); + RunWronglyInsertOrderTest(rate, num_calls); + } + } +} + +// Verifiers that the verification for null sub_frame pointer works. +TEST(BlockFramer, NullSubFrameParameter) { + EXPECT_DEATH(BlockFramer(1).InsertBlockAndExtractSubFrame( + std::vector<std::vector<float>>( + 1, std::vector<float>(kBlockSize, 0.f)), + nullptr), + ""); +} + +#endif + +TEST(BlockFramer, FrameBitexactness) { + for (auto rate : {8000, 16000, 32000, 48000}) { + SCOPED_TRACE(ProduceDebugText(rate)); + RunFramerTest(rate); + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/block_processor.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/block_processor.cc new file mode 100644 index 0000000000..f0b963087c --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/block_processor.cc @@ -0,0 +1,231 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include "modules/audio_processing/aec3/block_processor.h" + +#include "api/optional.h" +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/block_processor_metrics.h" +#include "modules/audio_processing/aec3/echo_path_variability.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/atomicops.h" +#include "rtc_base/constructormagic.h" +#include "rtc_base/logging.h" + +namespace webrtc { +namespace { + +enum class BlockProcessorApiCall { kCapture, kRender }; + +class BlockProcessorImpl final : public BlockProcessor { + public: + BlockProcessorImpl(int sample_rate_hz, + std::unique_ptr<RenderDelayBuffer> render_buffer, + std::unique_ptr<RenderDelayController> delay_controller, + std::unique_ptr<EchoRemover> echo_remover); + + ~BlockProcessorImpl() override; + + void ProcessCapture(bool echo_path_gain_change, + bool capture_signal_saturation, + std::vector<std::vector<float>>* capture_block) override; + + void BufferRender(const std::vector<std::vector<float>>& block) override; + + void UpdateEchoLeakageStatus(bool leakage_detected) override; + + void GetMetrics(EchoControl::Metrics* metrics) const override; + + private: + static int instance_count_; + bool no_capture_data_received_ = true; + bool no_render_data_received_ = true; + std::unique_ptr<ApmDataDumper> data_dumper_; + const size_t sample_rate_hz_; + std::unique_ptr<RenderDelayBuffer> render_buffer_; + std::unique_ptr<RenderDelayController> delay_controller_; + std::unique_ptr<EchoRemover> echo_remover_; + BlockProcessorMetrics metrics_; + bool render_buffer_overrun_occurred_ = false; + RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(BlockProcessorImpl); +}; + +int BlockProcessorImpl::instance_count_ = 0; + +BlockProcessorImpl::BlockProcessorImpl( + int sample_rate_hz, + std::unique_ptr<RenderDelayBuffer> render_buffer, + std::unique_ptr<RenderDelayController> delay_controller, + std::unique_ptr<EchoRemover> echo_remover) + : data_dumper_( + new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))), + sample_rate_hz_(sample_rate_hz), + render_buffer_(std::move(render_buffer)), + delay_controller_(std::move(delay_controller)), + echo_remover_(std::move(echo_remover)) { 
+  RTC_DCHECK(ValidFullBandRate(sample_rate_hz_));
+}
+
+BlockProcessorImpl::~BlockProcessorImpl() = default;
+
+void BlockProcessorImpl::ProcessCapture(
+    bool echo_path_gain_change,
+    bool capture_signal_saturation,
+    std::vector<std::vector<float>>* capture_block) {
+  RTC_DCHECK(capture_block);
+  RTC_DCHECK_EQ(NumBandsForRate(sample_rate_hz_), capture_block->size());
+  RTC_DCHECK_EQ(kBlockSize, (*capture_block)[0].size());
+  data_dumper_->DumpRaw("aec3_processblock_call_order",
+                        static_cast<int>(BlockProcessorApiCall::kCapture));
+  data_dumper_->DumpWav("aec3_processblock_capture_input", kBlockSize,
+                        &(*capture_block)[0][0],
+                        LowestBandRate(sample_rate_hz_), 1);
+
+  // Do not start processing until render data has been buffered, as
+  // processing without render data would wrongly align the buffers.
+  no_capture_data_received_ = false;
+  if (no_render_data_received_) {
+    return;
+  }
+
+  data_dumper_->DumpWav("aec3_processblock_capture_input2", kBlockSize,
+                        &(*capture_block)[0][0],
+                        LowestBandRate(sample_rate_hz_), 1);
+
+  bool render_buffer_underrun = false;
+  if (render_buffer_overrun_occurred_) {
+    // Reset the render buffers and the alignment functionality when there has
+    // been a render buffer overrun as the buffer alignment may be noncausal.
+    delay_controller_->Reset();
+    render_buffer_->Reset();
+    RTC_LOG(LS_WARNING) << "Reset due to detected render buffer overrun.";
+  }
+
+  // Update the render buffers with new render data, filling the buffers with
+  // empty blocks when there is no render data available.
+  render_buffer_underrun = !render_buffer_->UpdateBuffers();
+  if (render_buffer_underrun) {
+    RTC_LOG(LS_WARNING) << "Render API jitter buffer underrun.";
+  }
+
+  // Compute and apply the render delay required to achieve proper signal
+  // alignment.
+  const size_t old_delay = render_buffer_->Delay();
+  const size_t new_delay = delay_controller_->GetDelay(
+      render_buffer_->GetDownsampledRenderBuffer(), (*capture_block)[0]);
+
+  bool delay_change;
+  if (new_delay >= kMinEchoPathDelayBlocks) {
+    render_buffer_->SetDelay(new_delay);
+    const size_t achieved_delay = render_buffer_->Delay();
+    delay_change = old_delay != achieved_delay || old_delay != new_delay ||
+                   render_buffer_overrun_occurred_;
+
+    // Inform the delay controller of the actually set delay to allow it to
+    // properly react to a non-feasible delay.
+    delay_controller_->SetDelay(achieved_delay);
+  } else {
+    delay_controller_->Reset();
+    render_buffer_->Reset();
+    delay_change = true;
+    RTC_LOG(LS_WARNING) << "Reset due to noncausal delay.";
+  }
+
+  // Remove the echo from the capture signal.
+  echo_remover_->ProcessCapture(
+      delay_controller_->AlignmentHeadroomSamples(),
+      EchoPathVariability(echo_path_gain_change, delay_change),
+      capture_signal_saturation, render_buffer_->GetRenderBuffer(),
+      capture_block);
+
+  // Update the metrics.
+  metrics_.UpdateCapture(render_buffer_underrun);
+
+  render_buffer_overrun_occurred_ = false;
+}
+
+void BlockProcessorImpl::BufferRender(
+    const std::vector<std::vector<float>>& block) {
+  RTC_DCHECK_EQ(NumBandsForRate(sample_rate_hz_), block.size());
+  RTC_DCHECK_EQ(kBlockSize, block[0].size());
+  data_dumper_->DumpRaw("aec3_processblock_call_order",
+                        static_cast<int>(BlockProcessorApiCall::kRender));
+  data_dumper_->DumpWav("aec3_processblock_render_input", kBlockSize,
+                        &block[0][0], LowestBandRate(sample_rate_hz_), 1);
+
+  no_render_data_received_ = false;
+
+  // Do not start buffering render data until capture data has been received,
+  // as that data may give a false alignment.
+ if (no_capture_data_received_) { + return; + } + + data_dumper_->DumpWav("aec3_processblock_render_input2", kBlockSize, + &block[0][0], LowestBandRate(sample_rate_hz_), 1); + + // Buffer the render data. + render_buffer_overrun_occurred_ = !render_buffer_->Insert(block); + + // Update the metrics. + metrics_.UpdateRender(render_buffer_overrun_occurred_); +} + +void BlockProcessorImpl::UpdateEchoLeakageStatus(bool leakage_detected) { + echo_remover_->UpdateEchoLeakageStatus(leakage_detected); +} + +void BlockProcessorImpl::GetMetrics(EchoControl::Metrics* metrics) const { + echo_remover_->GetMetrics(metrics); + const int block_size_ms = sample_rate_hz_ == 8000 ? 8 : 4; + metrics->delay_ms = static_cast<int>(render_buffer_->Delay()) * block_size_ms; +} + +} // namespace + +BlockProcessor* BlockProcessor::Create(const EchoCanceller3Config& config, + int sample_rate_hz) { + std::unique_ptr<RenderDelayBuffer> render_buffer(RenderDelayBuffer::Create( + NumBandsForRate(sample_rate_hz), config.delay.down_sampling_factor, + GetDownSampledBufferSize(config.delay.down_sampling_factor, + config.delay.num_filters), + GetRenderDelayBufferSize(config.delay.down_sampling_factor, + config.delay.num_filters))); + std::unique_ptr<RenderDelayController> delay_controller( + RenderDelayController::Create(config, sample_rate_hz)); + std::unique_ptr<EchoRemover> echo_remover( + EchoRemover::Create(config, sample_rate_hz)); + return Create(config, sample_rate_hz, std::move(render_buffer), + std::move(delay_controller), std::move(echo_remover)); +} + +BlockProcessor* BlockProcessor::Create( + const EchoCanceller3Config& config, + int sample_rate_hz, + std::unique_ptr<RenderDelayBuffer> render_buffer) { + std::unique_ptr<RenderDelayController> delay_controller( + RenderDelayController::Create(config, sample_rate_hz)); + std::unique_ptr<EchoRemover> echo_remover( + EchoRemover::Create(config, sample_rate_hz)); + return Create(config, sample_rate_hz, std::move(render_buffer), + std::move(delay_controller), std::move(echo_remover)); +} + +BlockProcessor* BlockProcessor::Create( + const EchoCanceller3Config& config, + int sample_rate_hz, + std::unique_ptr<RenderDelayBuffer> render_buffer, + std::unique_ptr<RenderDelayController> delay_controller, + std::unique_ptr<EchoRemover> echo_remover) { + return new BlockProcessorImpl(sample_rate_hz, std::move(render_buffer), + std::move(delay_controller), + std::move(echo_remover)); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/block_processor.h b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/block_processor.h new file mode 100644 index 0000000000..8687bc2282 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/block_processor.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_BLOCK_PROCESSOR_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_BLOCK_PROCESSOR_H_ + +#include <memory> +#include <vector> + +#include "modules/audio_processing/aec3/echo_remover.h" +#include "modules/audio_processing/aec3/render_delay_buffer.h" +#include "modules/audio_processing/aec3/render_delay_controller.h" + +namespace webrtc { + +// Class for performing echo cancellation on 64 sample blocks of audio data. +class BlockProcessor { + public: + static BlockProcessor* Create(const EchoCanceller3Config& config, + int sample_rate_hz); + // Only used for testing purposes. + static BlockProcessor* Create( + const EchoCanceller3Config& config, + int sample_rate_hz, + std::unique_ptr<RenderDelayBuffer> render_buffer); + static BlockProcessor* Create( + const EchoCanceller3Config& config, + int sample_rate_hz, + std::unique_ptr<RenderDelayBuffer> render_buffer, + std::unique_ptr<RenderDelayController> delay_controller, + std::unique_ptr<EchoRemover> echo_remover); + + virtual ~BlockProcessor() = default; + + // Get current metrics. + virtual void GetMetrics(EchoControl::Metrics* metrics) const = 0; + + // Processes a block of capture data. + virtual void ProcessCapture( + bool echo_path_gain_change, + bool capture_signal_saturation, + std::vector<std::vector<float>>* capture_block) = 0; + + // Buffers a block of render data supplied by a FrameBlocker object. + virtual void BufferRender( + const std::vector<std::vector<float>>& render_block) = 0; + + // Reports whether echo leakage has been detected in the echo canceller + // output. + virtual void UpdateEchoLeakageStatus(bool leakage_detected) = 0; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_BLOCK_PROCESSOR_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/block_processor_metrics.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/block_processor_metrics.cc new file mode 100644 index 0000000000..c8bdda7389 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/block_processor_metrics.cc @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/aec3/block_processor_metrics.h" + +#include "modules/audio_processing/aec3/aec3_common.h" +#include "system_wrappers/include/metrics.h" + +namespace webrtc { + +namespace { + +enum class RenderUnderrunCategory { + kNone, + kFew, + kSeveral, + kMany, + kConstant, + kNumCategories +}; + +enum class RenderOverrunCategory { + kNone, + kFew, + kSeveral, + kMany, + kConstant, + kNumCategories +}; + +} // namespace + +void BlockProcessorMetrics::UpdateCapture(bool underrun) { + ++capture_block_counter_; + if (underrun) { + ++render_buffer_underruns_; + } + + if (capture_block_counter_ == kMetricsReportingIntervalBlocks) { + metrics_reported_ = true; + + RenderUnderrunCategory underrun_category; + if (render_buffer_underruns_ == 0) { + underrun_category = RenderUnderrunCategory::kNone; + } else if (render_buffer_underruns_ > (capture_block_counter_ >> 1)) { + underrun_category = RenderUnderrunCategory::kConstant; + } else if (render_buffer_underruns_ > 100) { + underrun_category = RenderUnderrunCategory::kMany; + } else if (render_buffer_underruns_ > 10) { + underrun_category = RenderUnderrunCategory::kSeveral; + } else { + underrun_category = RenderUnderrunCategory::kFew; + } + RTC_HISTOGRAM_ENUMERATION( + "WebRTC.Audio.EchoCanceller.RenderUnderruns", + static_cast<int>(underrun_category), + static_cast<int>(RenderUnderrunCategory::kNumCategories)); + + RenderOverrunCategory overrun_category; + if (render_buffer_overruns_ == 0) { + overrun_category = RenderOverrunCategory::kNone; + } else if (render_buffer_overruns_ > (buffer_render_calls_ >> 1)) { + overrun_category = RenderOverrunCategory::kConstant; + } else if (render_buffer_overruns_ > 100) { + overrun_category = RenderOverrunCategory::kMany; + } else if (render_buffer_overruns_ > 10) { + overrun_category = RenderOverrunCategory::kSeveral; + } else { + overrun_category = RenderOverrunCategory::kFew; + } + RTC_HISTOGRAM_ENUMERATION( + "WebRTC.Audio.EchoCanceller.RenderOverruns", + static_cast<int>(overrun_category), + static_cast<int>(RenderOverrunCategory::kNumCategories)); + + ResetMetrics(); + capture_block_counter_ = 0; + } else { + metrics_reported_ = false; + } +} + +void BlockProcessorMetrics::UpdateRender(bool overrun) { + ++buffer_render_calls_; + if (overrun) { + ++render_buffer_overruns_; + } +} + +void BlockProcessorMetrics::ResetMetrics() { + render_buffer_underruns_ = 0; + render_buffer_overruns_ = 0; + buffer_render_calls_ = 0; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/block_processor_metrics.h b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/block_processor_metrics.h new file mode 100644 index 0000000000..9b437c0d13 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/block_processor_metrics.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_BLOCK_PROCESSOR_METRICS_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_BLOCK_PROCESSOR_METRICS_H_ + +#include "rtc_base/constructormagic.h" + +namespace webrtc { + +// Handles the reporting of metrics for the block_processor. 
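+// (Editor's illustration of the thresholds in
+// BlockProcessorMetrics::UpdateCapture(), defined in the .cc file above: over
+// one reporting interval, 0 render buffer underruns map to kNone, 1-10 to
+// kFew, 11-100 to kSeveral and more than 100 to kMany, while more than half
+// of all capture blocks maps to kConstant; the kConstant test takes
+// precedence over the count-based categories.)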
+class BlockProcessorMetrics { + public: + BlockProcessorMetrics() = default; + + // Updates the metric with new capture data. + void UpdateCapture(bool underrun); + + // Updates the metric with new render data. + void UpdateRender(bool overrun); + + // Returns true if the metrics have just been reported, otherwise false. + bool MetricsReported() { return metrics_reported_; } + + private: + // Resets the metrics. + void ResetMetrics(); + + int capture_block_counter_ = 0; + bool metrics_reported_ = false; + int render_buffer_underruns_ = 0; + int render_buffer_overruns_ = 0; + int buffer_render_calls_ = 0; + + RTC_DISALLOW_COPY_AND_ASSIGN(BlockProcessorMetrics); +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_BLOCK_PROCESSOR_METRICS_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/block_processor_metrics_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/block_processor_metrics_unittest.cc new file mode 100644 index 0000000000..7ce8573efa --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/block_processor_metrics_unittest.cc @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/block_processor_metrics.h" + +#include "test/gtest.h" + +namespace webrtc { + +// Verify the general functionality of BlockProcessorMetrics. +TEST(BlockProcessorMetrics, NormalUsage) { + BlockProcessorMetrics metrics; + + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < kMetricsReportingIntervalBlocks - 1; ++k) { + metrics.UpdateRender(false); + metrics.UpdateRender(false); + metrics.UpdateCapture(false); + EXPECT_FALSE(metrics.MetricsReported()); + } + metrics.UpdateCapture(false); + EXPECT_TRUE(metrics.MetricsReported()); + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/block_processor_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/block_processor_unittest.cc new file mode 100644 index 0000000000..18d1f65ce3 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/block_processor_unittest.cc @@ -0,0 +1,260 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/aec3/block_processor.h" + +#include <memory> +#include <sstream> +#include <string> +#include <vector> + +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/mock/mock_echo_remover.h" +#include "modules/audio_processing/aec3/mock/mock_render_delay_buffer.h" +#include "modules/audio_processing/aec3/mock/mock_render_delay_controller.h" +#include "modules/audio_processing/test/echo_canceller_test_tools.h" +#include "rtc_base/checks.h" +#include "rtc_base/random.h" +#include "test/gmock.h" +#include "test/gtest.h" + +namespace webrtc { +namespace { + +using testing::AtLeast; +using testing::Return; +using testing::StrictMock; +using testing::_; + +// Verifies that the basic BlockProcessor functionality works and that the API +// methods are callable. +void RunBasicSetupAndApiCallTest(int sample_rate_hz) { + std::unique_ptr<BlockProcessor> block_processor( + BlockProcessor::Create(EchoCanceller3Config(), sample_rate_hz)); + std::vector<std::vector<float>> block(NumBandsForRate(sample_rate_hz), + std::vector<float>(kBlockSize, 0.f)); + + block_processor->BufferRender(block); + block_processor->ProcessCapture(false, false, &block); + block_processor->UpdateEchoLeakageStatus(false); +} + +#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) +void RunRenderBlockSizeVerificationTest(int sample_rate_hz) { + std::unique_ptr<BlockProcessor> block_processor( + BlockProcessor::Create(EchoCanceller3Config(), sample_rate_hz)); + std::vector<std::vector<float>> block( + NumBandsForRate(sample_rate_hz), std::vector<float>(kBlockSize - 1, 0.f)); + + EXPECT_DEATH(block_processor->BufferRender(block), ""); +} + +void RunCaptureBlockSizeVerificationTest(int sample_rate_hz) { + std::unique_ptr<BlockProcessor> block_processor( + BlockProcessor::Create(EchoCanceller3Config(), sample_rate_hz)); + std::vector<std::vector<float>> block( + NumBandsForRate(sample_rate_hz), std::vector<float>(kBlockSize - 1, 0.f)); + + EXPECT_DEATH(block_processor->ProcessCapture(false, false, &block), ""); +} + +void RunRenderNumBandsVerificationTest(int sample_rate_hz) { + const size_t wrong_num_bands = NumBandsForRate(sample_rate_hz) < 3 + ? NumBandsForRate(sample_rate_hz) + 1 + : 1; + std::unique_ptr<BlockProcessor> block_processor( + BlockProcessor::Create(EchoCanceller3Config(), sample_rate_hz)); + std::vector<std::vector<float>> block(wrong_num_bands, + std::vector<float>(kBlockSize, 0.f)); + + EXPECT_DEATH(block_processor->BufferRender(block), ""); +} + +void RunCaptureNumBandsVerificationTest(int sample_rate_hz) { + const size_t wrong_num_bands = NumBandsForRate(sample_rate_hz) < 3 + ? NumBandsForRate(sample_rate_hz) + 1 + : 1; + std::unique_ptr<BlockProcessor> block_processor( + BlockProcessor::Create(EchoCanceller3Config(), sample_rate_hz)); + std::vector<std::vector<float>> block(wrong_num_bands, + std::vector<float>(kBlockSize, 0.f)); + + EXPECT_DEATH(block_processor->ProcessCapture(false, false, &block), ""); +} +#endif + +std::string ProduceDebugText(int sample_rate_hz) { + std::ostringstream ss; + ss << "Sample rate: " << sample_rate_hz; + return ss.str(); +} + +} // namespace + +// Verifies that the delay controller functionality is properly integrated with +// the render delay buffer inside block processor. +// TODO(peah): Activate the unittest once the required code has been landed. 
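+// (Editorial note on the constants that follow: the simulated echo path delay
+// is kDelayInSamples = 640, i.e. 640 / kBlockSize = 10 blocks, and with
+// kDelayHeadroom = 1 the expected buffer delay is kDelayInBlocks = 9.)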
+TEST(BlockProcessor, DISABLED_DelayControllerIntegration) { + constexpr size_t kNumBlocks = 310; + constexpr size_t kDelayInSamples = 640; + constexpr size_t kDelayHeadroom = 1; + constexpr size_t kDelayInBlocks = + kDelayInSamples / kBlockSize - kDelayHeadroom; + Random random_generator(42U); + for (auto rate : {8000, 16000, 32000, 48000}) { + SCOPED_TRACE(ProduceDebugText(rate)); + std::unique_ptr<testing::StrictMock<webrtc::test::MockRenderDelayBuffer>> + render_delay_buffer_mock( + new StrictMock<webrtc::test::MockRenderDelayBuffer>(rate)); + EXPECT_CALL(*render_delay_buffer_mock, Insert(_)) + .Times(kNumBlocks) + .WillRepeatedly(Return(true)); + EXPECT_CALL(*render_delay_buffer_mock, IsBlockAvailable()) + .Times(kNumBlocks) + .WillRepeatedly(Return(true)); + EXPECT_CALL(*render_delay_buffer_mock, SetDelay(kDelayInBlocks)) + .Times(AtLeast(1)); + EXPECT_CALL(*render_delay_buffer_mock, MaxDelay()).WillOnce(Return(30)); + EXPECT_CALL(*render_delay_buffer_mock, Delay()) + .Times(kNumBlocks + 1) + .WillRepeatedly(Return(0)); + std::unique_ptr<BlockProcessor> block_processor(BlockProcessor::Create( + EchoCanceller3Config(), rate, std::move(render_delay_buffer_mock))); + + std::vector<std::vector<float>> render_block( + NumBandsForRate(rate), std::vector<float>(kBlockSize, 0.f)); + std::vector<std::vector<float>> capture_block( + NumBandsForRate(rate), std::vector<float>(kBlockSize, 0.f)); + DelayBuffer<float> signal_delay_buffer(kDelayInSamples); + for (size_t k = 0; k < kNumBlocks; ++k) { + RandomizeSampleVector(&random_generator, render_block[0]); + signal_delay_buffer.Delay(render_block[0], capture_block[0]); + block_processor->BufferRender(render_block); + block_processor->ProcessCapture(false, false, &capture_block); + } + } +} + +// Verifies that BlockProcessor submodules are called in a proper manner. 
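+// For orientation, the client-side call pattern exercised by these tests is
+// roughly the following (editor's sketch, single band at 16 kHz; the
+// HasMoreAudio()/FillRenderBlock()/FillCaptureBlock() helpers are
+// hypothetical):
+//
+//   std::unique_ptr<BlockProcessor> processor(
+//       BlockProcessor::Create(EchoCanceller3Config(), 16000));
+//   std::vector<std::vector<float>> render(1, std::vector<float>(kBlockSize));
+//   std::vector<std::vector<float>> capture(1, std::vector<float>(kBlockSize));
+//   while (HasMoreAudio()) {
+//     FillRenderBlock(&render);
+//     processor->BufferRender(render);
+//     FillCaptureBlock(&capture);
+//     processor->ProcessCapture(/*echo_path_gain_change=*/false,
+//                               /*capture_signal_saturation=*/false,
+//                               &capture);  // Echo is removed in place.
+//   }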
+TEST(BlockProcessor, DISABLED_SubmoduleIntegration) { + constexpr size_t kNumBlocks = 310; + Random random_generator(42U); + for (auto rate : {8000, 16000, 32000, 48000}) { + SCOPED_TRACE(ProduceDebugText(rate)); + std::unique_ptr<testing::StrictMock<webrtc::test::MockRenderDelayBuffer>> + render_delay_buffer_mock( + new StrictMock<webrtc::test::MockRenderDelayBuffer>(rate)); + std::unique_ptr< + testing::StrictMock<webrtc::test::MockRenderDelayController>> + render_delay_controller_mock( + new StrictMock<webrtc::test::MockRenderDelayController>()); + std::unique_ptr<testing::StrictMock<webrtc::test::MockEchoRemover>> + echo_remover_mock(new StrictMock<webrtc::test::MockEchoRemover>()); + + EXPECT_CALL(*render_delay_buffer_mock, Insert(_)) + .Times(kNumBlocks - 1) + .WillRepeatedly(Return(true)); + EXPECT_CALL(*render_delay_buffer_mock, IsBlockAvailable()) + .Times(kNumBlocks) + .WillRepeatedly(Return(true)); + EXPECT_CALL(*render_delay_buffer_mock, UpdateBuffers()).Times(kNumBlocks); + EXPECT_CALL(*render_delay_buffer_mock, SetDelay(9)).Times(AtLeast(1)); + EXPECT_CALL(*render_delay_buffer_mock, Delay()) + .Times(kNumBlocks) + .WillRepeatedly(Return(0)); + EXPECT_CALL(*render_delay_controller_mock, GetDelay(_, _)) + .Times(kNumBlocks) + .WillRepeatedly(Return(9)); + EXPECT_CALL(*render_delay_controller_mock, AlignmentHeadroomSamples()) + .Times(kNumBlocks); + EXPECT_CALL(*echo_remover_mock, ProcessCapture(_, _, _, _, _)) + .Times(kNumBlocks); + EXPECT_CALL(*echo_remover_mock, UpdateEchoLeakageStatus(_)) + .Times(kNumBlocks); + + std::unique_ptr<BlockProcessor> block_processor(BlockProcessor::Create( + EchoCanceller3Config(), rate, std::move(render_delay_buffer_mock), + std::move(render_delay_controller_mock), std::move(echo_remover_mock))); + + std::vector<std::vector<float>> render_block( + NumBandsForRate(rate), std::vector<float>(kBlockSize, 0.f)); + std::vector<std::vector<float>> capture_block( + NumBandsForRate(rate), std::vector<float>(kBlockSize, 0.f)); + DelayBuffer<float> signal_delay_buffer(640); + for (size_t k = 0; k < kNumBlocks; ++k) { + RandomizeSampleVector(&random_generator, render_block[0]); + signal_delay_buffer.Delay(render_block[0], capture_block[0]); + block_processor->BufferRender(render_block); + block_processor->ProcessCapture(false, false, &capture_block); + block_processor->UpdateEchoLeakageStatus(false); + } + } +} + +TEST(BlockProcessor, BasicSetupAndApiCalls) { + for (auto rate : {8000, 16000, 32000, 48000}) { + SCOPED_TRACE(ProduceDebugText(rate)); + RunBasicSetupAndApiCallTest(rate); + } +} + +#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) +// TODO(gustaf): Re-enable the test once the issue with memory leaks during +// DEATH tests on test bots has been fixed. +TEST(BlockProcessor, DISABLED_VerifyRenderBlockSizeCheck) { + for (auto rate : {8000, 16000, 32000, 48000}) { + SCOPED_TRACE(ProduceDebugText(rate)); + RunRenderBlockSizeVerificationTest(rate); + } +} + +TEST(BlockProcessor, VerifyCaptureBlockSizeCheck) { + for (auto rate : {8000, 16000, 32000, 48000}) { + SCOPED_TRACE(ProduceDebugText(rate)); + RunCaptureBlockSizeVerificationTest(rate); + } +} + +TEST(BlockProcessor, VerifyRenderNumBandsCheck) { + for (auto rate : {8000, 16000, 32000, 48000}) { + SCOPED_TRACE(ProduceDebugText(rate)); + RunRenderNumBandsVerificationTest(rate); + } +} + +// TODO(peah): Verify the check for correct number of bands in the capture +// signal. 
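+// (Editor's note on the band-count arithmetic in the helpers above: assuming
+// NumBandsForRate() returns 1 band for 8 and 16 kHz, 2 for 32 kHz and 3 for
+// 48 kHz, the deliberately wrong band count becomes 2, 2, 3 and 1,
+// respectively.)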
+TEST(BlockProcessor, VerifyCaptureNumBandsCheck) {
+  for (auto rate : {8000, 16000, 32000, 48000}) {
+    SCOPED_TRACE(ProduceDebugText(rate));
+    RunCaptureNumBandsVerificationTest(rate);
+  }
+}
+
+// Verifies that the check for null ProcessCapture input works.
+TEST(BlockProcessor, NullProcessCaptureParameter) {
+  EXPECT_DEATH(std::unique_ptr<BlockProcessor>(
+                   BlockProcessor::Create(EchoCanceller3Config(), 8000))
+                   ->ProcessCapture(false, false, nullptr),
+               "");
+}
+
+// Verifies the check for correct sample rate.
+// TODO(peah): Re-enable the test once the issue with memory leaks during DEATH
+// tests on test bots has been fixed.
+TEST(BlockProcessor, DISABLED_WrongSampleRate) {
+  EXPECT_DEATH(std::unique_ptr<BlockProcessor>(
+                   BlockProcessor::Create(EchoCanceller3Config(), 8001)),
+               "");
+}
+
+#endif
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/cascaded_biquad_filter.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/cascaded_biquad_filter.cc
new file mode 100644
index 0000000000..9a472f516f
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/cascaded_biquad_filter.cc
@@ -0,0 +1,58 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#include "modules/audio_processing/aec3/cascaded_biquad_filter.h"
+
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+CascadedBiQuadFilter::CascadedBiQuadFilter(
+    const CascadedBiQuadFilter::BiQuadCoefficients& coefficients,
+    size_t num_biquads)
+    : biquad_states_(num_biquads), coefficients_(coefficients) {}
+
+CascadedBiQuadFilter::~CascadedBiQuadFilter() = default;
+
+void CascadedBiQuadFilter::Process(rtc::ArrayView<const float> x,
+                                   rtc::ArrayView<float> y) {
+  ApplyBiQuad(x, y, &biquad_states_[0]);
+  for (size_t k = 1; k < biquad_states_.size(); ++k) {
+    ApplyBiQuad(y, y, &biquad_states_[k]);
+  }
+}
+
+void CascadedBiQuadFilter::Process(rtc::ArrayView<float> y) {
+  for (auto& biquad : biquad_states_) {
+    ApplyBiQuad(y, y, &biquad);
+  }
+}
+
+void CascadedBiQuadFilter::ApplyBiQuad(
+    rtc::ArrayView<const float> x,
+    rtc::ArrayView<float> y,
+    CascadedBiQuadFilter::BiQuadState* biquad_state) {
+  RTC_DCHECK_EQ(x.size(), y.size());
+  RTC_DCHECK(biquad_state);
+  const auto* c_b = coefficients_.b;
+  const auto* c_a = coefficients_.a;
+  auto* m_x = biquad_state->x;
+  auto* m_y = biquad_state->y;
+  for (size_t k = 0; k < x.size(); ++k) {
+    const float tmp = x[k];
+    y[k] = c_b[0] * tmp + c_b[1] * m_x[0] + c_b[2] * m_x[1] - c_a[0] * m_y[0] -
+           c_a[1] * m_y[1];
+    m_x[1] = m_x[0];
+    m_x[0] = tmp;
+    m_y[1] = m_y[0];
+    m_y[0] = y[k];
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/cascaded_biquad_filter.h b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/cascaded_biquad_filter.h
new file mode 100644
index 0000000000..aea889ab53
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/cascaded_biquad_filter.h
@@ -0,0 +1,58 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_CASCADED_BIQUAD_FILTER_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_CASCADED_BIQUAD_FILTER_H_ + +#include <vector> + +#include "api/array_view.h" +#include "rtc_base/constructormagic.h" + +namespace webrtc { + +// Applies a number of identical biquads in a cascaded manner. The filter +// implementation is direct form 1. +class CascadedBiQuadFilter { + public: + struct BiQuadState { + BiQuadState() : x(), y() {} + float x[2]; + float y[2]; + }; + + struct BiQuadCoefficients { + float b[3]; + float a[2]; + }; + + CascadedBiQuadFilter( + const CascadedBiQuadFilter::BiQuadCoefficients& coefficients, + size_t num_biquads); + ~CascadedBiQuadFilter(); + // Applies the biquads on the values in x in order to form the output in y. + void Process(rtc::ArrayView<const float> x, rtc::ArrayView<float> y); + // Applies the biquads on the values in y in an in-place manner. + void Process(rtc::ArrayView<float> y); + + private: + void ApplyBiQuad(rtc::ArrayView<const float> x, + rtc::ArrayView<float> y, + CascadedBiQuadFilter::BiQuadState* biquad_state); + + std::vector<BiQuadState> biquad_states_; + const BiQuadCoefficients coefficients_; + + RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(CascadedBiQuadFilter); +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_CASCADED_BIQUAD_FILTER_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/cascaded_biquad_filter_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/cascaded_biquad_filter_unittest.cc new file mode 100644 index 0000000000..fcb77e1f6e --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/cascaded_biquad_filter_unittest.cc @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/cascaded_biquad_filter.h" + +#include <vector> + +#include "test/gtest.h" + +namespace webrtc { + +namespace { + +// Coefficients for a second order Butterworth high-pass filter with cutoff +// frequency 100 Hz. +const CascadedBiQuadFilter::BiQuadCoefficients kHighPassFilterCoefficients = { + {0.97261f, -1.94523f, 0.97261f}, + {-1.94448f, 0.94598f}}; + +const CascadedBiQuadFilter::BiQuadCoefficients kTransparentCoefficients = { + {1.f, 0.f, 0.f}, + {0.f, 0.f}}; + +const CascadedBiQuadFilter::BiQuadCoefficients kBlockingCoefficients = { + {0.f, 0.f, 0.f}, + {0.f, 0.f}}; + +std::vector<float> CreateInputWithIncreasingValues(size_t vector_length) { + std::vector<float> v(vector_length); + for (size_t k = 0; k < v.size(); ++k) { + v[k] = k; + } + return v; +} + +} // namespace + +// Verifies that the filter applies an effect which removes the input signal. +// The test also verifies that the in-place Process API call works as intended. 
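+// (For reference, each biquad in the cascade realizes the direct-form-1
+// difference equation implemented by ApplyBiQuad() in the .cc file above:
+//
+//   y[n] = b[0] * x[n] + b[1] * x[n-1] + b[2] * x[n-2]
+//          - a[0] * y[n-1] - a[1] * y[n-2]
+//
+// where BiQuadCoefficients stores b = {b[0], b[1], b[2]} and a = {a[0], a[1]},
+// i.e. the leading unity denominator coefficient is implicit. Editorial
+// annotation.)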
+TEST(CascadedBiquadFilter, BlockingConfiguration) {
+  std::vector<float> values = CreateInputWithIncreasingValues(1000);
+
+  CascadedBiQuadFilter filter(kBlockingCoefficients, 1);
+  filter.Process(values);
+
+  EXPECT_EQ(std::vector<float>(1000, 0.f), values);
+}
+
+// Verifies that the filter is able to form a zero-mean output from a
+// non-zero-mean input signal when coefficients for a high-pass filter are
+// applied. The test also verifies that the filter works with multiple biquads.
+TEST(CascadedBiquadFilter, HighPassConfiguration) {
+  std::vector<float> values(1000);
+  for (size_t k = 0; k < values.size(); ++k) {
+    values[k] = 1.f;
+  }
+
+  CascadedBiQuadFilter filter(kHighPassFilterCoefficients, 2);
+  filter.Process(values);
+
+  for (size_t k = values.size() / 2; k < values.size(); ++k) {
+    EXPECT_NEAR(0.f, values[k], 1e-4);
+  }
+}
+
+// Verifies that the filter is able to produce a transparent effect with no
+// impact on the data when the proper coefficients are applied. The test also
+// verifies that the non-in-place Process API call works as intended.
+TEST(CascadedBiquadFilter, TransparentConfiguration) {
+  const std::vector<float> input = CreateInputWithIncreasingValues(1000);
+  std::vector<float> output(input.size());
+
+  CascadedBiQuadFilter filter(kTransparentCoefficients, 1);
+  filter.Process(input, output);
+
+  EXPECT_EQ(input, output);
+}
+
+#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
+// Verifies that the check of the input and output lengths works for the
+// non-in-place call.
+TEST(CascadedBiquadFilter, InputSizeCheckVerification) {
+  const std::vector<float> input = CreateInputWithIncreasingValues(10);
+  std::vector<float> output(input.size() - 1);
+
+  CascadedBiQuadFilter filter(kTransparentCoefficients, 1);
+  EXPECT_DEATH(filter.Process(input, output), "");
+}
+#endif
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/comfort_noise_generator.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/comfort_noise_generator.cc
new file mode 100644
index 0000000000..dab40a9d7e
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/comfort_noise_generator.cc
@@ -0,0 +1,219 @@
+/*
+ *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/comfort_noise_generator.h"
+
+#include "typedefs.h"  // NOLINT(build/include)
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+#include <emmintrin.h>
+#endif
+#include <math.h>
+#include <algorithm>
+#include <array>
+#include <functional>
+#include <numeric>
+
+#include "common_audio/signal_processing/include/signal_processing_library.h"
+
+namespace webrtc {
+
+namespace {
+
+// Creates an array of uniformly distributed variables.
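+// (Editorial note: this is the 32-bit linear congruential recurrence
+// seed = (69069 * seed + 1) mod 2^31; bits 16 and above of the state are
+// returned, so each sample is an int16_t in [0, 32767].)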
+void TableRandomValue(int16_t* vector, int16_t vector_length, uint32_t* seed) { + for (int i = 0; i < vector_length; i++) { + seed[0] = (seed[0] * ((int32_t)69069) + 1) & (0x80000000 - 1); + vector[i] = (int16_t)(seed[0] >> 16); + } +} + +} // namespace + +namespace aec3 { + +#if defined(WEBRTC_ARCH_X86_FAMILY) + +void EstimateComfortNoise_SSE2(const std::array<float, kFftLengthBy2Plus1>& N2, + uint32_t* seed, + FftData* lower_band_noise, + FftData* upper_band_noise) { + FftData* N_low = lower_band_noise; + FftData* N_high = upper_band_noise; + + // Compute square root spectrum. + std::array<float, kFftLengthBy2Plus1> N; + for (size_t k = 0; k < kFftLengthBy2; k += 4) { + __m128 v = _mm_loadu_ps(&N2[k]); + v = _mm_sqrt_ps(v); + _mm_storeu_ps(&N[k], v); + } + + N[kFftLengthBy2] = sqrtf(N2[kFftLengthBy2]); + + // Compute the noise level for the upper bands. + constexpr float kOneByNumBands = 1.f / (kFftLengthBy2Plus1 / 2 + 1); + constexpr int kFftLengthBy2Plus1By2 = kFftLengthBy2Plus1 / 2; + const float high_band_noise_level = + std::accumulate(N.begin() + kFftLengthBy2Plus1By2, N.end(), 0.f) * + kOneByNumBands; + + // Generate complex noise. + std::array<int16_t, kFftLengthBy2 - 1> random_values_int; + TableRandomValue(random_values_int.data(), random_values_int.size(), seed); + + std::array<float, kFftLengthBy2 - 1> sin; + std::array<float, kFftLengthBy2 - 1> cos; + constexpr float kScale = 6.28318530717959f / 32768.0f; + std::transform(random_values_int.begin(), random_values_int.end(), + sin.begin(), [&](int16_t a) { return -sinf(kScale * a); }); + std::transform(random_values_int.begin(), random_values_int.end(), + cos.begin(), [&](int16_t a) { return cosf(kScale * a); }); + + // Form low-frequency noise via spectral shaping. + N_low->re[0] = N_low->re[kFftLengthBy2] = N_high->re[0] = + N_high->re[kFftLengthBy2] = 0.f; + std::transform(cos.begin(), cos.end(), N.begin() + 1, N_low->re.begin() + 1, + std::multiplies<float>()); + std::transform(sin.begin(), sin.end(), N.begin() + 1, N_low->im.begin() + 1, + std::multiplies<float>()); + + // Form the high-frequency noise via simple levelling. + std::transform(cos.begin(), cos.end(), N_high->re.begin() + 1, + [&](float a) { return high_band_noise_level * a; }); + std::transform(sin.begin(), sin.end(), N_high->im.begin() + 1, + [&](float a) { return high_band_noise_level * a; }); +} + +#endif + +void EstimateComfortNoise(const std::array<float, kFftLengthBy2Plus1>& N2, + uint32_t* seed, + FftData* lower_band_noise, + FftData* upper_band_noise) { + FftData* N_low = lower_band_noise; + FftData* N_high = upper_band_noise; + + // Compute square root spectrum. + std::array<float, kFftLengthBy2Plus1> N; + std::transform(N2.begin(), N2.end(), N.begin(), + [](float a) { return sqrtf(a); }); + + // Compute the noise level for the upper bands. + constexpr float kOneByNumBands = 1.f / (kFftLengthBy2Plus1 / 2 + 1); + constexpr int kFftLengthBy2Plus1By2 = kFftLengthBy2Plus1 / 2; + const float high_band_noise_level = + std::accumulate(N.begin() + kFftLengthBy2Plus1By2, N.end(), 0.f) * + kOneByNumBands; + + // Generate complex noise. 
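+  // (Editorially summarized: each bin k receives a unit-magnitude phasor
+  // cos(phi_k) - i*sin(phi_k), phi_k = 2*pi*r_k / 32768, scaled by
+  // N[k] = sqrt(N2[k]), so the generated noise has approximately the target
+  // power spectrum N2 with random phase.)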
+ std::array<int16_t, kFftLengthBy2 - 1> random_values_int; + TableRandomValue(random_values_int.data(), random_values_int.size(), seed); + + std::array<float, kFftLengthBy2 - 1> sin; + std::array<float, kFftLengthBy2 - 1> cos; + constexpr float kScale = 6.28318530717959f / 32768.0f; + std::transform(random_values_int.begin(), random_values_int.end(), + sin.begin(), [&](int16_t a) { return -sinf(kScale * a); }); + std::transform(random_values_int.begin(), random_values_int.end(), + cos.begin(), [&](int16_t a) { return cosf(kScale * a); }); + + // Form low-frequency noise via spectral shaping. + N_low->re[0] = N_low->re[kFftLengthBy2] = N_high->re[0] = + N_high->re[kFftLengthBy2] = 0.f; + std::transform(cos.begin(), cos.end(), N.begin() + 1, N_low->re.begin() + 1, + std::multiplies<float>()); + std::transform(sin.begin(), sin.end(), N.begin() + 1, N_low->im.begin() + 1, + std::multiplies<float>()); + + // Form the high-frequency noise via simple levelling. + std::transform(cos.begin(), cos.end(), N_high->re.begin() + 1, + [&](float a) { return high_band_noise_level * a; }); + std::transform(sin.begin(), sin.end(), N_high->im.begin() + 1, + [&](float a) { return high_band_noise_level * a; }); +} + +} // namespace aec3 + +ComfortNoiseGenerator::ComfortNoiseGenerator(Aec3Optimization optimization) + : optimization_(optimization), + seed_(42), + N2_initial_(new std::array<float, kFftLengthBy2Plus1>()) { + N2_initial_->fill(0.f); + Y2_smoothed_.fill(0.f); + N2_.fill(1.0e6f); +} + +ComfortNoiseGenerator::~ComfortNoiseGenerator() = default; + +void ComfortNoiseGenerator::Compute( + const AecState& aec_state, + const std::array<float, kFftLengthBy2Plus1>& capture_spectrum, + FftData* lower_band_noise, + FftData* upper_band_noise) { + RTC_DCHECK(lower_band_noise); + RTC_DCHECK(upper_band_noise); + const auto& Y2 = capture_spectrum; + + if (!aec_state.SaturatedCapture()) { + // Smooth Y2. + std::transform(Y2_smoothed_.begin(), Y2_smoothed_.end(), Y2.begin(), + Y2_smoothed_.begin(), + [](float a, float b) { return a + 0.1f * (b - a); }); + + if (N2_counter_ > 50) { + // Update N2 from Y2_smoothed. + std::transform(N2_.begin(), N2_.end(), Y2_smoothed_.begin(), N2_.begin(), + [](float a, float b) { + return b < a ? (0.9f * b + 0.1f * a) * 1.0002f + : a * 1.0002f; + }); + } + + if (N2_initial_) { + if (++N2_counter_ == 1000) { + N2_initial_.reset(); + } else { + // Compute the N2_initial from N2. + std::transform( + N2_.begin(), N2_.end(), N2_initial_->begin(), N2_initial_->begin(), + [](float a, float b) { return a > b ? b + 0.001f * (a - b) : a; }); + } + } + } + + // Limit the noise to a floor of -96 dBFS. + constexpr float kNoiseFloor = 440.f; + for (auto& n : N2_) { + n = std::max(n, kNoiseFloor); + } + if (N2_initial_) { + for (auto& n : *N2_initial_) { + n = std::max(n, kNoiseFloor); + } + } + + // Choose N2 estimate to use. + const std::array<float, kFftLengthBy2Plus1>& N2 = + N2_initial_ ? 
*N2_initial_ : N2_; + + switch (optimization_) { +#if defined(WEBRTC_ARCH_X86_FAMILY) + case Aec3Optimization::kSse2: + aec3::EstimateComfortNoise_SSE2(N2, &seed_, lower_band_noise, + upper_band_noise); + break; +#endif + default: + aec3::EstimateComfortNoise(N2, &seed_, lower_band_noise, + upper_band_noise); + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/comfort_noise_generator.h b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/comfort_noise_generator.h new file mode 100644 index 0000000000..2d998beb94 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/comfort_noise_generator.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_COMFORT_NOISE_GENERATOR_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_COMFORT_NOISE_GENERATOR_H_ + +#include <array> +#include <memory> + +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/aec_state.h" +#include "modules/audio_processing/aec3/fft_data.h" +#include "rtc_base/constructormagic.h" + +namespace webrtc { +namespace aec3 { +#if defined(WEBRTC_ARCH_X86_FAMILY) + +void EstimateComfortNoise_SSE2(const std::array<float, kFftLengthBy2Plus1>& N2, + uint32_t* seed, + FftData* lower_band_noise, + FftData* upper_band_noise); +#endif +void EstimateComfortNoise(const std::array<float, kFftLengthBy2Plus1>& N2, + uint32_t* seed, + FftData* lower_band_noise, + FftData* upper_band_noise); + +} // namespace aec3 + +// Generates the comfort noise. +class ComfortNoiseGenerator { + public: + explicit ComfortNoiseGenerator(Aec3Optimization optimization); + ~ComfortNoiseGenerator(); + + // Computes the comfort noise. + void Compute(const AecState& aec_state, + const std::array<float, kFftLengthBy2Plus1>& capture_spectrum, + FftData* lower_band_noise, + FftData* upper_band_noise); + + // Returns the estimate of the background noise spectrum. + const std::array<float, kFftLengthBy2Plus1>& NoiseSpectrum() const { + return N2_; + } + + private: + const Aec3Optimization optimization_; + uint32_t seed_; + std::unique_ptr<std::array<float, kFftLengthBy2Plus1>> N2_initial_; + std::array<float, kFftLengthBy2Plus1> Y2_smoothed_; + std::array<float, kFftLengthBy2Plus1> N2_; + int N2_counter_ = 0; + + RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(ComfortNoiseGenerator); +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_COMFORT_NOISE_GENERATOR_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/comfort_noise_generator_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/comfort_noise_generator_unittest.cc new file mode 100644 index 0000000000..46da3eca46 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/comfort_noise_generator_unittest.cc @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/comfort_noise_generator.h"
+
+#include <algorithm>
+#include <numeric>
+
+#include "rtc_base/random.h"
+#include "system_wrappers/include/cpu_features_wrapper.h"
+#include "test/gtest.h"
+#include "typedefs.h"  // NOLINT(build/include)
+
+namespace webrtc {
+namespace aec3 {
+namespace {
+
+float Power(const FftData& N) {
+  std::array<float, kFftLengthBy2Plus1> N2;
+  N.Spectrum(Aec3Optimization::kNone, &N2);
+  return std::accumulate(N2.begin(), N2.end(), 0.f) / N2.size();
+}
+
+}  // namespace
+
+#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
+
+TEST(ComfortNoiseGenerator, NullLowerBandNoise) {
+  std::array<float, kFftLengthBy2Plus1> N2;
+  FftData noise;
+  EXPECT_DEATH(
+      ComfortNoiseGenerator(DetectOptimization())
+          .Compute(AecState(EchoCanceller3Config{}), N2, nullptr, &noise),
+      "");
+}
+
+TEST(ComfortNoiseGenerator, NullUpperBandNoise) {
+  std::array<float, kFftLengthBy2Plus1> N2;
+  FftData noise;
+  EXPECT_DEATH(
+      ComfortNoiseGenerator(DetectOptimization())
+          .Compute(AecState(EchoCanceller3Config{}), N2, &noise, nullptr),
+      "");
+}
+
+#endif
+
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+// Verifies that the optimized methods are bitexact to their reference
+// counterparts.
+TEST(ComfortNoiseGenerator, TestOptimizations) {
+  if (WebRtc_GetCPUInfo(kSSE2) != 0) {
+    Random random_generator(42U);
+    uint32_t seed = 42;
+    uint32_t seed_SSE2 = 42;
+    std::array<float, kFftLengthBy2Plus1> N2;
+    FftData lower_band_noise;
+    FftData upper_band_noise;
+    FftData lower_band_noise_SSE2;
+    FftData upper_band_noise_SSE2;
+    for (int k = 0; k < 10; ++k) {
+      for (size_t j = 0; j < N2.size(); ++j) {
+        N2[j] = random_generator.Rand<float>() * 1000.f;
+      }
+
+      EstimateComfortNoise(N2, &seed, &lower_band_noise, &upper_band_noise);
+      EstimateComfortNoise_SSE2(N2, &seed_SSE2, &lower_band_noise_SSE2,
+                                &upper_band_noise_SSE2);
+      for (size_t j = 0; j < lower_band_noise.re.size(); ++j) {
+        EXPECT_NEAR(lower_band_noise.re[j], lower_band_noise_SSE2.re[j],
+                    0.00001f);
+        EXPECT_NEAR(upper_band_noise.re[j], upper_band_noise_SSE2.re[j],
+                    0.00001f);
+      }
+      for (size_t j = 1; j < lower_band_noise.re.size() - 1; ++j) {
+        EXPECT_NEAR(lower_band_noise.im[j], lower_band_noise_SSE2.im[j],
+                    0.00001f);
+        EXPECT_NEAR(upper_band_noise.im[j], upper_band_noise_SSE2.im[j],
+                    0.00001f);
+      }
+    }
+  }
+}
+
+#endif
+
+TEST(ComfortNoiseGenerator, CorrectLevel) {
+  ComfortNoiseGenerator cng(DetectOptimization());
+  AecState aec_state(EchoCanceller3Config{});
+
+  std::array<float, kFftLengthBy2Plus1> N2;
+  N2.fill(1000.f * 1000.f);
+
+  FftData n_lower;
+  FftData n_upper;
+  n_lower.re.fill(0.f);
+  n_lower.im.fill(0.f);
+  n_upper.re.fill(0.f);
+  n_upper.im.fill(0.f);
+
+  // Ensure an instantaneous update to nonzero noise.
+  cng.Compute(aec_state, N2, &n_lower, &n_upper);
+  EXPECT_LT(0.f, Power(n_lower));
+  EXPECT_LT(0.f, Power(n_upper));
+
+  for (int k = 0; k < 10000; ++k) {
+    cng.Compute(aec_state, N2, &n_lower, &n_upper);
+  }
+  EXPECT_NEAR(N2[0], Power(n_lower), N2[0] / 10.f);
+  EXPECT_NEAR(N2[0], Power(n_upper), N2[0] / 10.f);
+}
+
+}  // namespace aec3
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/decimator.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/decimator.cc
new file mode 100644
index 0000000000..135a771a7c
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/decimator.cc
@@ -0,0 +1,70 @@
+/*
+ *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#include "modules/audio_processing/aec3/decimator.h"
+
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+namespace {
+
+// b, a = signal.butter(2, 3400/8000.0, 'lowpass', analog=False) which are the
+// same as b, a = signal.butter(2, 1700/4000.0, 'lowpass', analog=False).
+const CascadedBiQuadFilter::BiQuadCoefficients kLowPassFilterCoefficients2 = {
+    {0.22711796f, 0.45423593f, 0.22711796f},
+    {-0.27666461f, 0.18513647f}};
+constexpr int kNumFilters2 = 3;
+
+// b, a = signal.butter(2, 750/8000.0, 'lowpass', analog=False) which are the
+// same as b, a = signal.butter(2, 375/4000.0, 'lowpass', analog=False).
+const CascadedBiQuadFilter::BiQuadCoefficients kLowPassFilterCoefficients4 = {
+    {0.0179f, 0.0357f, 0.0179f},
+    {-1.5879f, 0.6594f}};
+constexpr int kNumFilters4 = 3;
+
+// b, a = signal.butter(2, 800/8000.0, 'lowpass', analog=False) which are the
+// same as b, a = signal.butter(2, 400/4000.0, 'lowpass', analog=False).
+const CascadedBiQuadFilter::BiQuadCoefficients kLowPassFilterCoefficients8 = {
+    {0.02008337f, 0.04016673f, 0.02008337f},
+    {-1.56101808f, 0.64135154f}};
+constexpr int kNumFilters8 = 4;
+
+}  // namespace
+
+Decimator::Decimator(size_t down_sampling_factor)
+    : down_sampling_factor_(down_sampling_factor),
+      low_pass_filter_(
+          down_sampling_factor_ == 4
+              ? kLowPassFilterCoefficients4
+              : (down_sampling_factor_ == 8 ? kLowPassFilterCoefficients8
+                                            : kLowPassFilterCoefficients2),
+          down_sampling_factor_ == 4
+              ? kNumFilters4
+              : (down_sampling_factor_ == 8 ? kNumFilters8 : kNumFilters2)) {
+  RTC_DCHECK(down_sampling_factor_ == 2 || down_sampling_factor_ == 4 ||
+             down_sampling_factor_ == 8);
+}
+
+void Decimator::Decimate(rtc::ArrayView<const float> in,
+                         rtc::ArrayView<float> out) {
+  RTC_DCHECK_EQ(kBlockSize, in.size());
+  RTC_DCHECK_EQ(kBlockSize / down_sampling_factor_, out.size());
+  std::array<float, kBlockSize> x;
+
+  // Limit the frequency content of the signal to avoid aliasing.
+  low_pass_filter_.Process(in, x);
+
+  // Downsample the signal.
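+  // (Editorial illustration: with down_sampling_factor_ == 4 every fourth
+  // filtered sample is kept, out[j] = x[4 * j], so a 64 sample input block
+  // yields 16 output samples.)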
+ for (size_t j = 0, k = 0; j < out.size(); ++j, k += down_sampling_factor_) { + RTC_DCHECK_GT(kBlockSize, k); + out[j] = x[k]; + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/decimator.h b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/decimator.h new file mode 100644 index 0000000000..7418a26fb0 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/decimator.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_DECIMATOR_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_DECIMATOR_H_ + +#include <array> + +#include "api/array_view.h" +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/cascaded_biquad_filter.h" +#include "rtc_base/constructormagic.h" + +namespace webrtc { + +// Provides functionality for decimating a signal. +class Decimator { + public: + explicit Decimator(size_t down_sampling_factor); + + // Downsamples the signal. + void Decimate(rtc::ArrayView<const float> in, rtc::ArrayView<float> out); + + private: + const size_t down_sampling_factor_; + CascadedBiQuadFilter low_pass_filter_; + + RTC_DISALLOW_COPY_AND_ASSIGN(Decimator); +}; +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_DECIMATOR_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/decimator_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/decimator_unittest.cc new file mode 100644 index 0000000000..e77a990f9c --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/decimator_unittest.cc @@ -0,0 +1,148 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/decimator.h" + +#include <math.h> +#include <algorithm> +#include <array> +#include <numeric> +#include <sstream> +#include <string> +#include <vector> + +#include "modules/audio_processing/aec3/aec3_common.h" +#include "test/gtest.h" + +namespace webrtc { + +namespace { + +std::string ProduceDebugText(int sample_rate_hz) { + std::ostringstream ss; + ss << "Sample rate: " << sample_rate_hz; + return ss.str(); +} + +constexpr size_t kDownSamplingFactors[] = {2, 4, 8}; +constexpr float kPi = 3.141592f; +constexpr size_t kNumStartupBlocks = 50; +constexpr size_t kNumBlocks = 1000; + +void ProduceDecimatedSinusoidalOutputPower(int sample_rate_hz, + size_t down_sampling_factor, + float sinusoidal_frequency_hz, + float* input_power, + float* output_power) { + float input[kBlockSize * kNumBlocks]; + const size_t sub_block_size = kBlockSize / down_sampling_factor; + + // Produce a sinusoid of the specified frequency. 
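+  // (Editorial note: the aliasing test below probes at 3/8 * sample_rate_hz;
+  // since the decimated Nyquist frequency is sample_rate_hz / (2 * D) <=
+  // sample_rate_hz / 4 for every supported factor D in {2, 4, 8}, an ideal
+  // decimator should leave essentially no energy at that frequency.)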
+ for (size_t k = 0; k < kBlockSize * kNumBlocks; ++k) { + input[k] = + 32767.f * sin(2.f * kPi * sinusoidal_frequency_hz * k / sample_rate_hz); + } + + Decimator decimator(down_sampling_factor); + std::vector<float> output(sub_block_size * kNumBlocks); + + for (size_t k = 0; k < kNumBlocks; ++k) { + std::vector<float> sub_block(sub_block_size); + + decimator.Decimate( + rtc::ArrayView<const float>(&input[k * kBlockSize], kBlockSize), + sub_block); + + std::copy(sub_block.begin(), sub_block.end(), + output.begin() + k * sub_block_size); + } + + ASSERT_GT(kNumBlocks, kNumStartupBlocks); + rtc::ArrayView<const float> input_to_evaluate( + &input[kNumStartupBlocks * kBlockSize], + (kNumBlocks - kNumStartupBlocks) * kBlockSize); + rtc::ArrayView<const float> output_to_evaluate( + &output[kNumStartupBlocks * sub_block_size], + (kNumBlocks - kNumStartupBlocks) * sub_block_size); + *input_power = + std::inner_product(input_to_evaluate.begin(), input_to_evaluate.end(), + input_to_evaluate.begin(), 0.f) / + input_to_evaluate.size(); + *output_power = + std::inner_product(output_to_evaluate.begin(), output_to_evaluate.end(), + output_to_evaluate.begin(), 0.f) / + output_to_evaluate.size(); +} + +} // namespace + +// Verifies that there is little aliasing from upper frequencies in the +// downsampling. +TEST(Decimator, NoLeakageFromUpperFrequencies) { + float input_power; + float output_power; + for (auto rate : {8000, 16000, 32000, 48000}) { + for (auto down_sampling_factor : kDownSamplingFactors) { + ProduceDebugText(rate); + ProduceDecimatedSinusoidalOutputPower(rate, down_sampling_factor, + 3.f / 8.f * rate, &input_power, + &output_power); + EXPECT_GT(0.0001f * input_power, output_power); + } + } +} + +// Verifies that the impact of low-frequency content is small during the +// downsampling. +TEST(Decimator, NoImpactOnLowerFrequencies) { + float input_power; + float output_power; + for (auto rate : {8000, 16000, 32000, 48000}) { + for (auto down_sampling_factor : kDownSamplingFactors) { + ProduceDebugText(rate); + ProduceDecimatedSinusoidalOutputPower(rate, down_sampling_factor, 200.f, + &input_power, &output_power); + EXPECT_LT(0.7f * input_power, output_power); + } + } +} + +#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) +// Verifies the check for the input size. +TEST(Decimator, WrongInputSize) { + Decimator decimator(4); + std::vector<float> x(std::vector<float>(kBlockSize - 1, 0.f)); + std::array<float, kBlockSize / 4> x_downsampled; + EXPECT_DEATH(decimator.Decimate(x, x_downsampled), ""); +} + +// Verifies the check for non-null output parameter. +TEST(Decimator, NullOutput) { + Decimator decimator(4); + std::vector<float> x(std::vector<float>(kBlockSize, 0.f)); + EXPECT_DEATH(decimator.Decimate(x, nullptr), ""); +} + +// Verifies the check for the output size. +TEST(Decimator, WrongOutputSize) { + Decimator decimator(4); + std::vector<float> x(std::vector<float>(kBlockSize, 0.f)); + std::array<float, kBlockSize / 4 - 1> x_downsampled; + EXPECT_DEATH(decimator.Decimate(x, x_downsampled), ""); +} + +// Verifies the check for the correct downsampling factor. 
+TEST(Decimator, CorrectDownSamplingFactor) { + EXPECT_DEATH(Decimator(3), ""); +} + +#endif + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/downsampled_render_buffer.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/downsampled_render_buffer.cc new file mode 100644 index 0000000000..efc733b182 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/downsampled_render_buffer.cc @@ -0,0 +1,20 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/downsampled_render_buffer.h" + +namespace webrtc { + +DownsampledRenderBuffer::DownsampledRenderBuffer(size_t downsampled_buffer_size) + : buffer(downsampled_buffer_size, 0.f) {} + +DownsampledRenderBuffer::~DownsampledRenderBuffer() = default; + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/downsampled_render_buffer.h b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/downsampled_render_buffer.h new file mode 100644 index 0000000000..531852a0c9 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/downsampled_render_buffer.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_DOWNSAMPLED_RENDER_BUFFER_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_DOWNSAMPLED_RENDER_BUFFER_H_ + +#include <vector> + +#include "modules/audio_processing/aec3/aec3_common.h" + +namespace webrtc { + +// Holds the circular buffer of the downsampled render data. +struct DownsampledRenderBuffer { + explicit DownsampledRenderBuffer(size_t downsampled_buffer_size); + ~DownsampledRenderBuffer(); + std::vector<float> buffer; + int position = 0; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_DOWNSAMPLED_RENDER_BUFFER_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/echo_canceller3.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/echo_canceller3.cc new file mode 100644 index 0000000000..491faa01fb --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/echo_canceller3.cc @@ -0,0 +1,374 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ +#include "modules/audio_processing/aec3/echo_canceller3.h" + +#include <sstream> + +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/atomicops.h" + +namespace webrtc { + +namespace { + +enum class EchoCanceller3ApiCall { kCapture, kRender }; + +bool DetectSaturation(rtc::ArrayView<const float> y) { + for (auto y_k : y) { + if (y_k >= 32700.0f || y_k <= -32700.0f) { + return true; + } + } + return false; +} + +void FillSubFrameView(AudioBuffer* frame, + size_t sub_frame_index, + std::vector<rtc::ArrayView<float>>* sub_frame_view) { + RTC_DCHECK_GE(1, sub_frame_index); + RTC_DCHECK_LE(0, sub_frame_index); + RTC_DCHECK_EQ(frame->num_bands(), sub_frame_view->size()); + for (size_t k = 0; k < sub_frame_view->size(); ++k) { + (*sub_frame_view)[k] = rtc::ArrayView<float>( + &frame->split_bands_f(0)[k][sub_frame_index * kSubFrameLength], + kSubFrameLength); + } +} + +void FillSubFrameView(std::vector<std::vector<float>>* frame, + size_t sub_frame_index, + std::vector<rtc::ArrayView<float>>* sub_frame_view) { + RTC_DCHECK_GE(1, sub_frame_index); + RTC_DCHECK_EQ(frame->size(), sub_frame_view->size()); + for (size_t k = 0; k < frame->size(); ++k) { + (*sub_frame_view)[k] = rtc::ArrayView<float>( + &(*frame)[k][sub_frame_index * kSubFrameLength], kSubFrameLength); + } +} + +void ProcessCaptureFrameContent( + AudioBuffer* capture, + bool level_change, + bool saturated_microphone_signal, + size_t sub_frame_index, + FrameBlocker* capture_blocker, + BlockFramer* output_framer, + BlockProcessor* block_processor, + std::vector<std::vector<float>>* block, + std::vector<rtc::ArrayView<float>>* sub_frame_view) { + FillSubFrameView(capture, sub_frame_index, sub_frame_view); + capture_blocker->InsertSubFrameAndExtractBlock(*sub_frame_view, block); + block_processor->ProcessCapture(level_change, saturated_microphone_signal, + block); + output_framer->InsertBlockAndExtractSubFrame(*block, sub_frame_view); +} + +void ProcessRemainingCaptureFrameContent( + bool level_change, + bool saturated_microphone_signal, + FrameBlocker* capture_blocker, + BlockFramer* output_framer, + BlockProcessor* block_processor, + std::vector<std::vector<float>>* block) { + if (!capture_blocker->IsBlockAvailable()) { + return; + } + + capture_blocker->ExtractBlock(block); + block_processor->ProcessCapture(level_change, saturated_microphone_signal, + block); + output_framer->InsertBlock(*block); +} + +void BufferRenderFrameContent( + std::vector<std::vector<float>>* render_frame, + size_t sub_frame_index, + FrameBlocker* render_blocker, + BlockProcessor* block_processor, + std::vector<std::vector<float>>* block, + std::vector<rtc::ArrayView<float>>* sub_frame_view) { + FillSubFrameView(render_frame, sub_frame_index, sub_frame_view); + render_blocker->InsertSubFrameAndExtractBlock(*sub_frame_view, block); + block_processor->BufferRender(*block); +} + +void BufferRemainingRenderFrameContent(FrameBlocker* render_blocker, + BlockProcessor* block_processor, + std::vector<std::vector<float>>* block) { + if (!render_blocker->IsBlockAvailable()) { + return; + } + render_blocker->ExtractBlock(block); + block_processor->BufferRender(*block); +} + +void CopyBufferIntoFrame(AudioBuffer* buffer, + size_t num_bands, + size_t frame_length, + std::vector<std::vector<float>>* frame) { + RTC_DCHECK_EQ(num_bands, frame->size()); + RTC_DCHECK_EQ(frame_length, (*frame)[0].size()); + for (size_t k = 0; k < num_bands; ++k) { + rtc::ArrayView<float> buffer_view(&buffer->split_bands_f(0)[k][0], + frame_length); + 
std::copy(buffer_view.begin(), buffer_view.end(), (*frame)[k].begin()); + } +} + +// [B,A] = butter(2,100/4000,'high') +const CascadedBiQuadFilter::BiQuadCoefficients + kHighPassFilterCoefficients_8kHz = {{0.94598f, -1.89195f, 0.94598f}, + {-1.88903f, 0.89487f}}; +const int kNumberOfHighPassBiQuads_8kHz = 1; + +// [B,A] = butter(2,100/8000,'high') +const CascadedBiQuadFilter::BiQuadCoefficients + kHighPassFilterCoefficients_16kHz = {{0.97261f, -1.94523f, 0.97261f}, + {-1.94448f, 0.94598f}}; +const int kNumberOfHighPassBiQuads_16kHz = 1; + +} // namespace + +class EchoCanceller3::RenderWriter { + public: + RenderWriter(ApmDataDumper* data_dumper, + SwapQueue<std::vector<std::vector<float>>, + Aec3RenderQueueItemVerifier>* render_transfer_queue, + std::unique_ptr<CascadedBiQuadFilter> render_highpass_filter, + int sample_rate_hz, + int frame_length, + int num_bands); + ~RenderWriter(); + void Insert(AudioBuffer* input); + + private: + ApmDataDumper* data_dumper_; + const int sample_rate_hz_; + const size_t frame_length_; + const int num_bands_; + std::unique_ptr<CascadedBiQuadFilter> render_highpass_filter_; + std::vector<std::vector<float>> render_queue_input_frame_; + SwapQueue<std::vector<std::vector<float>>, Aec3RenderQueueItemVerifier>* + render_transfer_queue_; + RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(RenderWriter); +}; + +EchoCanceller3::RenderWriter::RenderWriter( + ApmDataDumper* data_dumper, + SwapQueue<std::vector<std::vector<float>>, Aec3RenderQueueItemVerifier>* + render_transfer_queue, + std::unique_ptr<CascadedBiQuadFilter> render_highpass_filter, + int sample_rate_hz, + int frame_length, + int num_bands) + : data_dumper_(data_dumper), + sample_rate_hz_(sample_rate_hz), + frame_length_(frame_length), + num_bands_(num_bands), + render_highpass_filter_(std::move(render_highpass_filter)), + render_queue_input_frame_(num_bands_, + std::vector<float>(frame_length_, 0.f)), + render_transfer_queue_(render_transfer_queue) { + RTC_DCHECK(data_dumper); +} + +EchoCanceller3::RenderWriter::~RenderWriter() = default; + +void EchoCanceller3::RenderWriter::Insert(AudioBuffer* input) { + RTC_DCHECK_EQ(1, input->num_channels()); + RTC_DCHECK_EQ(frame_length_, input->num_frames_per_band()); + data_dumper_->DumpWav("aec3_render_input", frame_length_, + &input->split_bands_f(0)[0][0], + LowestBandRate(sample_rate_hz_), 1); + + CopyBufferIntoFrame(input, num_bands_, frame_length_, + &render_queue_input_frame_); + + if (render_highpass_filter_) { + render_highpass_filter_->Process(render_queue_input_frame_[0]); + } + + static_cast<void>(render_transfer_queue_->Insert(&render_queue_input_frame_)); +} + +int EchoCanceller3::instance_count_ = 0; + +EchoCanceller3::EchoCanceller3(const EchoCanceller3Config& config, + int sample_rate_hz, + bool use_highpass_filter) + : EchoCanceller3(sample_rate_hz, + use_highpass_filter, + std::unique_ptr<BlockProcessor>( + BlockProcessor::Create(config, sample_rate_hz))) {} +EchoCanceller3::EchoCanceller3(int sample_rate_hz, + bool use_highpass_filter, + std::unique_ptr<BlockProcessor> block_processor) + : data_dumper_( + new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))), + sample_rate_hz_(sample_rate_hz), + num_bands_(NumBandsForRate(sample_rate_hz_)), + frame_length_(rtc::CheckedDivExact(LowestBandRate(sample_rate_hz_), 100)), + output_framer_(num_bands_), + capture_blocker_(num_bands_), + render_blocker_(num_bands_), + render_transfer_queue_( + kRenderTransferQueueSize, + std::vector<std::vector<float>>( + num_bands_, + std::vector<float>(frame_length_, 
0.f)), + Aec3RenderQueueItemVerifier(num_bands_, frame_length_)), + block_processor_(std::move(block_processor)), + render_queue_output_frame_(num_bands_, + std::vector<float>(frame_length_, 0.f)), + block_(num_bands_, std::vector<float>(kBlockSize, 0.f)), + sub_frame_view_(num_bands_) { + RTC_DCHECK(ValidFullBandRate(sample_rate_hz_)); + + std::unique_ptr<CascadedBiQuadFilter> render_highpass_filter; + if (use_highpass_filter) { + render_highpass_filter.reset(new CascadedBiQuadFilter( + sample_rate_hz_ == 8000 ? kHighPassFilterCoefficients_8kHz + : kHighPassFilterCoefficients_16kHz, + sample_rate_hz_ == 8000 ? kNumberOfHighPassBiQuads_8kHz + : kNumberOfHighPassBiQuads_16kHz)); + capture_highpass_filter_.reset(new CascadedBiQuadFilter( + sample_rate_hz_ == 8000 ? kHighPassFilterCoefficients_8kHz + : kHighPassFilterCoefficients_16kHz, + sample_rate_hz_ == 8000 ? kNumberOfHighPassBiQuads_8kHz + : kNumberOfHighPassBiQuads_16kHz)); + } + + render_writer_.reset( + new RenderWriter(data_dumper_.get(), &render_transfer_queue_, + std::move(render_highpass_filter), sample_rate_hz_, + frame_length_, num_bands_)); + + RTC_DCHECK_EQ(num_bands_, std::max(sample_rate_hz_, 16000) / 16000); + RTC_DCHECK_GE(kMaxNumBands, num_bands_); +} + +EchoCanceller3::~EchoCanceller3() = default; + +void EchoCanceller3::AnalyzeRender(AudioBuffer* render) { + RTC_DCHECK_RUNS_SERIALIZED(&render_race_checker_); + RTC_DCHECK(render); + data_dumper_->DumpRaw("aec3_call_order", + static_cast<int>(EchoCanceller3ApiCall::kRender)); + + return render_writer_->Insert(render); +} + +void EchoCanceller3::AnalyzeCapture(AudioBuffer* capture) { + RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_); + RTC_DCHECK(capture); + data_dumper_->DumpWav("aec3_capture_analyze_input", capture->num_frames(), + capture->channels_f()[0], sample_rate_hz_, 1); + + saturated_microphone_signal_ = false; + for (size_t k = 0; k < capture->num_channels(); ++k) { + saturated_microphone_signal_ |= + DetectSaturation(rtc::ArrayView<const float>(capture->channels_f()[k], + capture->num_frames())); + if (saturated_microphone_signal_) { + break; + } + } +} + +void EchoCanceller3::ProcessCapture(AudioBuffer* capture, bool level_change) { + RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_); + RTC_DCHECK(capture); + RTC_DCHECK_EQ(1u, capture->num_channels()); + RTC_DCHECK_EQ(num_bands_, capture->num_bands()); + RTC_DCHECK_EQ(frame_length_, capture->num_frames_per_band()); + data_dumper_->DumpRaw("aec3_call_order", + static_cast<int>(EchoCanceller3ApiCall::kCapture)); + + rtc::ArrayView<float> capture_lower_band = + rtc::ArrayView<float>(&capture->split_bands_f(0)[0][0], frame_length_); + + data_dumper_->DumpWav("aec3_capture_input", capture_lower_band, + LowestBandRate(sample_rate_hz_), 1); + + EmptyRenderQueue(); + + if (capture_highpass_filter_) { + capture_highpass_filter_->Process(capture_lower_band); + } + + ProcessCaptureFrameContent( + capture, level_change, saturated_microphone_signal_, 0, &capture_blocker_, + &output_framer_, block_processor_.get(), &block_, &sub_frame_view_); + + if (sample_rate_hz_ != 8000) { + ProcessCaptureFrameContent( + capture, level_change, saturated_microphone_signal_, 1, + &capture_blocker_, &output_framer_, block_processor_.get(), &block_, + &sub_frame_view_); + } + + ProcessRemainingCaptureFrameContent( + level_change, saturated_microphone_signal_, &capture_blocker_, + &output_framer_, block_processor_.get(), &block_); + + data_dumper_->DumpWav("aec3_capture_output", frame_length_, + &capture->split_bands_f(0)[0][0], + 
LowestBandRate(sample_rate_hz_), 1); +} + +EchoControl::Metrics EchoCanceller3::GetMetrics() const { + RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_); + Metrics metrics; + block_processor_->GetMetrics(&metrics); + return metrics; +} + +bool EchoCanceller3::Validate(const EchoCanceller3Config& config) { + return true; +} + +void EchoCanceller3::EmptyRenderQueue() { + RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_); + bool frame_to_buffer = + render_transfer_queue_.Remove(&render_queue_output_frame_); + while (frame_to_buffer) { + BufferRenderFrameContent(&render_queue_output_frame_, 0, &render_blocker_, + block_processor_.get(), &block_, &sub_frame_view_); + + if (sample_rate_hz_ != 8000) { + BufferRenderFrameContent(&render_queue_output_frame_, 1, &render_blocker_, + block_processor_.get(), &block_, + &sub_frame_view_); + } + + BufferRemainingRenderFrameContent(&render_blocker_, block_processor_.get(), + &block_); + + frame_to_buffer = + render_transfer_queue_.Remove(&render_queue_output_frame_); + } +} + +EchoCanceller3Factory::EchoCanceller3Factory() {} + +EchoCanceller3Factory::EchoCanceller3Factory(const EchoCanceller3Config& config) + : config_(config) { + // Revert to default configuration if needed. + if (!EchoCanceller3::Validate(config_)) { + config_ = EchoCanceller3Config(); + } +} + +std::unique_ptr<EchoControl> EchoCanceller3Factory::Create(int sample_rate_hz) { + return std::unique_ptr<EchoControl>( + new EchoCanceller3(config_, sample_rate_hz, true)); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/echo_canceller3.h b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/echo_canceller3.h new file mode 100644 index 0000000000..475bacb723 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/echo_canceller3.h @@ -0,0 +1,137 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_ECHO_CANCELLER3_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_ECHO_CANCELLER3_H_ + +#include "modules/audio_processing/aec3/block_framer.h" +#include "modules/audio_processing/aec3/block_processor.h" +#include "modules/audio_processing/aec3/cascaded_biquad_filter.h" +#include "modules/audio_processing/aec3/frame_blocker.h" +#include "modules/audio_processing/audio_buffer.h" +#include "modules/audio_processing/include/audio_processing.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/constructormagic.h" +#include "rtc_base/race_checker.h" +#include "rtc_base/swap_queue.h" + +namespace webrtc { + +// Functor for verifying the invariance of the frames being put into the render +// queue. 
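+// A frame passes verification only if it carries num_bands_ sub-vectors of
+// frame_length_ samples each, mirroring the layout produced by RenderWriter.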
+class Aec3RenderQueueItemVerifier { + public: + explicit Aec3RenderQueueItemVerifier(size_t num_bands, size_t frame_length) + : num_bands_(num_bands), frame_length_(frame_length) {} + + bool operator()(const std::vector<std::vector<float>>& v) const { + if (v.size() != num_bands_) { + return false; + } + for (const auto& v_k : v) { + if (v_k.size() != frame_length_) { + return false; + } + } + return true; + } + + private: + const size_t num_bands_; + const size_t frame_length_; +}; + +// Main class for the echo canceller3. +// It does 4 things: +// -Receives 10 ms frames of band-split audio. +// -Optionally applies an anti-hum (high-pass) filter on the +// received signals. +// -Provides the lower level echo canceller functionality with +// blocks of 64 samples of audio data. +// -Partially handles the jitter in the render and capture API +// call sequence. +// +// The class is supposed to be used in a non-concurrent manner apart from the +// AnalyzeRender call which can be called concurrently with the other methods. +class EchoCanceller3 : public EchoControl { + public: + // Normal c-tor to use. + EchoCanceller3(const EchoCanceller3Config& config, + int sample_rate_hz, + bool use_highpass_filter); + // Testing c-tor that is used only for testing purposes. + EchoCanceller3(int sample_rate_hz, + bool use_highpass_filter, + std::unique_ptr<BlockProcessor> block_processor); + ~EchoCanceller3() override; + // Analyzes and stores an internal copy of the split-band domain render + // signal. + void AnalyzeRender(AudioBuffer* farend) override; + // Analyzes the full-band domain capture signal to detect signal saturation. + void AnalyzeCapture(AudioBuffer* capture) override; + // Processes the split-band domain capture signal in order to remove any echo + // present in the signal. + void ProcessCapture(AudioBuffer* capture, bool level_change) override; + // Collect current metrics from the echo canceller. + Metrics GetMetrics() const override; + + // Signals whether an external detector has detected echo leakage from the + // echo canceller. + // Note that in the case echo leakage has been flagged, it should be unflagged + // once it is no longer occurring. + void UpdateEchoLeakageStatus(bool leakage_detected) { + RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_); + block_processor_->UpdateEchoLeakageStatus(leakage_detected); + } + + // Validates a config. + static bool Validate(const EchoCanceller3Config& config); + + private: + class RenderWriter; + + // Empties the render SwapQueue. + void EmptyRenderQueue(); + + rtc::RaceChecker capture_race_checker_; + rtc::RaceChecker render_race_checker_; + + // State that is accessed by the AnalyzeRender call. + std::unique_ptr<RenderWriter> render_writer_ + RTC_GUARDED_BY(render_race_checker_); + + // State that may be accessed by the capture thread. 
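+  // Most of the members below are annotated RTC_GUARDED_BY
+  // (capture_race_checker_), documenting the assumption that they are only
+  // touched from the capture-side API calls.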
+ static int instance_count_; + std::unique_ptr<ApmDataDumper> data_dumper_; + const int sample_rate_hz_; + const int num_bands_; + const size_t frame_length_; + BlockFramer output_framer_ RTC_GUARDED_BY(capture_race_checker_); + FrameBlocker capture_blocker_ RTC_GUARDED_BY(capture_race_checker_); + FrameBlocker render_blocker_ RTC_GUARDED_BY(capture_race_checker_); + SwapQueue<std::vector<std::vector<float>>, Aec3RenderQueueItemVerifier> + render_transfer_queue_; + std::unique_ptr<BlockProcessor> block_processor_ + RTC_GUARDED_BY(capture_race_checker_); + std::vector<std::vector<float>> render_queue_output_frame_ + RTC_GUARDED_BY(capture_race_checker_); + std::unique_ptr<CascadedBiQuadFilter> capture_highpass_filter_ + RTC_GUARDED_BY(capture_race_checker_); + bool saturated_microphone_signal_ RTC_GUARDED_BY(capture_race_checker_) = + false; + std::vector<std::vector<float>> block_ RTC_GUARDED_BY(capture_race_checker_); + std::vector<rtc::ArrayView<float>> sub_frame_view_ + RTC_GUARDED_BY(capture_race_checker_); + + RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(EchoCanceller3); +}; +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_ECHO_CANCELLER3_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/echo_canceller3_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/echo_canceller3_unittest.cc new file mode 100644 index 0000000000..75de48b547 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/echo_canceller3_unittest.cc @@ -0,0 +1,748 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/echo_canceller3.h" + +#include <deque> +#include <memory> +#include <sstream> +#include <string> +#include <utility> +#include <vector> + +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/block_processor.h" +#include "modules/audio_processing/aec3/frame_blocker.h" +#include "modules/audio_processing/aec3/mock/mock_block_processor.h" +#include "modules/audio_processing/audio_buffer.h" +#include "test/gmock.h" +#include "test/gtest.h" + +namespace webrtc { +namespace { + +using testing::StrictMock; +using testing::_; + +// Populates the frame with linearly increasing sample values for each band, +// with a band-specific offset, in order to allow simple bitexactness +// verification for each band. +void PopulateInputFrame(size_t frame_length, + size_t num_bands, + size_t frame_index, + float* const* frame, + int offset) { + for (size_t k = 0; k < num_bands; ++k) { + for (size_t i = 0; i < frame_length; ++i) { + float value = static_cast<int>(frame_index * frame_length + i) + offset; + frame[k][i] = (value > 0 ? 5000 * k + value : 0); + } + } +} + +// Populates the frame with linearly increasing sample values. 
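+// In contrast to the multi-band variant above, this overload fills a single
+// full-band frame and clamps negative values to zero.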
+void PopulateInputFrame(size_t frame_length,
+                        size_t frame_index,
+                        float* frame,
+                        int offset) {
+  for (size_t i = 0; i < frame_length; ++i) {
+    float value = static_cast<int>(frame_index * frame_length + i) + offset;
+    frame[i] = std::max(value, 0.f);
+  }
+}
+
+// Verifies that the samples in the output frame are identical to the samples
+// that were produced for the input frame, with an offset to compensate for
+// buffering delays.
+bool VerifyOutputFrameBitexactness(size_t frame_length,
+                                   size_t num_bands,
+                                   size_t frame_index,
+                                   const float* const* frame,
+                                   int offset) {
+  float reference_frame_data[kMaxNumBands][2 * kSubFrameLength];
+  float* reference_frame[kMaxNumBands];
+  for (size_t k = 0; k < num_bands; ++k) {
+    reference_frame[k] = &reference_frame_data[k][0];
+  }
+
+  PopulateInputFrame(frame_length, num_bands, frame_index, reference_frame,
+                     offset);
+  for (size_t k = 0; k < num_bands; ++k) {
+    for (size_t i = 0; i < frame_length; ++i) {
+      if (reference_frame[k][i] != frame[k][i]) {
+        return false;
+      }
+    }
+  }
+
+  return true;
+}
+
+// Class for testing that the capture data is properly received by the block
+// processor and that the processor data is properly passed to the
+// EchoCanceller3 output.
+class CaptureTransportVerificationProcessor : public BlockProcessor {
+ public:
+  explicit CaptureTransportVerificationProcessor(size_t num_bands) {}
+  ~CaptureTransportVerificationProcessor() override = default;
+
+  void ProcessCapture(bool level_change,
+                      bool saturated_microphone_signal,
+                      std::vector<std::vector<float>>* capture_block) override {
+  }
+
+  void BufferRender(const std::vector<std::vector<float>>& block) override {}
+
+  void UpdateEchoLeakageStatus(bool leakage_detected) override {}
+
+  void GetMetrics(EchoControl::Metrics* metrics) const override {}
+
+ private:
+  RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(CaptureTransportVerificationProcessor);
+};
+
+// Class for testing that the render data is properly received by the block
+// processor.
+class RenderTransportVerificationProcessor : public BlockProcessor {
+ public:
+  explicit RenderTransportVerificationProcessor(size_t num_bands) {}
+  ~RenderTransportVerificationProcessor() override = default;
+
+  void ProcessCapture(bool level_change,
+                      bool saturated_microphone_signal,
+                      std::vector<std::vector<float>>* capture_block) override {
+    std::vector<std::vector<float>> render_block =
+        received_render_blocks_.front();
+    received_render_blocks_.pop_front();
+    capture_block->swap(render_block);
+  }
+
+  void BufferRender(const std::vector<std::vector<float>>& block) override {
+    received_render_blocks_.push_back(block);
+  }
+
+  void UpdateEchoLeakageStatus(bool leakage_detected) override {}
+
+  void GetMetrics(EchoControl::Metrics* metrics) const override {}
+
+ private:
+  std::deque<std::vector<std::vector<float>>> received_render_blocks_;
+  RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(RenderTransportVerificationProcessor);
+};
+
+class EchoCanceller3Tester {
+ public:
+  explicit EchoCanceller3Tester(int sample_rate_hz)
+      : sample_rate_hz_(sample_rate_hz),
+        num_bands_(NumBandsForRate(sample_rate_hz_)),
+        frame_length_(sample_rate_hz_ == 8000 ? 80 : 160),
+        fullband_frame_length_(rtc::CheckedDivExact(sample_rate_hz_, 100)),
+        capture_buffer_(fullband_frame_length_,
+                        1,
+                        fullband_frame_length_,
+                        1,
+                        fullband_frame_length_),
+        render_buffer_(fullband_frame_length_,
+                       1,
+                       fullband_frame_length_,
+                       1,
+                       fullband_frame_length_) {}
+
+  // Verifies that the capture data is properly received by the block processor
+  // and that the processor data is properly passed to the EchoCanceller3
+  // output.
+  void RunCaptureTransportVerificationTest() {
+    EchoCanceller3 aec3(
+        sample_rate_hz_, false,
+        std::unique_ptr<BlockProcessor>(
+            new CaptureTransportVerificationProcessor(num_bands_)));
+
+    for (size_t frame_index = 0; frame_index < kNumFramesToProcess;
+         ++frame_index) {
+      aec3.AnalyzeCapture(&capture_buffer_);
+      OptionalBandSplit();
+      PopulateInputFrame(frame_length_, num_bands_, frame_index,
+                         &capture_buffer_.split_bands_f(0)[0], 0);
+      PopulateInputFrame(frame_length_, frame_index,
+                         &render_buffer_.channels_f()[0][0], 0);
+
+      aec3.AnalyzeRender(&render_buffer_);
+      aec3.ProcessCapture(&capture_buffer_, false);
+      EXPECT_TRUE(VerifyOutputFrameBitexactness(
+          frame_length_, num_bands_, frame_index,
+          &capture_buffer_.split_bands_f(0)[0], -64));
+    }
+  }
+
+  // Verifies that the render data is properly received by the block processor.
+  void RunRenderTransportVerificationTest() {
+    EchoCanceller3 aec3(
+        sample_rate_hz_, false,
+        std::unique_ptr<BlockProcessor>(
+            new RenderTransportVerificationProcessor(num_bands_)));
+
+    for (size_t frame_index = 0; frame_index < kNumFramesToProcess;
+         ++frame_index) {
+      aec3.AnalyzeCapture(&capture_buffer_);
+      OptionalBandSplit();
+      PopulateInputFrame(frame_length_, num_bands_, frame_index,
+                         &capture_buffer_.split_bands_f(0)[0], 100);
+      PopulateInputFrame(frame_length_, num_bands_, frame_index,
+                         &render_buffer_.split_bands_f(0)[0], 0);
+
+      aec3.AnalyzeRender(&render_buffer_);
+      aec3.ProcessCapture(&capture_buffer_, false);
+      EXPECT_TRUE(VerifyOutputFrameBitexactness(
+          frame_length_, num_bands_, frame_index,
+          &capture_buffer_.split_bands_f(0)[0], -64));
+    }
+  }
+
+  // Verifies that information about echo path changes is properly propagated
+  // to the block processor.
+  // The cases tested are:
+  // -That no set echo path change flags are received when there is no echo path
+  // change.
+  // -That set echo path change flags are received and continue to be received
+  // as long as echo path changes are flagged.
+  // -That set echo path change flags are no longer received when echo path
+  // change events stop being flagged.
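+  // As an example of the block counts used in the verification below: at
+  // 16 kHz the lowest band carries 160 samples per 10 ms frame, so each frame
+  // immediately yields 160 / 64 = 2 full 64-sample blocks
+  // (num_full_blocks_per_frame), and kNumFramesToProcess = 20 frames yield
+  // 20 * 160 / 64 = 50 blocks in total (expected_num_block_to_process).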
+ enum class EchoPathChangeTestVariant { kNone, kOneSticky, kOneNonSticky }; + + void RunEchoPathChangeVerificationTest( + EchoPathChangeTestVariant echo_path_change_test_variant) { + const size_t num_full_blocks_per_frame = + rtc::CheckedDivExact(LowestBandRate(sample_rate_hz_), 100) / kBlockSize; + const size_t expected_num_block_to_process = + (kNumFramesToProcess * + rtc::CheckedDivExact(LowestBandRate(sample_rate_hz_), 100)) / + kBlockSize; + std::unique_ptr<testing::StrictMock<webrtc::test::MockBlockProcessor>> + block_processor_mock( + new StrictMock<webrtc::test::MockBlockProcessor>()); + EXPECT_CALL(*block_processor_mock, BufferRender(_)) + .Times(expected_num_block_to_process); + EXPECT_CALL(*block_processor_mock, UpdateEchoLeakageStatus(_)).Times(0); + + switch (echo_path_change_test_variant) { + case EchoPathChangeTestVariant::kNone: + EXPECT_CALL(*block_processor_mock, ProcessCapture(false, _, _)) + .Times(expected_num_block_to_process); + break; + case EchoPathChangeTestVariant::kOneSticky: + EXPECT_CALL(*block_processor_mock, ProcessCapture(true, _, _)) + .Times(expected_num_block_to_process); + break; + case EchoPathChangeTestVariant::kOneNonSticky: + EXPECT_CALL(*block_processor_mock, ProcessCapture(true, _, _)) + .Times(num_full_blocks_per_frame); + EXPECT_CALL(*block_processor_mock, ProcessCapture(false, _, _)) + .Times(expected_num_block_to_process - num_full_blocks_per_frame); + break; + } + + EchoCanceller3 aec3(sample_rate_hz_, false, + std::move(block_processor_mock)); + + for (size_t frame_index = 0; frame_index < kNumFramesToProcess; + ++frame_index) { + bool echo_path_change = false; + switch (echo_path_change_test_variant) { + case EchoPathChangeTestVariant::kNone: + break; + case EchoPathChangeTestVariant::kOneSticky: + echo_path_change = true; + break; + case EchoPathChangeTestVariant::kOneNonSticky: + if (frame_index == 0) { + echo_path_change = true; + } + break; + } + + aec3.AnalyzeCapture(&capture_buffer_); + OptionalBandSplit(); + + PopulateInputFrame(frame_length_, num_bands_, frame_index, + &capture_buffer_.split_bands_f(0)[0], 0); + PopulateInputFrame(frame_length_, frame_index, + &render_buffer_.channels_f()[0][0], 0); + + aec3.AnalyzeRender(&render_buffer_); + aec3.ProcessCapture(&capture_buffer_, echo_path_change); + } + } + + // Test for verifying that echo leakage information is being properly passed + // to the processor. + // The cases tested are: + // -That no method calls are received when they should not. + // -That false values are received each time they are flagged. + // -That true values are received each time they are flagged. + // -That a false value is received when flagged after a true value has been + // flagged. 
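+  // For instance, in the kTrueNonSticky variant below the mock is set up to
+  // expect a single UpdateEchoLeakageStatus(true) call followed, in order, by
+  // kNumFramesToProcess - 1 calls with false.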
+ enum class EchoLeakageTestVariant { + kNone, + kFalseSticky, + kTrueSticky, + kTrueNonSticky + }; + + void RunEchoLeakageVerificationTest( + EchoLeakageTestVariant leakage_report_variant) { + const size_t expected_num_block_to_process = + (kNumFramesToProcess * + rtc::CheckedDivExact(LowestBandRate(sample_rate_hz_), 100)) / + kBlockSize; + std::unique_ptr<testing::StrictMock<webrtc::test::MockBlockProcessor>> + block_processor_mock( + new StrictMock<webrtc::test::MockBlockProcessor>()); + EXPECT_CALL(*block_processor_mock, BufferRender(_)) + .Times(expected_num_block_to_process); + EXPECT_CALL(*block_processor_mock, ProcessCapture(_, _, _)) + .Times(expected_num_block_to_process); + + switch (leakage_report_variant) { + case EchoLeakageTestVariant::kNone: + EXPECT_CALL(*block_processor_mock, UpdateEchoLeakageStatus(_)).Times(0); + break; + case EchoLeakageTestVariant::kFalseSticky: + EXPECT_CALL(*block_processor_mock, UpdateEchoLeakageStatus(false)) + .Times(1); + break; + case EchoLeakageTestVariant::kTrueSticky: + EXPECT_CALL(*block_processor_mock, UpdateEchoLeakageStatus(true)) + .Times(1); + break; + case EchoLeakageTestVariant::kTrueNonSticky: { + testing::InSequence s; + EXPECT_CALL(*block_processor_mock, UpdateEchoLeakageStatus(true)) + .Times(1); + EXPECT_CALL(*block_processor_mock, UpdateEchoLeakageStatus(false)) + .Times(kNumFramesToProcess - 1); + } break; + } + + EchoCanceller3 aec3(sample_rate_hz_, false, + std::move(block_processor_mock)); + + for (size_t frame_index = 0; frame_index < kNumFramesToProcess; + ++frame_index) { + switch (leakage_report_variant) { + case EchoLeakageTestVariant::kNone: + break; + case EchoLeakageTestVariant::kFalseSticky: + if (frame_index == 0) { + aec3.UpdateEchoLeakageStatus(false); + } + break; + case EchoLeakageTestVariant::kTrueSticky: + if (frame_index == 0) { + aec3.UpdateEchoLeakageStatus(true); + } + break; + case EchoLeakageTestVariant::kTrueNonSticky: + if (frame_index == 0) { + aec3.UpdateEchoLeakageStatus(true); + } else { + aec3.UpdateEchoLeakageStatus(false); + } + break; + } + + aec3.AnalyzeCapture(&capture_buffer_); + OptionalBandSplit(); + + PopulateInputFrame(frame_length_, num_bands_, frame_index, + &capture_buffer_.split_bands_f(0)[0], 0); + PopulateInputFrame(frame_length_, frame_index, + &render_buffer_.channels_f()[0][0], 0); + + aec3.AnalyzeRender(&render_buffer_); + aec3.ProcessCapture(&capture_buffer_, false); + } + } + + // This verifies that saturation information is properly passed to the + // BlockProcessor. + // The cases tested are: + // -That no saturation event is passed to the processor if there is no + // saturation. + // -That one frame with one negative saturated sample value is reported to be + // saturated and that following non-saturated frames are properly reported as + // not being saturated. + // -That one frame with one positive saturated sample value is reported to be + // saturated and that following non-saturated frames are properly reported as + // not being saturated. 
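+  // DetectSaturation() in echo_canceller3.cc flags samples at or beyond
+  // +/-32700, so the single -32768.f or 32767.f sample written below is
+  // sufficient to mark the whole frame as saturated.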
+ enum class SaturationTestVariant { kNone, kOneNegative, kOnePositive }; + + void RunCaptureSaturationVerificationTest( + SaturationTestVariant saturation_variant) { + const size_t num_full_blocks_per_frame = + rtc::CheckedDivExact(LowestBandRate(sample_rate_hz_), 100) / kBlockSize; + const size_t expected_num_block_to_process = + (kNumFramesToProcess * + rtc::CheckedDivExact(LowestBandRate(sample_rate_hz_), 100)) / + kBlockSize; + std::unique_ptr<testing::StrictMock<webrtc::test::MockBlockProcessor>> + block_processor_mock( + new StrictMock<webrtc::test::MockBlockProcessor>()); + EXPECT_CALL(*block_processor_mock, BufferRender(_)) + .Times(expected_num_block_to_process); + EXPECT_CALL(*block_processor_mock, UpdateEchoLeakageStatus(_)).Times(0); + + switch (saturation_variant) { + case SaturationTestVariant::kNone: + EXPECT_CALL(*block_processor_mock, ProcessCapture(_, false, _)) + .Times(expected_num_block_to_process); + break; + case SaturationTestVariant::kOneNegative: { + testing::InSequence s; + EXPECT_CALL(*block_processor_mock, ProcessCapture(_, true, _)) + .Times(num_full_blocks_per_frame); + EXPECT_CALL(*block_processor_mock, ProcessCapture(_, false, _)) + .Times(expected_num_block_to_process - num_full_blocks_per_frame); + } break; + case SaturationTestVariant::kOnePositive: { + testing::InSequence s; + EXPECT_CALL(*block_processor_mock, ProcessCapture(_, true, _)) + .Times(num_full_blocks_per_frame); + EXPECT_CALL(*block_processor_mock, ProcessCapture(_, false, _)) + .Times(expected_num_block_to_process - num_full_blocks_per_frame); + } break; + } + + EchoCanceller3 aec3(sample_rate_hz_, false, + std::move(block_processor_mock)); + for (size_t frame_index = 0; frame_index < kNumFramesToProcess; + ++frame_index) { + for (int k = 0; k < fullband_frame_length_; ++k) { + capture_buffer_.channels_f()[0][k] = 0.f; + } + switch (saturation_variant) { + case SaturationTestVariant::kNone: + break; + case SaturationTestVariant::kOneNegative: + if (frame_index == 0) { + capture_buffer_.channels_f()[0][10] = -32768.f; + } + break; + case SaturationTestVariant::kOnePositive: + if (frame_index == 0) { + capture_buffer_.channels_f()[0][10] = 32767.f; + } + break; + } + + aec3.AnalyzeCapture(&capture_buffer_); + OptionalBandSplit(); + + PopulateInputFrame(frame_length_, num_bands_, frame_index, + &capture_buffer_.split_bands_f(0)[0], 0); + PopulateInputFrame(frame_length_, num_bands_, frame_index, + &render_buffer_.split_bands_f(0)[0], 0); + + aec3.AnalyzeRender(&render_buffer_); + aec3.ProcessCapture(&capture_buffer_, false); + } + } + + // This test verifies that the swapqueue is able to handle jitter in the + // capture and render API calls. 
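+  // Render frames are inserted kRenderTransferQueueSize times before the
+  // first capture call, so the render transfer queue must absorb that much
+  // render-side lead without dropping frames.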
+  void RunRenderSwapQueueVerificationTest() {
+    EchoCanceller3 aec3(
+        sample_rate_hz_, false,
+        std::unique_ptr<BlockProcessor>(
+            new RenderTransportVerificationProcessor(num_bands_)));
+
+    for (size_t frame_index = 0; frame_index < kRenderTransferQueueSize;
+         ++frame_index) {
+      if (sample_rate_hz_ > 16000) {
+        render_buffer_.SplitIntoFrequencyBands();
+      }
+      PopulateInputFrame(frame_length_, num_bands_, frame_index,
+                         &render_buffer_.split_bands_f(0)[0], 0);
+
+      if (sample_rate_hz_ > 16000) {
+        render_buffer_.SplitIntoFrequencyBands();
+      }
+
+      aec3.AnalyzeRender(&render_buffer_);
+    }
+
+    for (size_t frame_index = 0; frame_index < kRenderTransferQueueSize;
+         ++frame_index) {
+      aec3.AnalyzeCapture(&capture_buffer_);
+      if (sample_rate_hz_ > 16000) {
+        capture_buffer_.SplitIntoFrequencyBands();
+      }
+
+      PopulateInputFrame(frame_length_, num_bands_, frame_index,
+                         &capture_buffer_.split_bands_f(0)[0], 0);
+
+      aec3.ProcessCapture(&capture_buffer_, false);
+      EXPECT_TRUE(VerifyOutputFrameBitexactness(
+          frame_length_, num_bands_, frame_index,
+          &capture_buffer_.split_bands_f(0)[0], -64));
+    }
+  }
+
+  // This test verifies that a buffer overrun in the render swapqueue is
+  // properly reported.
+  void RunRenderPipelineSwapQueueOverrunReturnValueTest() {
+    EchoCanceller3 aec3(EchoCanceller3Config(), sample_rate_hz_, false);
+
+    constexpr size_t kRenderTransferQueueSize = 30;
+    for (size_t k = 0; k < 2; ++k) {
+      for (size_t frame_index = 0; frame_index < kRenderTransferQueueSize;
+           ++frame_index) {
+        if (sample_rate_hz_ > 16000) {
+          render_buffer_.SplitIntoFrequencyBands();
+        }
+        PopulateInputFrame(frame_length_, frame_index,
+                           &render_buffer_.channels_f()[0][0], 0);
+
+        // The first pass (k == 0) fills the queue and the second pass
+        // overruns it; both exercise the same AnalyzeRender call.
+        aec3.AnalyzeRender(&render_buffer_);
+      }
+    }
+  }
+
+#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
+  // Verifies that the check for the number of bands in the AnalyzeRender
+  // input is correct by adjusting the sample rates of EchoCanceller3 and the
+  // input AudioBuffer to have a different number of bands.
+  void RunAnalyzeRenderNumBandsCheckVerification() {
+    // Set aec3_sample_rate_hz to be different from sample_rate_hz_ in such a
+    // way that the number of bands differs between the rates.
+    const int aec3_sample_rate_hz = sample_rate_hz_ == 48000 ? 32000 : 48000;
+    EchoCanceller3 aec3(EchoCanceller3Config(), aec3_sample_rate_hz, false);
+    PopulateInputFrame(frame_length_, 0, &render_buffer_.channels_f()[0][0], 0);
+
+    EXPECT_DEATH(aec3.AnalyzeRender(&render_buffer_), "");
+  }
+
+  // Verifies that the check for the number of bands in the ProcessCapture
+  // input is correct by adjusting the sample rates of EchoCanceller3 and the
+  // input AudioBuffer to have a different number of bands.
+  void RunProcessCaptureNumBandsCheckVerification() {
+    // Set aec3_sample_rate_hz to be different from sample_rate_hz_ in such a
+    // way that the number of bands differs between the rates.
+    const int aec3_sample_rate_hz = sample_rate_hz_ == 48000 ? 32000 : 48000;
+    EchoCanceller3 aec3(EchoCanceller3Config(), aec3_sample_rate_hz, false);
+    PopulateInputFrame(frame_length_, num_bands_, 0,
+                       &capture_buffer_.split_bands_f(0)[0], 100);
+    EXPECT_DEATH(aec3.ProcessCapture(&capture_buffer_, false), "");
+  }
+
+  // Verifies that the check for the frame length in the AnalyzeRender input
+  // is correct by adjusting the sample rates of EchoCanceller3 and the input
+  // AudioBuffer to have different frame lengths.
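+  // For example, pairing an 8 kHz canceller (80-sample band frames) with a
+  // 16 kHz AudioBuffer (160-sample band frames) should trip the frame-length
+  // DCHECK.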
+  void RunAnalyzeRenderFrameLengthCheckVerification() {
+    // Set aec3_sample_rate_hz to be different from sample_rate_hz_ in such a
+    // way that the band frame lengths are different.
+    const int aec3_sample_rate_hz = sample_rate_hz_ == 8000 ? 16000 : 8000;
+    EchoCanceller3 aec3(EchoCanceller3Config(), aec3_sample_rate_hz, false);
+
+    OptionalBandSplit();
+    PopulateInputFrame(frame_length_, 0, &render_buffer_.channels_f()[0][0], 0);
+
+    EXPECT_DEATH(aec3.AnalyzeRender(&render_buffer_), "");
+  }
+
+  // Verifies that the check for the frame length in the ProcessCapture input
+  // is correct by adjusting the sample rates of EchoCanceller3 and the input
+  // AudioBuffer to have different frame lengths.
+  void RunProcessCaptureFrameLengthCheckVerification() {
+    // Set aec3_sample_rate_hz to be different from sample_rate_hz_ in such a
+    // way that the band frame lengths are different.
+    const int aec3_sample_rate_hz = sample_rate_hz_ == 8000 ? 16000 : 8000;
+    EchoCanceller3 aec3(EchoCanceller3Config(), aec3_sample_rate_hz, false);
+
+    OptionalBandSplit();
+    PopulateInputFrame(frame_length_, num_bands_, 0,
+                       &capture_buffer_.split_bands_f(0)[0], 100);
+
+    EXPECT_DEATH(aec3.ProcessCapture(&capture_buffer_, false), "");
+  }
+
+#endif
+
+ private:
+  void OptionalBandSplit() {
+    if (sample_rate_hz_ > 16000) {
+      capture_buffer_.SplitIntoFrequencyBands();
+      render_buffer_.SplitIntoFrequencyBands();
+    }
+  }
+
+  static constexpr size_t kNumFramesToProcess = 20;
+  const int sample_rate_hz_;
+  const size_t num_bands_;
+  const size_t frame_length_;
+  const int fullband_frame_length_;
+  AudioBuffer capture_buffer_;
+  AudioBuffer render_buffer_;
+
+  RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(EchoCanceller3Tester);
+};
+
+std::string ProduceDebugText(int sample_rate_hz) {
+  std::ostringstream ss;
+  ss << "Sample rate: " << sample_rate_hz;
+  return ss.str();
+}
+
+std::string ProduceDebugText(int sample_rate_hz, int variant) {
+  std::ostringstream ss;
+  ss << "Sample rate: " << sample_rate_hz << ", variant: " << variant;
+  return ss.str();
+}
+
+}  // namespace
+
+TEST(EchoCanceller3Buffering, CaptureBitexactness) {
+  for (auto rate : {8000, 16000, 32000, 48000}) {
+    SCOPED_TRACE(ProduceDebugText(rate));
+    EchoCanceller3Tester(rate).RunCaptureTransportVerificationTest();
+  }
+}
+
+TEST(EchoCanceller3Buffering, RenderBitexactness) {
+  for (auto rate : {8000, 16000, 32000, 48000}) {
+    SCOPED_TRACE(ProduceDebugText(rate));
+    EchoCanceller3Tester(rate).RunRenderTransportVerificationTest();
+  }
+}
+
+TEST(EchoCanceller3Buffering, RenderSwapQueue) {
+  for (auto rate : {8000, 16000}) {
+    SCOPED_TRACE(ProduceDebugText(rate));
+    EchoCanceller3Tester(rate).RunRenderSwapQueueVerificationTest();
+  }
+}
+
+TEST(EchoCanceller3Buffering, RenderSwapQueueOverrunReturnValue) {
+  for (auto rate : {8000, 16000, 32000, 48000}) {
+    SCOPED_TRACE(ProduceDebugText(rate));
+    EchoCanceller3Tester(rate)
+        .RunRenderPipelineSwapQueueOverrunReturnValueTest();
+  }
+}
+
+TEST(EchoCanceller3Messaging, CaptureSaturation) {
+  auto variants = {EchoCanceller3Tester::SaturationTestVariant::kNone,
+                   EchoCanceller3Tester::SaturationTestVariant::kOneNegative,
+                   EchoCanceller3Tester::SaturationTestVariant::kOnePositive};
+  for (auto rate : {8000, 16000, 32000, 48000}) {
+    for (auto variant : variants) {
+      SCOPED_TRACE(ProduceDebugText(rate, static_cast<int>(variant)));
+      EchoCanceller3Tester(rate).RunCaptureSaturationVerificationTest(variant);
+    }
+  }
+}
+
+TEST(EchoCanceller3Messaging, EchoPathChange) {
+  auto variants = {
+      EchoCanceller3Tester::EchoPathChangeTestVariant::kNone,
+      EchoCanceller3Tester::EchoPathChangeTestVariant::kOneSticky,
+      EchoCanceller3Tester::EchoPathChangeTestVariant::kOneNonSticky};
+  for (auto rate : {8000, 16000, 32000, 48000}) {
+    for (auto variant : variants) {
+      SCOPED_TRACE(ProduceDebugText(rate, static_cast<int>(variant)));
+      EchoCanceller3Tester(rate).RunEchoPathChangeVerificationTest(variant);
+    }
+  }
+}
+
+TEST(EchoCanceller3Messaging, EchoLeakage) {
+  auto variants = {
+      EchoCanceller3Tester::EchoLeakageTestVariant::kNone,
+      EchoCanceller3Tester::EchoLeakageTestVariant::kFalseSticky,
+      EchoCanceller3Tester::EchoLeakageTestVariant::kTrueSticky,
+      EchoCanceller3Tester::EchoLeakageTestVariant::kTrueNonSticky};
+  for (auto rate : {8000, 16000, 32000, 48000}) {
+    for (auto variant : variants) {
+      SCOPED_TRACE(ProduceDebugText(rate, static_cast<int>(variant)));
+      EchoCanceller3Tester(rate).RunEchoLeakageVerificationTest(variant);
+    }
+  }
+}
+
+TEST(EchoCanceller3, ConfigValidation) {
+  EchoCanceller3Config config;
+  EXPECT_TRUE(EchoCanceller3::Validate(config));
+}
+
+#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
+
+TEST(EchoCanceller3InputCheck, WrongCaptureNumBandsCheckVerification) {
+  for (auto rate : {8000, 16000, 32000, 48000}) {
+    SCOPED_TRACE(ProduceDebugText(rate));
+    EchoCanceller3Tester(rate).RunProcessCaptureNumBandsCheckVerification();
+  }
+}
+
+// TODO(peah): Re-enable the test once the issue with memory leaks during DEATH
+// tests on test bots has been fixed.
+TEST(EchoCanceller3InputCheck,
+     DISABLED_WrongRenderFrameLengthCheckVerification) {
+  for (auto rate : {8000, 16000}) {
+    SCOPED_TRACE(ProduceDebugText(rate));
+    EchoCanceller3Tester(rate).RunAnalyzeRenderFrameLengthCheckVerification();
+  }
+}
+
+TEST(EchoCanceller3InputCheck, WrongCaptureFrameLengthCheckVerification) {
+  for (auto rate : {8000, 16000}) {
+    SCOPED_TRACE(ProduceDebugText(rate));
+    EchoCanceller3Tester(rate).RunProcessCaptureFrameLengthCheckVerification();
+  }
+}
+
+// Verifies that the null-input check in the render analysis API call works.
+TEST(EchoCanceller3InputCheck, NullRenderAnalysisParameter) {
+  EXPECT_DEATH(EchoCanceller3(EchoCanceller3Config(), 8000, false)
+                   .AnalyzeRender(nullptr),
+               "");
+}
+
+// Verifies that the null-input check in the capture analysis API call works.
+TEST(EchoCanceller3InputCheck, NullCaptureAnalysisParameter) {
+  EXPECT_DEATH(EchoCanceller3(EchoCanceller3Config(), 8000, false)
+                   .AnalyzeCapture(nullptr),
+               "");
+}
+
+// Verifies that the null-input check in the capture processing API call
+// works.
+TEST(EchoCanceller3InputCheck, NullCaptureProcessingParameter) {
+  EXPECT_DEATH(EchoCanceller3(EchoCanceller3Config(), 8000, false)
+                   .ProcessCapture(nullptr, false),
+               "");
+}
+
+// Verifies the check for correct sample rate.
+// TODO(peah): Re-enable the test once the issue with memory leaks during DEATH
+// tests on test bots has been fixed.
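+// 8001 Hz is not one of the supported full-band rates (8000, 16000, 32000 or
+// 48000 Hz), so construction is expected to hit the ValidFullBandRate()
+// DCHECK in the EchoCanceller3 constructor.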
+TEST(EchoCanceller3InputCheck, DISABLED_WrongSampleRate) { + ApmDataDumper data_dumper(0); + EXPECT_DEATH(EchoCanceller3(EchoCanceller3Config(), 8001, false), ""); +} + +#endif + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/echo_path_delay_estimator.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/echo_path_delay_estimator.cc new file mode 100644 index 0000000000..914f2d28eb --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/echo_path_delay_estimator.cc @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include "modules/audio_processing/aec3/echo_path_delay_estimator.h" + +#include <algorithm> +#include <array> + +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/include/audio_processing.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/checks.h" + +namespace webrtc { + +EchoPathDelayEstimator::EchoPathDelayEstimator( + ApmDataDumper* data_dumper, + const EchoCanceller3Config& config) + : data_dumper_(data_dumper), + down_sampling_factor_(config.delay.down_sampling_factor), + sub_block_size_(down_sampling_factor_ != 0 + ? kBlockSize / down_sampling_factor_ + : kBlockSize), + capture_decimator_(down_sampling_factor_), + matched_filter_(data_dumper_, + DetectOptimization(), + sub_block_size_, + kMatchedFilterWindowSizeSubBlocks, + config.delay.num_filters, + kMatchedFilterAlignmentShiftSizeSubBlocks, + config.render_levels.poor_excitation_render_limit), + matched_filter_lag_aggregator_(data_dumper_, + matched_filter_.GetMaxFilterLag()) { + RTC_DCHECK(data_dumper); + RTC_DCHECK(down_sampling_factor_ > 0); +} + +EchoPathDelayEstimator::~EchoPathDelayEstimator() = default; + +void EchoPathDelayEstimator::Reset() { + matched_filter_lag_aggregator_.Reset(); + matched_filter_.Reset(); +} + +rtc::Optional<size_t> EchoPathDelayEstimator::EstimateDelay( + const DownsampledRenderBuffer& render_buffer, + rtc::ArrayView<const float> capture) { + RTC_DCHECK_EQ(kBlockSize, capture.size()); + + std::array<float, kBlockSize> downsampled_capture_data; + rtc::ArrayView<float> downsampled_capture(downsampled_capture_data.data(), + sub_block_size_); + data_dumper_->DumpWav("aec3_capture_decimator_input", capture.size(), + capture.data(), 16000, 1); + capture_decimator_.Decimate(capture, downsampled_capture); + data_dumper_->DumpWav("aec3_capture_decimator_output", + downsampled_capture.size(), downsampled_capture.data(), + 16000 / down_sampling_factor_, 1); + matched_filter_.Update(render_buffer, downsampled_capture); + + rtc::Optional<size_t> aggregated_matched_filter_lag = + matched_filter_lag_aggregator_.Aggregate( + matched_filter_.GetLagEstimates()); + + // TODO(peah): Move this logging outside of this class once EchoCanceller3 + // development is done. + data_dumper_->DumpRaw("aec3_echo_path_delay_estimator_delay", + aggregated_matched_filter_lag + ? 
static_cast<int>(*aggregated_matched_filter_lag *
+                                             down_sampling_factor_)
+                            : -1);
+
+  // Return the detected delay in samples as the aggregated matched filter lag
+  // compensated by the down sampling factor for the signal being correlated.
+  return aggregated_matched_filter_lag
+             ? rtc::Optional<size_t>(*aggregated_matched_filter_lag *
+                                     down_sampling_factor_)
+             : rtc::nullopt;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/echo_path_delay_estimator.h b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/echo_path_delay_estimator.h
new file mode 100644
index 0000000000..04943ca4ba
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/echo_path_delay_estimator.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AEC3_ECHO_PATH_DELAY_ESTIMATOR_H_
+#define MODULES_AUDIO_PROCESSING_AEC3_ECHO_PATH_DELAY_ESTIMATOR_H_
+
+#include <vector>
+
+#include "api/optional.h"
+#include "modules/audio_processing/aec3/decimator.h"
+#include "modules/audio_processing/aec3/downsampled_render_buffer.h"
+#include "modules/audio_processing/aec3/matched_filter.h"
+#include "modules/audio_processing/aec3/matched_filter_lag_aggregator.h"
+#include "modules/audio_processing/include/audio_processing.h"
+#include "rtc_base/constructormagic.h"
+
+namespace webrtc {
+
+class ApmDataDumper;
+
+// Estimates the delay of the echo path.
+class EchoPathDelayEstimator {
+ public:
+  EchoPathDelayEstimator(ApmDataDumper* data_dumper,
+                         const EchoCanceller3Config& config);
+  ~EchoPathDelayEstimator();
+
+  // Resets the estimation.
+  void Reset();
+
+  // Produces a delay estimate if one is available.
+  rtc::Optional<size_t> EstimateDelay(
+      const DownsampledRenderBuffer& render_buffer,
+      rtc::ArrayView<const float> capture);
+
+  // Logs delay estimator properties.
+  void LogDelayEstimationProperties(int sample_rate_hz, size_t shift) const {
+    matched_filter_.LogFilterProperties(sample_rate_hz, shift,
+                                        down_sampling_factor_);
+  }
+
+ private:
+  ApmDataDumper* const data_dumper_;
+  const size_t down_sampling_factor_;
+  const size_t sub_block_size_;
+  Decimator capture_decimator_;
+  MatchedFilter matched_filter_;
+  MatchedFilterLagAggregator matched_filter_lag_aggregator_;
+
+  RTC_DISALLOW_COPY_AND_ASSIGN(EchoPathDelayEstimator);
+};
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AEC3_ECHO_PATH_DELAY_ESTIMATOR_H_
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/echo_path_delay_estimator_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/echo_path_delay_estimator_unittest.cc
new file mode 100644
index 0000000000..2dbdb1ccf4
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/echo_path_delay_estimator_unittest.cc
@@ -0,0 +1,232 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS.
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/echo_path_delay_estimator.h" + +#include <algorithm> +#include <sstream> +#include <string> + +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/render_delay_buffer.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "modules/audio_processing/test/echo_canceller_test_tools.h" +#include "rtc_base/random.h" +#include "test/gtest.h" + +namespace webrtc { +namespace { + +std::string ProduceDebugText(size_t delay) { + std::ostringstream ss; + ss << "Delay: " << delay; + return ss.str(); +} + +} // namespace + +// Verifies that the basic API calls work. +TEST(EchoPathDelayEstimator, BasicApiCalls) { + ApmDataDumper data_dumper(0); + EchoCanceller3Config config; + std::unique_ptr<RenderDelayBuffer> render_delay_buffer( + RenderDelayBuffer::Create( + 3, config.delay.down_sampling_factor, + GetDownSampledBufferSize(config.delay.down_sampling_factor, + config.delay.num_filters), + GetRenderDelayBufferSize(config.delay.down_sampling_factor, + config.delay.num_filters))); + EchoPathDelayEstimator estimator(&data_dumper, config); + std::vector<std::vector<float>> render(3, std::vector<float>(kBlockSize)); + std::vector<float> capture(kBlockSize); + for (size_t k = 0; k < 100; ++k) { + render_delay_buffer->Insert(render); + estimator.EstimateDelay(render_delay_buffer->GetDownsampledRenderBuffer(), + capture); + } +} + +// Verifies that the delay estimator produces correct delay for artificially +// delayed signals. +TEST(EchoPathDelayEstimator, DelayEstimation) { + Random random_generator(42U); + std::vector<std::vector<float>> render(3, std::vector<float>(kBlockSize)); + std::vector<float> capture(kBlockSize); + ApmDataDumper data_dumper(0); + constexpr size_t kDownSamplingFactors[] = {2, 4, 8}; + for (auto down_sampling_factor : kDownSamplingFactors) { + EchoCanceller3Config config; + config.delay.down_sampling_factor = down_sampling_factor; + config.delay.num_filters = 10; + for (size_t delay_samples : {30, 64, 150, 200, 800, 4000}) { + SCOPED_TRACE(ProduceDebugText(delay_samples)); + std::unique_ptr<RenderDelayBuffer> render_delay_buffer( + RenderDelayBuffer::Create( + 3, config.delay.down_sampling_factor, + GetDownSampledBufferSize(config.delay.down_sampling_factor, + config.delay.num_filters), + GetRenderDelayBufferSize(config.delay.down_sampling_factor, + config.delay.num_filters))); + DelayBuffer<float> signal_delay_buffer(delay_samples); + EchoPathDelayEstimator estimator(&data_dumper, config); + + rtc::Optional<size_t> estimated_delay_samples; + for (size_t k = 0; k < (300 + delay_samples / kBlockSize); ++k) { + RandomizeSampleVector(&random_generator, render[0]); + signal_delay_buffer.Delay(render[0], capture); + render_delay_buffer->Insert(render); + render_delay_buffer->UpdateBuffers(); + estimated_delay_samples = estimator.EstimateDelay( + render_delay_buffer->GetDownsampledRenderBuffer(), capture); + } + if (estimated_delay_samples) { + // Due to the internal down-sampling done inside the delay estimator + // the estimated delay cannot be expected to be exact to the true delay. + EXPECT_NEAR(delay_samples, *estimated_delay_samples, + config.delay.down_sampling_factor); + } else { + ADD_FAILURE(); + } + } + } +} + +// Verifies that the delay estimator does not produce delay estimates too +// quickly. 
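+// The loop below feeds only 19 blocks and requires that no estimate is
+// produced yet; the matched filter is expected to need more data before a
+// lag can be reported.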
+TEST(EchoPathDelayEstimator, NoInitialDelayestimates) { + Random random_generator(42U); + EchoCanceller3Config config; + std::vector<std::vector<float>> render(3, std::vector<float>(kBlockSize)); + std::vector<float> capture(kBlockSize); + ApmDataDumper data_dumper(0); + std::unique_ptr<RenderDelayBuffer> render_delay_buffer( + RenderDelayBuffer::Create( + 3, config.delay.down_sampling_factor, + GetDownSampledBufferSize(config.delay.down_sampling_factor, + config.delay.num_filters), + GetRenderDelayBufferSize(config.delay.down_sampling_factor, + config.delay.num_filters))); + + EchoPathDelayEstimator estimator(&data_dumper, config); + for (size_t k = 0; k < 19; ++k) { + RandomizeSampleVector(&random_generator, render[0]); + std::copy(render[0].begin(), render[0].end(), capture.begin()); + render_delay_buffer->Insert(render); + render_delay_buffer->UpdateBuffers(); + EXPECT_FALSE(estimator.EstimateDelay( + render_delay_buffer->GetDownsampledRenderBuffer(), capture)); + } +} + +// Verifies that the delay estimator does not produce delay estimates for render +// signals of low level. +TEST(EchoPathDelayEstimator, NoDelayEstimatesForLowLevelRenderSignals) { + Random random_generator(42U); + EchoCanceller3Config config; + std::vector<std::vector<float>> render(3, std::vector<float>(kBlockSize)); + std::vector<float> capture(kBlockSize); + ApmDataDumper data_dumper(0); + EchoPathDelayEstimator estimator(&data_dumper, config); + std::unique_ptr<RenderDelayBuffer> render_delay_buffer( + RenderDelayBuffer::Create( + 3, config.delay.down_sampling_factor, + GetDownSampledBufferSize(config.delay.down_sampling_factor, + config.delay.num_filters), + GetRenderDelayBufferSize(config.delay.down_sampling_factor, + config.delay.num_filters))); + for (size_t k = 0; k < 100; ++k) { + RandomizeSampleVector(&random_generator, render[0]); + for (auto& render_k : render[0]) { + render_k *= 100.f / 32767.f; + } + std::copy(render[0].begin(), render[0].end(), capture.begin()); + render_delay_buffer->Insert(render); + render_delay_buffer->UpdateBuffers(); + EXPECT_FALSE(estimator.EstimateDelay( + render_delay_buffer->GetDownsampledRenderBuffer(), capture)); + } +} + +// Verifies that the delay estimator does not produce delay estimates for +// uncorrelated signals. +TEST(EchoPathDelayEstimator, NoDelayEstimatesForUncorrelatedSignals) { + Random random_generator(42U); + EchoCanceller3Config config; + std::vector<std::vector<float>> render(3, std::vector<float>(kBlockSize)); + std::vector<float> capture(kBlockSize); + ApmDataDumper data_dumper(0); + EchoPathDelayEstimator estimator(&data_dumper, config); + std::unique_ptr<RenderDelayBuffer> render_delay_buffer( + RenderDelayBuffer::Create( + 3, config.delay.down_sampling_factor, + GetDownSampledBufferSize(config.delay.down_sampling_factor, + config.delay.num_filters), + GetRenderDelayBufferSize(config.delay.down_sampling_factor, + config.delay.num_filters))); + for (size_t k = 0; k < 100; ++k) { + RandomizeSampleVector(&random_generator, render[0]); + RandomizeSampleVector(&random_generator, capture); + render_delay_buffer->Insert(render); + render_delay_buffer->UpdateBuffers(); + EXPECT_FALSE(estimator.EstimateDelay( + render_delay_buffer->GetDownsampledRenderBuffer(), capture)); + } +} + +#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) + +// Verifies the check for the render blocksize. +// TODO(peah): Re-enable the test once the issue with memory leaks during DEATH +// tests on test bots has been fixed. 
+TEST(EchoPathDelayEstimator, DISABLED_WrongRenderBlockSize) { + ApmDataDumper data_dumper(0); + EchoCanceller3Config config; + EchoPathDelayEstimator estimator(&data_dumper, config); + std::unique_ptr<RenderDelayBuffer> render_delay_buffer( + RenderDelayBuffer::Create( + 3, config.delay.down_sampling_factor, + GetDownSampledBufferSize(config.delay.down_sampling_factor, + config.delay.num_filters), + GetRenderDelayBufferSize(config.delay.down_sampling_factor, + config.delay.num_filters))); + std::vector<float> capture(kBlockSize); + EXPECT_DEATH(estimator.EstimateDelay( + render_delay_buffer->GetDownsampledRenderBuffer(), capture), + ""); +} + +// Verifies the check for the capture blocksize. +// TODO(peah): Re-enable the test once the issue with memory leaks during DEATH +// tests on test bots has been fixed. +TEST(EchoPathDelayEstimator, WrongCaptureBlockSize) { + ApmDataDumper data_dumper(0); + EchoCanceller3Config config; + EchoPathDelayEstimator estimator(&data_dumper, config); + std::unique_ptr<RenderDelayBuffer> render_delay_buffer( + RenderDelayBuffer::Create( + 3, config.delay.down_sampling_factor, + GetDownSampledBufferSize(config.delay.down_sampling_factor, + config.delay.num_filters), + GetRenderDelayBufferSize(config.delay.down_sampling_factor, + config.delay.num_filters))); + std::vector<float> capture(std::vector<float>(kBlockSize - 1)); + EXPECT_DEATH(estimator.EstimateDelay( + render_delay_buffer->GetDownsampledRenderBuffer(), capture), + ""); +} + +// Verifies the check for non-null data dumper. +TEST(EchoPathDelayEstimator, NullDataDumper) { + EXPECT_DEATH(EchoPathDelayEstimator(nullptr, EchoCanceller3Config()), ""); +} + +#endif + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/echo_path_variability.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/echo_path_variability.cc new file mode 100644 index 0000000000..f63a83006e --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/echo_path_variability.cc @@ -0,0 +1,18 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/echo_path_variability.h" + +namespace webrtc { + +EchoPathVariability::EchoPathVariability(bool gain_change, bool delay_change) + : gain_change(gain_change), delay_change(delay_change) {} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/echo_path_variability.h b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/echo_path_variability.h new file mode 100644 index 0000000000..55915d5b68 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/echo_path_variability.h @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_ECHO_PATH_VARIABILITY_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_ECHO_PATH_VARIABILITY_H_ + +namespace webrtc { + +struct EchoPathVariability { + EchoPathVariability(bool gain_change, bool delay_change); + + bool AudioPathChanged() const { return gain_change || delay_change; } + bool gain_change; + bool delay_change; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_ECHO_PATH_VARIABILITY_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/echo_path_variability_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/echo_path_variability_unittest.cc new file mode 100644 index 0000000000..9a1df78885 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/echo_path_variability_unittest.cc @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/echo_path_variability.h" +#include "test/gtest.h" + +namespace webrtc { + +TEST(EchoPathVariability, CorrectBehavior) { + // Test correct passing and reporting of the gain change information. + EchoPathVariability v(true, true); + EXPECT_TRUE(v.gain_change); + EXPECT_TRUE(v.delay_change); + EXPECT_TRUE(v.AudioPathChanged()); + + v = EchoPathVariability(true, false); + EXPECT_TRUE(v.gain_change); + EXPECT_FALSE(v.delay_change); + EXPECT_TRUE(v.AudioPathChanged()); + + v = EchoPathVariability(false, true); + EXPECT_FALSE(v.gain_change); + EXPECT_TRUE(v.delay_change); + EXPECT_TRUE(v.AudioPathChanged()); + + v = EchoPathVariability(false, false); + EXPECT_FALSE(v.gain_change); + EXPECT_FALSE(v.delay_change); + EXPECT_FALSE(v.AudioPathChanged()); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/echo_remover.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/echo_remover.cc new file mode 100644 index 0000000000..9adcec51f4 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/echo_remover.cc @@ -0,0 +1,256 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ +#include "modules/audio_processing/aec3/echo_remover.h" + +#include <math.h> +#include <algorithm> +#include <memory> +#include <numeric> +#include <string> + +#include "api/array_view.h" +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/aec_state.h" +#include "modules/audio_processing/aec3/comfort_noise_generator.h" +#include "modules/audio_processing/aec3/echo_path_variability.h" +#include "modules/audio_processing/aec3/echo_remover_metrics.h" +#include "modules/audio_processing/aec3/fft_data.h" +#include "modules/audio_processing/aec3/output_selector.h" +#include "modules/audio_processing/aec3/render_buffer.h" +#include "modules/audio_processing/aec3/render_delay_buffer.h" +#include "modules/audio_processing/aec3/residual_echo_estimator.h" +#include "modules/audio_processing/aec3/subtractor.h" +#include "modules/audio_processing/aec3/suppression_filter.h" +#include "modules/audio_processing/aec3/suppression_gain.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/atomicops.h" +#include "rtc_base/constructormagic.h" + +namespace webrtc { + +namespace { + +void LinearEchoPower(const FftData& E, + const FftData& Y, + std::array<float, kFftLengthBy2Plus1>* S2) { + for (size_t k = 0; k < E.re.size(); ++k) { + (*S2)[k] = (Y.re[k] - E.re[k]) * (Y.re[k] - E.re[k]) + + (Y.im[k] - E.im[k]) * (Y.im[k] - E.im[k]); + } +} + +// Class for removing the echo from the capture signal. +class EchoRemoverImpl final : public EchoRemover { + public: + explicit EchoRemoverImpl(const EchoCanceller3Config& config, + int sample_rate_hz); + ~EchoRemoverImpl() override; + + void GetMetrics(EchoControl::Metrics* metrics) const override; + + // Removes the echo from a block of samples from the capture signal. The + // supplied render signal is assumed to be pre-aligned with the capture + // signal. + void ProcessCapture(const rtc::Optional<size_t>& echo_path_delay_samples, + const EchoPathVariability& echo_path_variability, + bool capture_signal_saturation, + const RenderBuffer& render_buffer, + std::vector<std::vector<float>>* capture) override; + + // Updates the status on whether echo leakage is detected in the output of the + // echo remover. 
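An editorial aside on LinearEchoPower() above: it forms the linear echo estimate S = Y - E per frequency bin and takes its squared magnitude. A minimal standalone sketch of the same computation, with plain std::array standing in for FftData and an illustrative function name (the 65-bin size assumes the 128-point real-valued FFT used throughout AEC3):

    #include <array>
    #include <cstddef>

    // Per-bin power of the linear echo estimate S = Y - E, where Y is the
    // capture spectrum and E is the subtractor output. 65 bins correspond to
    // kFftLengthBy2Plus1 for a 128-point real-valued FFT.
    constexpr size_t kBins = 65;

    void LinearEchoPowerSketch(const std::array<float, kBins>& y_re,
                               const std::array<float, kBins>& y_im,
                               const std::array<float, kBins>& e_re,
                               const std::array<float, kBins>& e_im,
                               std::array<float, kBins>* s2) {
      for (size_t k = 0; k < kBins; ++k) {
        const float re = y_re[k] - e_re[k];  // Real part of S[k].
        const float im = y_im[k] - e_im[k];  // Imaginary part of S[k].
        (*s2)[k] = re * re + im * im;        // |S[k]|^2.
      }
    }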
+ void UpdateEchoLeakageStatus(bool leakage_detected) override { + echo_leakage_detected_ = leakage_detected; + } + + private: + static int instance_count_; + const EchoCanceller3Config config_; + const Aec3Fft fft_; + std::unique_ptr<ApmDataDumper> data_dumper_; + const Aec3Optimization optimization_; + const int sample_rate_hz_; + Subtractor subtractor_; + SuppressionGain suppression_gain_; + ComfortNoiseGenerator cng_; + SuppressionFilter suppression_filter_; + RenderSignalAnalyzer render_signal_analyzer_; + OutputSelector output_selector_; + ResidualEchoEstimator residual_echo_estimator_; + bool echo_leakage_detected_ = false; + AecState aec_state_; + EchoRemoverMetrics metrics_; + + RTC_DISALLOW_COPY_AND_ASSIGN(EchoRemoverImpl); +}; + +int EchoRemoverImpl::instance_count_ = 0; + +EchoRemoverImpl::EchoRemoverImpl(const EchoCanceller3Config& config, + int sample_rate_hz) + : config_(config), + fft_(), + data_dumper_( + new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))), + optimization_(DetectOptimization()), + sample_rate_hz_(sample_rate_hz), + subtractor_(data_dumper_.get(), optimization_), + suppression_gain_(config_, optimization_), + cng_(optimization_), + suppression_filter_(sample_rate_hz_), + residual_echo_estimator_(config_), + aec_state_(config_) { + RTC_DCHECK(ValidFullBandRate(sample_rate_hz)); +} + +EchoRemoverImpl::~EchoRemoverImpl() = default; + +void EchoRemoverImpl::GetMetrics(EchoControl::Metrics* metrics) const { + // Echo return loss (ERL) is inverted to go from gain to attenuation. + metrics->echo_return_loss = -10.0 * log10(aec_state_.ErlTimeDomain()); + metrics->echo_return_loss_enhancement = + 10.0 * log10(aec_state_.ErleTimeDomain()); +} + +void EchoRemoverImpl::ProcessCapture( + const rtc::Optional<size_t>& echo_path_delay_samples, + const EchoPathVariability& echo_path_variability, + bool capture_signal_saturation, + const RenderBuffer& render_buffer, + std::vector<std::vector<float>>* capture) { + const std::vector<std::vector<float>>& x = render_buffer.MostRecentBlock(); + std::vector<std::vector<float>>* y = capture; + + RTC_DCHECK(y); + RTC_DCHECK_EQ(x.size(), NumBandsForRate(sample_rate_hz_)); + RTC_DCHECK_EQ(y->size(), NumBandsForRate(sample_rate_hz_)); + RTC_DCHECK_EQ(x[0].size(), kBlockSize); + RTC_DCHECK_EQ((*y)[0].size(), kBlockSize); + const std::vector<float>& x0 = x[0]; + std::vector<float>& y0 = (*y)[0]; + + data_dumper_->DumpWav("aec3_echo_remover_capture_input", kBlockSize, &y0[0], + LowestBandRate(sample_rate_hz_), 1); + data_dumper_->DumpWav("aec3_echo_remover_render_input", kBlockSize, &x0[0], + LowestBandRate(sample_rate_hz_), 1); + data_dumper_->DumpRaw("aec3_echo_remover_capture_input", y0); + data_dumper_->DumpRaw("aec3_echo_remover_render_input", x0); + + aec_state_.UpdateCaptureSaturation(capture_signal_saturation); + + if (echo_path_variability.AudioPathChanged()) { + subtractor_.HandleEchoPathChange(echo_path_variability); + aec_state_.HandleEchoPathChange(echo_path_variability); + } + + std::array<float, kFftLengthBy2Plus1> Y2; + std::array<float, kFftLengthBy2Plus1> R2; + std::array<float, kFftLengthBy2Plus1> S2_linear; + std::array<float, kFftLengthBy2Plus1> G; + float high_bands_gain; + FftData Y; + FftData comfort_noise; + FftData high_band_comfort_noise; + SubtractorOutput subtractor_output; + FftData& E_main = subtractor_output.E_main; + auto& E2_main = subtractor_output.E2_main; + auto& E2_shadow = subtractor_output.E2_shadow; + auto& e_main = subtractor_output.e_main; + + // Analyze the render signal. 
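A note on GetMetrics() above: ERL and ERLE are tracked internally as linear power ratios and only mapped to dB at reporting time, with ERL negated so that a small echo-path gain reports as a large attenuation. A self-contained sketch with arbitrary example values (not values taken from AEC3 itself):

    #include <cmath>
    #include <cstdio>

    int main() {
      const float erl_linear = 0.01f;  // Echo power 20 dB below render power.
      const float erle_linear = 8.f;   // Subtractor removes about 9 dB of echo.
      const float erl_db = -10.f * std::log10(erl_linear);   // 20.0 dB loss.
      const float erle_db = 10.f * std::log10(erle_linear);  // ~9.0 dB gain.
      std::printf("ERL: %.1f dB, ERLE: %.1f dB\n", erl_db, erle_db);
      return 0;
    }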
+  render_signal_analyzer_.Update(render_buffer, aec_state_.FilterDelay());
+
+  // Perform linear echo cancellation.
+  subtractor_.Process(render_buffer, y0, render_signal_analyzer_, aec_state_,
+                      &subtractor_output);
+
+  // Compute spectra.
+  fft_.ZeroPaddedFft(y0, &Y);
+  LinearEchoPower(E_main, Y, &S2_linear);
+  Y.Spectrum(optimization_, &Y2);
+
+  // Update the AEC state information.
+  aec_state_.Update(subtractor_.FilterFrequencyResponse(),
+                    subtractor_.FilterImpulseResponse(),
+                    subtractor_.ConvergedFilter(), echo_path_delay_samples,
+                    render_buffer, E2_main, Y2, x0, subtractor_output.s_main,
+                    echo_leakage_detected_);
+
+  // Choose the linear output.
+  output_selector_.FormLinearOutput(!aec_state_.TransparentMode(), e_main, y0);
+  data_dumper_->DumpWav("aec3_output_linear", kBlockSize, &y0[0],
+                        LowestBandRate(sample_rate_hz_), 1);
+  data_dumper_->DumpRaw("aec3_output_linear", y0);
+  const auto& E2 = output_selector_.UseSubtractorOutput() ? E2_main : Y2;
+
+  // Estimate the residual echo power.
+  residual_echo_estimator_.Estimate(aec_state_, render_buffer, S2_linear, Y2,
+                                    &R2);
+
+  // Estimate the comfort noise.
+  cng_.Compute(aec_state_, Y2, &comfort_noise, &high_band_comfort_noise);
+
+  // Choose and apply the echo suppression gain.
+  suppression_gain_.GetGain(E2, R2, cng_.NoiseSpectrum(),
+                            render_signal_analyzer_, aec_state_, x,
+                            &high_bands_gain, &G);
+  suppression_filter_.ApplyGain(comfort_noise, high_band_comfort_noise, G,
+                                high_bands_gain, y);
+
+  // Update the metrics.
+  metrics_.Update(aec_state_, cng_.NoiseSpectrum(), G);
+
+  // Update the aec state with the aec output characteristics.
+  aec_state_.UpdateWithOutput(y0);
+
+  // Debug outputs for the purpose of development and analysis.
+  data_dumper_->DumpWav("aec3_echo_estimate", kBlockSize,
+                        &subtractor_output.s_main[0],
+                        LowestBandRate(sample_rate_hz_), 1);
+  data_dumper_->DumpRaw("aec3_output", y0);
+  data_dumper_->DumpRaw("aec3_narrow_render",
+                        render_signal_analyzer_.NarrowPeakBand() ? 1 : 0);
+  data_dumper_->DumpRaw("aec3_N2", cng_.NoiseSpectrum());
+  data_dumper_->DumpRaw("aec3_suppressor_gain", G);
+  data_dumper_->DumpWav("aec3_output",
+                        rtc::ArrayView<const float>(&y0[0], kBlockSize),
+                        LowestBandRate(sample_rate_hz_), 1);
+  data_dumper_->DumpRaw("aec3_using_subtractor_output",
+                        output_selector_.UseSubtractorOutput() ? 1 : 0);
+  data_dumper_->DumpRaw("aec3_E2", E2);
+  data_dumper_->DumpRaw("aec3_E2_main", E2_main);
+  data_dumper_->DumpRaw("aec3_E2_shadow", E2_shadow);
+  data_dumper_->DumpRaw("aec3_S2_linear", S2_linear);
+  data_dumper_->DumpRaw("aec3_Y2", Y2);
+  data_dumper_->DumpRaw("aec3_X2", render_buffer.Spectrum(0));
+  data_dumper_->DumpRaw("aec3_R2", R2);
+  data_dumper_->DumpRaw("aec3_erle", aec_state_.Erle());
+  data_dumper_->DumpRaw("aec3_erl", aec_state_.Erl());
+  data_dumper_->DumpRaw("aec3_active_render", aec_state_.ActiveRender());
+  data_dumper_->DumpRaw("aec3_usable_linear_estimate",
+                        aec_state_.UsableLinearEstimate());
+  data_dumper_->DumpRaw(
+      "aec3_filter_delay",
+      aec_state_.FilterDelay() ? *aec_state_.FilterDelay() : -1);
+  data_dumper_->DumpRaw(
+      "aec3_external_delay",
+      aec_state_.ExternalDelay() ? *aec_state_.ExternalDelay() : -1);
+  data_dumper_->DumpRaw("aec3_capture_saturation",
+                        aec_state_.SaturatedCapture() ?
1 : 0); +} + +} // namespace + +EchoRemover* EchoRemover::Create(const EchoCanceller3Config& config, + int sample_rate_hz) { + return new EchoRemoverImpl(config, sample_rate_hz); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/echo_remover.h b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/echo_remover.h new file mode 100644 index 0000000000..7411b5c45c --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/echo_remover.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_ECHO_REMOVER_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_ECHO_REMOVER_H_ + +#include <vector> + +#include "api/optional.h" +#include "modules/audio_processing/aec3/echo_path_variability.h" +#include "modules/audio_processing/aec3/render_buffer.h" +#include "modules/audio_processing/include/audio_processing.h" + +namespace webrtc { + +// Class for removing the echo from the capture signal. +class EchoRemover { + public: + static EchoRemover* Create(const EchoCanceller3Config& config, + int sample_rate_hz); + virtual ~EchoRemover() = default; + + // Get current metrics. + virtual void GetMetrics(EchoControl::Metrics* metrics) const = 0; + + // Removes the echo from a block of samples from the capture signal. The + // supplied render signal is assumed to be pre-aligned with the capture + // signal. + virtual void ProcessCapture( + const rtc::Optional<size_t>& echo_path_delay_samples, + const EchoPathVariability& echo_path_variability, + bool capture_signal_saturation, + const RenderBuffer& render_buffer, + std::vector<std::vector<float>>* capture) = 0; + + // Updates the status on whether echo leakage is detected in the output of the + // echo remover. + virtual void UpdateEchoLeakageStatus(bool leakage_detected) = 0; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_ECHO_REMOVER_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/echo_remover_metrics.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/echo_remover_metrics.cc new file mode 100644 index 0000000000..3a71299b1e --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/echo_remover_metrics.cc @@ -0,0 +1,282 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/aec3/echo_remover_metrics.h" + +#include <math.h> +#include <algorithm> +#include <numeric> + +#include "rtc_base/numerics/safe_minmax.h" +#include "system_wrappers/include/metrics.h" + +namespace webrtc { + +namespace { + +constexpr float kOneByMetricsCollectionBlocks = 1.f / kMetricsCollectionBlocks; + +} // namespace + +EchoRemoverMetrics::DbMetric::DbMetric() : DbMetric(0.f, 0.f, 0.f) {} +EchoRemoverMetrics::DbMetric::DbMetric(float sum_value, + float floor_value, + float ceil_value) + : sum_value(sum_value), floor_value(floor_value), ceil_value(ceil_value) {} + +void EchoRemoverMetrics::DbMetric::Update(float value) { + sum_value += value; + floor_value = std::min(floor_value, value); + ceil_value = std::max(ceil_value, value); +} + +EchoRemoverMetrics::EchoRemoverMetrics() { + ResetMetrics(); +} + +void EchoRemoverMetrics::ResetMetrics() { + erl_.fill(DbMetric(0.f, 10000.f, 0.000f)); + erle_.fill(DbMetric(0.f, 0.f, 1000.f)); + comfort_noise_.fill(DbMetric(0.f, 100000000.f, 0.f)); + suppressor_gain_.fill(DbMetric(0.f, 1.f, 0.f)); + active_render_count_ = 0; + saturated_capture_ = false; +} + +void EchoRemoverMetrics::Update( + const AecState& aec_state, + const std::array<float, kFftLengthBy2Plus1>& comfort_noise_spectrum, + const std::array<float, kFftLengthBy2Plus1>& suppressor_gain) { + metrics_reported_ = false; + if (++block_counter_ <= kMetricsCollectionBlocks) { + aec3::UpdateDbMetric(aec_state.Erl(), &erl_); + aec3::UpdateDbMetric(aec_state.Erle(), &erle_); + aec3::UpdateDbMetric(comfort_noise_spectrum, &comfort_noise_); + aec3::UpdateDbMetric(suppressor_gain, &suppressor_gain_); + active_render_count_ += (aec_state.ActiveRender() ? 1 : 0); + saturated_capture_ = saturated_capture_ || aec_state.SaturatedCapture(); + } else { + // Report the metrics over several frames in order to lower the impact of + // the logarithms involved on the computational complexity. 
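The switch statement that follows implements a staggered flush: statistics accumulate for kMetricsCollectionBlocks blocks, then one histogram family is reported per subsequent block, so no single 4 ms block pays for all the log10 calls at once. A sketch of the pattern (the collection length and phase count here are illustrative, not the AEC3 constants):

    #include <cstdio>

    constexpr int kCollectBlocks = 1000;  // Illustrative collection window.
    constexpr int kReportPhases = 9;      // One case label per phase below.

    void Tick(int* block_counter) {
      if (++*block_counter <= kCollectBlocks) {
        // Accumulate ERL/ERLE/comfort-noise/suppressor-gain statistics.
      } else {
        const int phase = *block_counter - kCollectBlocks;  // 1..kReportPhases.
        std::printf("flushing histogram family %d\n", phase);
        if (phase == kReportPhases) {
          *block_counter = 0;  // Restart the collection window.
        }
      }
    }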
+ constexpr int kMetricsCollectionBlocksBy2 = kMetricsCollectionBlocks / 2; + constexpr float kComfortNoiseScaling = 1.f / (kBlockSize * kBlockSize); + switch (block_counter_) { + case kMetricsCollectionBlocks + 1: + RTC_HISTOGRAM_COUNTS_LINEAR( + "WebRTC.Audio.EchoCanceller.ErleBand0.Average", + aec3::TransformDbMetricForReporting(true, 0.f, 19.f, 0.f, + kOneByMetricsCollectionBlocks, + erle_[0].sum_value), + 0, 19, 20); + RTC_HISTOGRAM_COUNTS_LINEAR( + "WebRTC.Audio.EchoCanceller.ErleBand0.Max", + aec3::TransformDbMetricForReporting(true, 0.f, 19.f, 0.f, 1.f, + erle_[0].ceil_value), + 0, 19, 20); + RTC_HISTOGRAM_COUNTS_LINEAR( + "WebRTC.Audio.EchoCanceller.ErleBand0.Min", + aec3::TransformDbMetricForReporting(true, 0.f, 19.f, 0.f, 1.f, + erle_[0].floor_value), + 0, 19, 20); + break; + case kMetricsCollectionBlocks + 2: + RTC_HISTOGRAM_COUNTS_LINEAR( + "WebRTC.Audio.EchoCanceller.ErleBand1.Average", + aec3::TransformDbMetricForReporting(true, 0.f, 19.f, 0.f, + kOneByMetricsCollectionBlocks, + erle_[1].sum_value), + 0, 19, 20); + RTC_HISTOGRAM_COUNTS_LINEAR( + "WebRTC.Audio.EchoCanceller.ErleBand1.Max", + aec3::TransformDbMetricForReporting(true, 0.f, 19.f, 0.f, 1.f, + erle_[1].ceil_value), + 0, 19, 20); + RTC_HISTOGRAM_COUNTS_LINEAR( + "WebRTC.Audio.EchoCanceller.ErleBand1.Min", + aec3::TransformDbMetricForReporting(true, 0.f, 19.f, 0.f, 1.f, + erle_[1].floor_value), + 0, 19, 20); + break; + case kMetricsCollectionBlocks + 3: + RTC_HISTOGRAM_COUNTS_LINEAR( + "WebRTC.Audio.EchoCanceller.ErlBand0.Average", + aec3::TransformDbMetricForReporting(true, 0.f, 59.f, 30.f, + kOneByMetricsCollectionBlocks, + erl_[0].sum_value), + 0, 59, 30); + RTC_HISTOGRAM_COUNTS_LINEAR( + "WebRTC.Audio.EchoCanceller.ErlBand0.Max", + aec3::TransformDbMetricForReporting(true, 0.f, 59.f, 30.f, 1.f, + erl_[0].ceil_value), + 0, 59, 30); + RTC_HISTOGRAM_COUNTS_LINEAR( + "WebRTC.Audio.EchoCanceller.ErlBand0.Min", + aec3::TransformDbMetricForReporting(true, 0.f, 59.f, 30.f, 1.f, + erl_[0].floor_value), + 0, 59, 30); + break; + case kMetricsCollectionBlocks + 4: + RTC_HISTOGRAM_COUNTS_LINEAR( + "WebRTC.Audio.EchoCanceller.ErlBand1.Average", + aec3::TransformDbMetricForReporting(true, 0.f, 59.f, 30.f, + kOneByMetricsCollectionBlocks, + erl_[1].sum_value), + 0, 59, 30); + RTC_HISTOGRAM_COUNTS_LINEAR( + "WebRTC.Audio.EchoCanceller.ErlBand1.Max", + aec3::TransformDbMetricForReporting(true, 0.f, 59.f, 30.f, 1.f, + erl_[1].ceil_value), + 0, 59, 30); + RTC_HISTOGRAM_COUNTS_LINEAR( + "WebRTC.Audio.EchoCanceller.ErlBand1.Min", + aec3::TransformDbMetricForReporting(true, 0.f, 59.f, 30.f, 1.f, + erl_[1].floor_value), + 0, 59, 30); + break; + case kMetricsCollectionBlocks + 5: + RTC_HISTOGRAM_COUNTS_LINEAR( + "WebRTC.Audio.EchoCanceller.ComfortNoiseBand0.Average", + aec3::TransformDbMetricForReporting( + true, 0.f, 89.f, -90.3f, + kComfortNoiseScaling * kOneByMetricsCollectionBlocks, + comfort_noise_[0].sum_value), + 0, 89, 45); + RTC_HISTOGRAM_COUNTS_LINEAR( + "WebRTC.Audio.EchoCanceller.ComfortNoiseBand0.Max", + aec3::TransformDbMetricForReporting(true, 0.f, 89.f, -90.3f, + kComfortNoiseScaling, + comfort_noise_[0].ceil_value), + 0, 89, 45); + RTC_HISTOGRAM_COUNTS_LINEAR( + "WebRTC.Audio.EchoCanceller.ComfortNoiseBand0.Min", + aec3::TransformDbMetricForReporting(true, 0.f, 89.f, -90.3f, + kComfortNoiseScaling, + comfort_noise_[0].floor_value), + 0, 89, 45); + break; + case kMetricsCollectionBlocks + 6: + RTC_HISTOGRAM_COUNTS_LINEAR( + "WebRTC.Audio.EchoCanceller.ComfortNoiseBand1.Average", + aec3::TransformDbMetricForReporting( + 
true, 0.f, 89.f, -90.3f, + kComfortNoiseScaling * kOneByMetricsCollectionBlocks, + comfort_noise_[1].sum_value), + 0, 89, 45); + RTC_HISTOGRAM_COUNTS_LINEAR( + "WebRTC.Audio.EchoCanceller.ComfortNoiseBand1.Max", + aec3::TransformDbMetricForReporting(true, 0.f, 89.f, -90.3f, + kComfortNoiseScaling, + comfort_noise_[1].ceil_value), + 0, 89, 45); + RTC_HISTOGRAM_COUNTS_LINEAR( + "WebRTC.Audio.EchoCanceller.ComfortNoiseBand1.Min", + aec3::TransformDbMetricForReporting(true, 0.f, 89.f, -90.3f, + kComfortNoiseScaling, + comfort_noise_[1].floor_value), + 0, 89, 45); + break; + case kMetricsCollectionBlocks + 7: + RTC_HISTOGRAM_COUNTS_LINEAR( + "WebRTC.Audio.EchoCanceller.SuppressorGainBand0.Average", + aec3::TransformDbMetricForReporting(true, 0.f, 59.f, 0.f, + kOneByMetricsCollectionBlocks, + suppressor_gain_[0].sum_value), + 0, 59, 30); + RTC_HISTOGRAM_COUNTS_LINEAR( + "WebRTC.Audio.EchoCanceller.SuppressorGainBand0.Max", + aec3::TransformDbMetricForReporting(true, 0.f, 59.f, 0.f, 1.f, + suppressor_gain_[0].ceil_value), + 0, 59, 30); + RTC_HISTOGRAM_COUNTS_LINEAR( + "WebRTC.Audio.EchoCanceller.SuppressorGainBand0.Min", + aec3::TransformDbMetricForReporting( + true, 0.f, 59.f, 0.f, 1.f, suppressor_gain_[0].floor_value), + 0, 59, 30); + break; + case kMetricsCollectionBlocks + 8: + RTC_HISTOGRAM_COUNTS_LINEAR( + "WebRTC.Audio.EchoCanceller.SuppressorGainBand1.Average", + aec3::TransformDbMetricForReporting(true, 0.f, 59.f, 0.f, + kOneByMetricsCollectionBlocks, + suppressor_gain_[1].sum_value), + 0, 59, 30); + RTC_HISTOGRAM_COUNTS_LINEAR( + "WebRTC.Audio.EchoCanceller.SuppressorGainBand1.Max", + aec3::TransformDbMetricForReporting(true, 0.f, 59.f, 0.f, 1.f, + suppressor_gain_[1].ceil_value), + 0, 59, 30); + RTC_HISTOGRAM_COUNTS_LINEAR( + "WebRTC.Audio.EchoCanceller.SuppressorGainBand1.Min", + aec3::TransformDbMetricForReporting( + true, 0.f, 59.f, 0.f, 1.f, suppressor_gain_[1].floor_value), + 0, 59, 30); + break; + case kMetricsCollectionBlocks + 9: + RTC_HISTOGRAM_BOOLEAN( + "WebRTC.Audio.EchoCanceller.UsableLinearEstimate", + static_cast<int>(aec_state.UsableLinearEstimate() ? 1 : 0)); + RTC_HISTOGRAM_BOOLEAN( + "WebRTC.Audio.EchoCanceller.ActiveRender", + static_cast<int>( + active_render_count_ > kMetricsCollectionBlocksBy2 ? 1 : 0)); + RTC_HISTOGRAM_COUNTS_LINEAR( + "WebRTC.Audio.EchoCanceller.FilterDelay", + aec_state.FilterDelay() ? *aec_state.FilterDelay() + 1 : 0, 0, 30, + 31); + RTC_HISTOGRAM_BOOLEAN("WebRTC.Audio.EchoCanceller.CaptureSaturation", + static_cast<int>(saturated_capture_ ? 1 : 0)); + metrics_reported_ = true; + RTC_DCHECK_EQ(kMetricsReportingIntervalBlocks, block_counter_); + block_counter_ = 0; + ResetMetrics(); + break; + default: + RTC_NOTREACHED(); + break; + } + } +} + +namespace aec3 { + +void UpdateDbMetric(const std::array<float, kFftLengthBy2Plus1>& value, + std::array<EchoRemoverMetrics::DbMetric, 2>* statistic) { + RTC_DCHECK(statistic); + // Truncation is intended in the band width computation. 
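To spell out the truncation noted above: 65 / 2 gives a band width of 32, so the two bands cover bins 0-31 and 32-63 and the Nyquist bin is deliberately dropped. A condensed, self-contained version of the banded averaging (the function name is illustrative):

    #include <array>
    #include <cstddef>
    #include <numeric>

    // Mean of one of the two 32-bin bands of a 65-bin spectrum; band is 0 or 1.
    float BandAverage(const std::array<float, 65>& value, size_t band) {
      constexpr size_t kBandWidth = 65 / 2;  // == 32; truncation is intended.
      const auto first = value.begin() + kBandWidth * band;
      return std::accumulate(first, first + kBandWidth, 0.f) / kBandWidth;
    }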
+ constexpr int kNumBands = 2; + constexpr int kBandWidth = 65 / kNumBands; + constexpr float kOneByBandWidth = 1.f / kBandWidth; + RTC_DCHECK_EQ(kNumBands, statistic->size()); + RTC_DCHECK_EQ(65, value.size()); + for (size_t k = 0; k < statistic->size(); ++k) { + float average_band = + std::accumulate(value.begin() + kBandWidth * k, + value.begin() + kBandWidth * (k + 1), 0.f) * + kOneByBandWidth; + (*statistic)[k].Update(average_band); + } +} + +int TransformDbMetricForReporting(bool negate, + float min_value, + float max_value, + float offset, + float scaling, + float value) { + float new_value = 10.f * log10(value * scaling + 1e-10f) + offset; + if (negate) { + new_value = -new_value; + } + return static_cast<int>(rtc::SafeClamp(new_value, min_value, max_value)); +} + +} // namespace aec3 + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/echo_remover_metrics.h b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/echo_remover_metrics.h new file mode 100644 index 0000000000..6548ae8aa2 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/echo_remover_metrics.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_ECHO_REMOVER_METRICS_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_ECHO_REMOVER_METRICS_H_ + +#include "modules/audio_processing/aec3/aec_state.h" +#include "rtc_base/constructormagic.h" + +namespace webrtc { + +// Handles the reporting of metrics for the echo remover. +class EchoRemoverMetrics { + public: + struct DbMetric { + DbMetric(); + DbMetric(float sum_value, float floor_value, float ceil_value); + void Update(float value); + float sum_value; + float floor_value; + float ceil_value; + }; + + EchoRemoverMetrics(); + + // Updates the metric with new data. + void Update( + const AecState& aec_state, + const std::array<float, kFftLengthBy2Plus1>& comfort_noise_spectrum, + const std::array<float, kFftLengthBy2Plus1>& suppressor_gain); + + // Returns true if the metrics have just been reported, otherwise false. + bool MetricsReported() { return metrics_reported_; } + + private: + // Resets the metrics. + void ResetMetrics(); + + int block_counter_ = 0; + std::array<DbMetric, 2> erl_; + std::array<DbMetric, 2> erle_; + std::array<DbMetric, 2> comfort_noise_; + std::array<DbMetric, 2> suppressor_gain_; + int active_render_count_ = 0; + bool saturated_capture_ = false; + bool metrics_reported_ = false; + + RTC_DISALLOW_COPY_AND_ASSIGN(EchoRemoverMetrics); +}; + +namespace aec3 { + +// Updates a banded metric of type DbMetric with the values in the supplied +// array. +void UpdateDbMetric(const std::array<float, kFftLengthBy2Plus1>& value, + std::array<EchoRemoverMetrics::DbMetric, 2>* statistic); + +// Transforms a DbMetric from the linear domain into the logarithmic domain. 
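TransformDbMetricForReporting(), defined earlier in echo_remover_metrics.cc and declared below, compresses a linear metric into a clamped integer dB value. A behaviorally equivalent sketch, with C++17 std::clamp standing in for rtc::SafeClamp and an illustrative name:

    #include <algorithm>
    #include <cmath>

    int ToReportedDb(bool negate, float min_value, float max_value,
                     float offset, float scaling, float value) {
      // The 1e-10f term guards against log10(0) for silent inputs.
      float db = 10.f * std::log10(value * scaling + 1e-10f) + offset;
      if (negate) {
        db = -db;  // Turns a gain below unity into a positive loss in dB.
      }
      return static_cast<int>(std::clamp(db, min_value, max_value));
    }

For instance, ToReportedDb(true, -20.f, 20.f, 0.f, 1.f, 0.1f) yields 10, matching the Negate unit test further down.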
+int TransformDbMetricForReporting(bool negate, + float min_value, + float max_value, + float offset, + float scaling, + float value); + +} // namespace aec3 + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_ECHO_REMOVER_METRICS_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/echo_remover_metrics_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/echo_remover_metrics_unittest.cc new file mode 100644 index 0000000000..b8b20d7722 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/echo_remover_metrics_unittest.cc @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/echo_remover_metrics.h" + +#include <math.h> + +#include "modules/audio_processing/aec3/aec_state.h" +#include "modules/audio_processing/aec3/aec3_fft.h" +#include "test/gtest.h" + +namespace webrtc { + +#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) + +// Verifies the check for non-null input. +TEST(UpdateDbMetric, NullValue) { + std::array<float, kFftLengthBy2Plus1> value; + value.fill(0.f); + EXPECT_DEATH(aec3::UpdateDbMetric(value, nullptr), ""); +} + +#endif + +// Verifies the updating functionality of UpdateDbMetric. +TEST(UpdateDbMetric, Updating) { + std::array<float, kFftLengthBy2Plus1> value; + std::array<EchoRemoverMetrics::DbMetric, 2> statistic; + statistic.fill(EchoRemoverMetrics::DbMetric(0.f, 100.f, -100.f)); + constexpr float kValue0 = 10.f; + constexpr float kValue1 = 20.f; + std::fill(value.begin(), value.begin() + 32, kValue0); + std::fill(value.begin() + 32, value.begin() + 64, kValue1); + + aec3::UpdateDbMetric(value, &statistic); + EXPECT_FLOAT_EQ(kValue0, statistic[0].sum_value); + EXPECT_FLOAT_EQ(kValue0, statistic[0].ceil_value); + EXPECT_FLOAT_EQ(kValue0, statistic[0].floor_value); + EXPECT_FLOAT_EQ(kValue1, statistic[1].sum_value); + EXPECT_FLOAT_EQ(kValue1, statistic[1].ceil_value); + EXPECT_FLOAT_EQ(kValue1, statistic[1].floor_value); + + aec3::UpdateDbMetric(value, &statistic); + EXPECT_FLOAT_EQ(2.f * kValue0, statistic[0].sum_value); + EXPECT_FLOAT_EQ(kValue0, statistic[0].ceil_value); + EXPECT_FLOAT_EQ(kValue0, statistic[0].floor_value); + EXPECT_FLOAT_EQ(2.f * kValue1, statistic[1].sum_value); + EXPECT_FLOAT_EQ(kValue1, statistic[1].ceil_value); + EXPECT_FLOAT_EQ(kValue1, statistic[1].floor_value); +} + +// Verifies that the TransformDbMetricForReporting method produces the desired +// output for values for dBFS. +TEST(TransformDbMetricForReporting, DbFsScaling) { + std::array<float, kBlockSize> x; + FftData X; + std::array<float, kFftLengthBy2Plus1> X2; + Aec3Fft fft; + x.fill(1000.f); + fft.ZeroPaddedFft(x, &X); + X.Spectrum(Aec3Optimization::kNone, &X2); + + float offset = -10.f * log10(32768.f * 32768.f); + EXPECT_NEAR(offset, -90.3f, 0.1f); + EXPECT_EQ( + static_cast<int>(30.3f), + aec3::TransformDbMetricForReporting( + true, 0.f, 90.f, offset, 1.f / (kBlockSize * kBlockSize), X2[0])); +} + +// Verifies that the TransformDbMetricForReporting method is able to properly +// limit the output. 
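To make the DbFsScaling expectation above concrete: a constant amplitude of 1000 against a 16-bit full scale of 32768 sits at 20 * log10(1000 / 32768), about -30.3 dBFS, and the negating transform reports it as the integer 30. A worked check of that arithmetic:

    #include <cmath>
    #include <cstdio>

    int main() {
      const float offset = -10.f * std::log10(32768.f * 32768.f);    // ~ -90.3.
      const float level_dbfs = 20.f * std::log10(1000.f / 32768.f);  // ~ -30.3.
      std::printf("offset = %.1f dB, level = %.1f dBFS\n", offset, level_dbfs);
      return 0;
    }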
+TEST(TransformDbMetricForReporting, Limits) { + EXPECT_EQ( + 0, + aec3::TransformDbMetricForReporting(false, 0.f, 10.f, 0.f, 1.f, 0.001f)); + EXPECT_EQ( + 10, + aec3::TransformDbMetricForReporting(false, 0.f, 10.f, 0.f, 1.f, 100.f)); +} + +// Verifies that the TransformDbMetricForReporting method is able to properly +// negate output. +TEST(TransformDbMetricForReporting, Negate) { + EXPECT_EQ( + 10, + aec3::TransformDbMetricForReporting(true, -20.f, 20.f, 0.f, 1.f, 0.1f)); + EXPECT_EQ( + -10, + aec3::TransformDbMetricForReporting(true, -20.f, 20.f, 0.f, 1.f, 10.f)); +} + +// Verify the Update functionality of DbMetric. +TEST(DbMetric, Update) { + EchoRemoverMetrics::DbMetric metric(0.f, 20.f, -20.f); + constexpr int kNumValues = 100; + constexpr float kValue = 10.f; + for (int k = 0; k < kNumValues; ++k) { + metric.Update(kValue); + } + EXPECT_FLOAT_EQ(kValue * kNumValues, metric.sum_value); + EXPECT_FLOAT_EQ(kValue, metric.ceil_value); + EXPECT_FLOAT_EQ(kValue, metric.floor_value); +} + +// Verify the constructor functionality of DbMetric. +TEST(DbMetric, Constructor) { + EchoRemoverMetrics::DbMetric metric; + EXPECT_FLOAT_EQ(0.f, metric.sum_value); + EXPECT_FLOAT_EQ(0.f, metric.ceil_value); + EXPECT_FLOAT_EQ(0.f, metric.floor_value); + + metric = EchoRemoverMetrics::DbMetric(1.f, 2.f, 3.f); + EXPECT_FLOAT_EQ(1.f, metric.sum_value); + EXPECT_FLOAT_EQ(2.f, metric.floor_value); + EXPECT_FLOAT_EQ(3.f, metric.ceil_value); +} + +// Verify the general functionality of EchoRemoverMetrics. +TEST(EchoRemoverMetrics, NormalUsage) { + EchoRemoverMetrics metrics; + AecState aec_state(EchoCanceller3Config{}); + std::array<float, kFftLengthBy2Plus1> comfort_noise_spectrum; + std::array<float, kFftLengthBy2Plus1> suppressor_gain; + comfort_noise_spectrum.fill(10.f); + suppressor_gain.fill(1.f); + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < kMetricsReportingIntervalBlocks - 1; ++k) { + metrics.Update(aec_state, comfort_noise_spectrum, suppressor_gain); + EXPECT_FALSE(metrics.MetricsReported()); + } + metrics.Update(aec_state, comfort_noise_spectrum, suppressor_gain); + EXPECT_TRUE(metrics.MetricsReported()); + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/echo_remover_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/echo_remover_unittest.cc new file mode 100644 index 0000000000..24b50e8c95 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/echo_remover_unittest.cc @@ -0,0 +1,216 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/aec3/echo_remover.h" + +#include <algorithm> +#include <memory> +#include <numeric> +#include <sstream> +#include <string> + +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/render_buffer.h" +#include "modules/audio_processing/aec3/render_delay_buffer.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "modules/audio_processing/test/echo_canceller_test_tools.h" +#include "rtc_base/random.h" +#include "test/gtest.h" + +namespace webrtc { +namespace { + +std::string ProduceDebugText(int sample_rate_hz) { + std::ostringstream ss; + ss << "Sample rate: " << sample_rate_hz; + return ss.str(); +} + +std::string ProduceDebugText(int sample_rate_hz, int delay) { + std::ostringstream ss(ProduceDebugText(sample_rate_hz)); + ss << ", Delay: " << delay; + return ss.str(); +} + +constexpr size_t kDownSamplingFactor = 4; +constexpr size_t kNumMatchedFilters = 4; + +} // namespace + +// Verifies the basic API call sequence +TEST(EchoRemover, BasicApiCalls) { + for (auto rate : {8000, 16000, 32000, 48000}) { + SCOPED_TRACE(ProduceDebugText(rate)); + std::unique_ptr<EchoRemover> remover( + EchoRemover::Create(EchoCanceller3Config(), rate)); + std::unique_ptr<RenderDelayBuffer> render_buffer(RenderDelayBuffer::Create( + NumBandsForRate(rate), kDownSamplingFactor, + GetDownSampledBufferSize(kDownSamplingFactor, kNumMatchedFilters), + GetRenderDelayBufferSize(kDownSamplingFactor, kNumMatchedFilters))); + + std::vector<std::vector<float>> render(NumBandsForRate(rate), + std::vector<float>(kBlockSize, 0.f)); + std::vector<std::vector<float>> capture( + NumBandsForRate(rate), std::vector<float>(kBlockSize, 0.f)); + for (size_t k = 0; k < 100; ++k) { + EchoPathVariability echo_path_variability(k % 3 == 0 ? true : false, + k % 5 == 0 ? true : false); + rtc::Optional<size_t> echo_path_delay_samples = + (k % 6 == 0 ? rtc::Optional<size_t>(k * 10) + : rtc::nullopt); + render_buffer->Insert(render); + render_buffer->UpdateBuffers(); + remover->ProcessCapture(echo_path_delay_samples, echo_path_variability, + k % 2 == 0 ? true : false, + render_buffer->GetRenderBuffer(), &capture); + } + } +} + +#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) + +// Verifies the check for the samplerate. +// TODO(peah): Re-enable the test once the issue with memory leaks during DEATH +// tests on test bots has been fixed. +TEST(EchoRemover, DISABLED_WrongSampleRate) { + EXPECT_DEATH(std::unique_ptr<EchoRemover>( + EchoRemover::Create(EchoCanceller3Config(), 8001)), + ""); +} + +// Verifies the check for the capture block size. 
+TEST(EchoRemover, WrongCaptureBlockSize) {
+  for (auto rate : {8000, 16000, 32000, 48000}) {
+    SCOPED_TRACE(ProduceDebugText(rate));
+    std::unique_ptr<EchoRemover> remover(
+        EchoRemover::Create(EchoCanceller3Config(), rate));
+    std::unique_ptr<RenderDelayBuffer> render_buffer(RenderDelayBuffer::Create(
+        NumBandsForRate(rate), kDownSamplingFactor,
+        GetDownSampledBufferSize(kDownSamplingFactor, kNumMatchedFilters),
+        GetRenderDelayBufferSize(kDownSamplingFactor, kNumMatchedFilters)));
+    std::vector<std::vector<float>> capture(
+        NumBandsForRate(rate), std::vector<float>(kBlockSize - 1, 0.f));
+    EchoPathVariability echo_path_variability(false, false);
+    rtc::Optional<size_t> echo_path_delay_samples;
+    EXPECT_DEATH(remover->ProcessCapture(
+                     echo_path_delay_samples, echo_path_variability, false,
+                     render_buffer->GetRenderBuffer(), &capture),
+                 "");
+  }
+}
+
+// Verifies the check for the number of capture bands.
+// TODO(peah): Re-enable the test once the issue with memory leaks during DEATH
+// tests on test bots has been fixed.
+TEST(EchoRemover, DISABLED_WrongCaptureNumBands) {
+  for (auto rate : {16000, 32000, 48000}) {
+    SCOPED_TRACE(ProduceDebugText(rate));
+    std::unique_ptr<EchoRemover> remover(
+        EchoRemover::Create(EchoCanceller3Config(), rate));
+    std::unique_ptr<RenderDelayBuffer> render_buffer(RenderDelayBuffer::Create(
+        NumBandsForRate(rate), kDownSamplingFactor,
+        GetDownSampledBufferSize(kDownSamplingFactor, kNumMatchedFilters),
+        GetRenderDelayBufferSize(kDownSamplingFactor, kNumMatchedFilters)));
+    std::vector<std::vector<float>> capture(
+        NumBandsForRate(rate == 48000 ? 16000 : rate + 16000),
+        std::vector<float>(kBlockSize, 0.f));
+    EchoPathVariability echo_path_variability(false, false);
+    rtc::Optional<size_t> echo_path_delay_samples;
+    EXPECT_DEATH(remover->ProcessCapture(
+                     echo_path_delay_samples, echo_path_variability, false,
+                     render_buffer->GetRenderBuffer(), &capture),
+                 "");
+  }
+}
+
+// Verifies the check for non-null capture block.
+TEST(EchoRemover, NullCapture) {
+  std::unique_ptr<EchoRemover> remover(
+      EchoRemover::Create(EchoCanceller3Config(), 8000));
+  std::unique_ptr<RenderDelayBuffer> render_buffer(RenderDelayBuffer::Create(
+      3, kDownSamplingFactor,
+      GetDownSampledBufferSize(kDownSamplingFactor, kNumMatchedFilters),
+      GetRenderDelayBufferSize(kDownSamplingFactor, kNumMatchedFilters)));
+  EchoPathVariability echo_path_variability(false, false);
+  rtc::Optional<size_t> echo_path_delay_samples;
+  EXPECT_DEATH(
+      remover->ProcessCapture(echo_path_delay_samples, echo_path_variability,
+                              false, render_buffer->GetRenderBuffer(), nullptr),
+      "");
+}
+
+#endif
+
+// Performs a sanity check that the echo_remover is able to properly
+// remove echoes.
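The pass criterion in the test below reduces to a 10x (10 dB) energy drop, measured with std::inner_product over the second half of the run so the filters have time to converge. The bookkeeping, condensed (helper names are illustrative):

    #include <numeric>
    #include <vector>

    // Sum of squared samples, accumulated the same way as in the test below.
    float Energy(const std::vector<float>& x) {
      return std::inner_product(x.begin(), x.end(), x.begin(), 0.f);
    }

    bool RemovedAtLeast10Db(const std::vector<float>& in,
                            const std::vector<float>& out) {
      return Energy(in) > 10.f * Energy(out);
    }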
+TEST(EchoRemover, BasicEchoRemoval) { + constexpr int kNumBlocksToProcess = 500; + Random random_generator(42U); + for (auto rate : {8000, 16000, 32000, 48000}) { + std::vector<std::vector<float>> x(NumBandsForRate(rate), + std::vector<float>(kBlockSize, 0.f)); + std::vector<std::vector<float>> y(NumBandsForRate(rate), + std::vector<float>(kBlockSize, 0.f)); + EchoPathVariability echo_path_variability(false, false); + for (size_t delay_samples : {0, 64, 150, 200, 301}) { + SCOPED_TRACE(ProduceDebugText(rate, delay_samples)); + std::unique_ptr<EchoRemover> remover( + EchoRemover::Create(EchoCanceller3Config(), rate)); + std::unique_ptr<RenderDelayBuffer> render_buffer( + RenderDelayBuffer::Create( + NumBandsForRate(rate), kDownSamplingFactor, + GetDownSampledBufferSize(kDownSamplingFactor, kNumMatchedFilters), + GetRenderDelayBufferSize(kDownSamplingFactor, + kNumMatchedFilters))); + std::vector<std::unique_ptr<DelayBuffer<float>>> delay_buffers(x.size()); + for (size_t j = 0; j < x.size(); ++j) { + delay_buffers[j].reset(new DelayBuffer<float>(delay_samples)); + } + + float input_energy = 0.f; + float output_energy = 0.f; + for (int k = 0; k < kNumBlocksToProcess; ++k) { + const bool silence = k < 100 || (k % 100 >= 10); + + for (size_t j = 0; j < x.size(); ++j) { + if (silence) { + std::fill(x[j].begin(), x[j].end(), 0.f); + } else { + RandomizeSampleVector(&random_generator, x[j]); + } + delay_buffers[j]->Delay(x[j], y[j]); + } + + if (k > kNumBlocksToProcess / 2) { + for (size_t j = 0; j < x.size(); ++j) { + input_energy = std::inner_product(y[j].begin(), y[j].end(), + y[j].begin(), input_energy); + } + } + + render_buffer->Insert(x); + render_buffer->UpdateBuffers(); + + remover->ProcessCapture(delay_samples, echo_path_variability, false, + render_buffer->GetRenderBuffer(), &y); + + if (k > kNumBlocksToProcess / 2) { + for (size_t j = 0; j < x.size(); ++j) { + output_energy = std::inner_product(y[j].begin(), y[j].end(), + y[j].begin(), output_energy); + } + } + } + EXPECT_GT(input_energy, 10.f * output_energy); + } + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/erl_estimator.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/erl_estimator.cc new file mode 100644 index 0000000000..3f12ba41a1 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/erl_estimator.cc @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/erl_estimator.h" + +#include <algorithm> +#include <numeric> + +namespace webrtc { + +namespace { + +constexpr float kMinErl = 0.01f; +constexpr float kMaxErl = 1000.f; + +} // namespace + +ErlEstimator::ErlEstimator() { + erl_.fill(kMaxErl); + hold_counters_.fill(0); + erl_time_domain_ = kMaxErl; + hold_counter_time_domain_ = 0; +} + +ErlEstimator::~ErlEstimator() = default; + +void ErlEstimator::Update( + const std::array<float, kFftLengthBy2Plus1>& render_spectrum, + const std::array<float, kFftLengthBy2Plus1>& capture_spectrum) { + const auto& X2 = render_spectrum; + const auto& Y2 = capture_spectrum; + + // Corresponds to WGN of power -46 dBFS. 
+  constexpr float kX2Min = 44015068.0f;
+
+  // Update the estimates in a minimum statistics manner.
+  for (size_t k = 1; k < kFftLengthBy2; ++k) {
+    if (X2[k] > kX2Min) {
+      const float new_erl = Y2[k] / X2[k];
+      if (new_erl < erl_[k]) {
+        hold_counters_[k - 1] = 1000;
+        erl_[k] += 0.1f * (new_erl - erl_[k]);
+        erl_[k] = std::max(erl_[k], kMinErl);
+      }
+    }
+  }
+
+  std::for_each(hold_counters_.begin(), hold_counters_.end(),
+                [](int& a) { --a; });
+  std::transform(hold_counters_.begin(), hold_counters_.end(), erl_.begin() + 1,
+                 erl_.begin() + 1, [](int a, float b) {
+                   return a > 0 ? b : std::min(kMaxErl, 2.f * b);
+                 });
+
+  erl_[0] = erl_[1];
+  erl_[kFftLengthBy2] = erl_[kFftLengthBy2 - 1];
+
+  // Compute ERL over all frequency bins.
+  const float X2_sum = std::accumulate(X2.begin(), X2.end(), 0.0f);
+
+  if (X2_sum > kX2Min * X2.size()) {
+    const float Y2_sum = std::accumulate(Y2.begin(), Y2.end(), 0.0f);
+    const float new_erl = Y2_sum / X2_sum;
+    if (new_erl < erl_time_domain_) {
+      hold_counter_time_domain_ = 1000;
+      erl_time_domain_ += 0.1f * (new_erl - erl_time_domain_);
+      erl_time_domain_ = std::max(erl_time_domain_, kMinErl);
+    }
+  }
+
+  --hold_counter_time_domain_;
+  erl_time_domain_ = (hold_counter_time_domain_ > 0)
+                         ? erl_time_domain_
+                         : std::min(kMaxErl, 2.f * erl_time_domain_);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/erl_estimator.h b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/erl_estimator.h
new file mode 100644
index 0000000000..24b3f4b104
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/erl_estimator.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AEC3_ERL_ESTIMATOR_H_
+#define MODULES_AUDIO_PROCESSING_AEC3_ERL_ESTIMATOR_H_
+
+#include <array>
+
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "rtc_base/constructormagic.h"
+
+namespace webrtc {
+
+// Estimates the echo return loss based on the signal spectra.
+class ErlEstimator {
+ public:
+  ErlEstimator();
+  ~ErlEstimator();
+
+  // Updates the ERL estimate.
+  void Update(const std::array<float, kFftLengthBy2Plus1>& render_spectrum,
+              const std::array<float, kFftLengthBy2Plus1>& capture_spectrum);
+
+  // Returns the most recent ERL estimate.
+  const std::array<float, kFftLengthBy2Plus1>& Erl() const { return erl_; }
+  float ErlTimeDomain() const { return erl_time_domain_; }
+
+ private:
+  std::array<float, kFftLengthBy2Plus1> erl_;
+  std::array<int, kFftLengthBy2Minus1> hold_counters_;
+  float erl_time_domain_;
+  int hold_counter_time_domain_;
+
+  RTC_DISALLOW_COPY_AND_ASSIGN(ErlEstimator);
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AEC3_ERL_ESTIMATOR_H_
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/erl_estimator_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/erl_estimator_unittest.cc
new file mode 100644
index 0000000000..a406581001
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/erl_estimator_unittest.cc
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/erl_estimator.h"
+
+#include "test/gtest.h"
+
+namespace webrtc {
+
+namespace {
+
+void VerifyErl(const std::array<float, kFftLengthBy2Plus1>& erl,
+               float erl_time_domain,
+               float reference) {
+  std::for_each(erl.begin(), erl.end(),
+                [reference](float a) { EXPECT_NEAR(reference, a, 0.001); });
+  EXPECT_NEAR(reference, erl_time_domain, 0.001);
+}
+
+}  // namespace
+
+// Verifies that the correct ERL estimates are achieved.
+TEST(ErlEstimator, Estimates) {
+  std::array<float, kFftLengthBy2Plus1> X2;
+  std::array<float, kFftLengthBy2Plus1> Y2;
+
+  ErlEstimator estimator;
+
+  // Verifies that the ERL estimate is properly reduced to lower values.
+  X2.fill(500 * 1000.f * 1000.f);
+  Y2.fill(10 * X2[0]);
+  for (size_t k = 0; k < 200; ++k) {
+    estimator.Update(X2, Y2);
+  }
+  VerifyErl(estimator.Erl(), estimator.ErlTimeDomain(), 10.f);
+
+  // Verifies that the ERL is not immediately increased when the ERL in the
+  // data increases.
+  Y2.fill(10000 * X2[0]);
+  for (size_t k = 0; k < 998; ++k) {
+    estimator.Update(X2, Y2);
+  }
+  VerifyErl(estimator.Erl(), estimator.ErlTimeDomain(), 10.f);
+
+  // Verifies that the rate of increase is 3 dB.
+  estimator.Update(X2, Y2);
+  VerifyErl(estimator.Erl(), estimator.ErlTimeDomain(), 20.f);
+
+  // Verifies that the maximum ERL is achieved when there are no low ERL
+  // estimates.
+  for (size_t k = 0; k < 1000; ++k) {
+    estimator.Update(X2, Y2);
+  }
+  VerifyErl(estimator.Erl(), estimator.ErlTimeDomain(), 1000.f);
+
+  // Verifies that the ERL estimate is not updated for low-level signals.
+  X2.fill(1000.f * 1000.f);
+  Y2.fill(10 * X2[0]);
+  for (size_t k = 0; k < 200; ++k) {
+    estimator.Update(X2, Y2);
+  }
+  VerifyErl(estimator.Erl(), estimator.ErlTimeDomain(), 1000.f);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/erle_estimator.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/erle_estimator.cc
new file mode 100644
index 0000000000..385e6dd7e6
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/erle_estimator.cc
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/erle_estimator.h"
+
+#include <algorithm>
+#include <numeric>
+
+#include "rtc_base/numerics/safe_minmax.h"
+
+namespace webrtc {
+
+ErleEstimator::ErleEstimator(float min_erle,
+                             float max_erle_lf,
+                             float max_erle_hf)
+    : min_erle_(min_erle),
+      max_erle_lf_(max_erle_lf),
+      max_erle_hf_(max_erle_hf) {
+  erle_.fill(min_erle_);
+  hold_counters_.fill(0);
+  erle_time_domain_ = min_erle_;
+  hold_counter_time_domain_ = 0;
+}
+
+ErleEstimator::~ErleEstimator() = default;
+
+void ErleEstimator::Update(
+    const std::array<float, kFftLengthBy2Plus1>& render_spectrum,
+    const std::array<float, kFftLengthBy2Plus1>& capture_spectrum,
+    const std::array<float, kFftLengthBy2Plus1>& subtractor_spectrum) {
+  const auto& X2 = render_spectrum;
+  const auto& Y2 = capture_spectrum;
+  const auto& E2 = subtractor_spectrum;
+
+  // Corresponds to WGN of power -46 dBFS.
+  constexpr float kX2Min = 44015068.0f;
+
+  // Update the estimates in a clamped maximum statistics manner.
+  auto erle_update = [&](size_t start, size_t stop, float max_erle) {
+    for (size_t k = start; k < stop; ++k) {
+      if (X2[k] > kX2Min && E2[k] > 0.f) {
+        const float new_erle = Y2[k] / E2[k];
+        if (new_erle > erle_[k]) {
+          hold_counters_[k - 1] = 100;
+          erle_[k] += 0.1f * (new_erle - erle_[k]);
+          erle_[k] = rtc::SafeClamp(erle_[k], min_erle_, max_erle);
+        }
+      }
+    }
+  };
+  erle_update(1, kFftLengthBy2 / 2, max_erle_lf_);
+  erle_update(kFftLengthBy2 / 2, kFftLengthBy2, max_erle_hf_);
+
+  std::for_each(hold_counters_.begin(), hold_counters_.end(),
+                [](int& a) { --a; });
+  std::transform(hold_counters_.begin(), hold_counters_.end(),
+                 erle_.begin() + 1, erle_.begin() + 1, [&](int a, float b) {
+                   return a > 0 ? b : std::max(min_erle_, 0.97f * b);
+                 });
+
+  erle_[0] = erle_[1];
+  erle_[kFftLengthBy2] = erle_[kFftLengthBy2 - 1];
+
+  // Compute ERLE over all frequency bins.
+  const float X2_sum = std::accumulate(X2.begin(), X2.end(), 0.0f);
+  const float E2_sum = std::accumulate(E2.begin(), E2.end(), 0.0f);
+  if (X2_sum > kX2Min * X2.size() && E2_sum > 0.f) {
+    const float Y2_sum = std::accumulate(Y2.begin(), Y2.end(), 0.0f);
+    const float new_erle = Y2_sum / E2_sum;
+    if (new_erle > erle_time_domain_) {
+      hold_counter_time_domain_ = 100;
+      erle_time_domain_ += 0.1f * (new_erle - erle_time_domain_);
+      erle_time_domain_ =
+          rtc::SafeClamp(erle_time_domain_, min_erle_, max_erle_lf_);
+    }
+  }
+  --hold_counter_time_domain_;
+  erle_time_domain_ = (hold_counter_time_domain_ > 0)
+                          ? erle_time_domain_
+                          : std::max(min_erle_, 0.97f * erle_time_domain_);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/erle_estimator.h b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/erle_estimator.h
new file mode 100644
index 0000000000..d88b11bbb8
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/erle_estimator.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AEC3_ERLE_ESTIMATOR_H_
+#define MODULES_AUDIO_PROCESSING_AEC3_ERLE_ESTIMATOR_H_
+
+#include <array>
+
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "rtc_base/constructormagic.h"
+
+namespace webrtc {
+
+// Estimates the echo return loss enhancement based on the signal spectra.
+class ErleEstimator {
+ public:
+  ErleEstimator(float min_erle, float max_erle_lf, float max_erle_hf);
+  ~ErleEstimator();
+
+  // Updates the ERLE estimate.
+  void Update(const std::array<float, kFftLengthBy2Plus1>& render_spectrum,
+              const std::array<float, kFftLengthBy2Plus1>& capture_spectrum,
+              const std::array<float, kFftLengthBy2Plus1>& subtractor_spectrum);
+
+  // Returns the most recent ERLE estimate.
+  const std::array<float, kFftLengthBy2Plus1>& Erle() const { return erle_; }
+  float ErleTimeDomain() const { return erle_time_domain_; }
+
+ private:
+  std::array<float, kFftLengthBy2Plus1> erle_;
+  std::array<int, kFftLengthBy2Minus1> hold_counters_;
+  float erle_time_domain_;
+  int hold_counter_time_domain_;
+  const float min_erle_;
+  const float max_erle_lf_;
+  const float max_erle_hf_;
+
+  RTC_DISALLOW_COPY_AND_ASSIGN(ErleEstimator);
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AEC3_ERLE_ESTIMATOR_H_
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/erle_estimator_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/erle_estimator_unittest.cc
new file mode 100644
index 0000000000..f3dd7d9bbb
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/erle_estimator_unittest.cc
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/erle_estimator.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+
+namespace {
+
+constexpr int kLowFrequencyLimit = kFftLengthBy2 / 2;
+
+void VerifyErle(const std::array<float, kFftLengthBy2Plus1>& erle,
+                float erle_time_domain,
+                float reference_lf,
+                float reference_hf) {
+  std::for_each(
+      erle.begin(), erle.begin() + kLowFrequencyLimit,
+      [reference_lf](float a) { EXPECT_NEAR(reference_lf, a, 0.001); });
+  std::for_each(
+      erle.begin() + kLowFrequencyLimit, erle.end(),
+      [reference_hf](float a) { EXPECT_NEAR(reference_hf, a, 0.001); });
+  EXPECT_NEAR(reference_lf, erle_time_domain, 0.001);
+}
+
+}  // namespace
+
+// Verifies that the correct ERLE estimates are achieved.
+TEST(ErleEstimator, Estimates) {
+  std::array<float, kFftLengthBy2Plus1> X2;
+  std::array<float, kFftLengthBy2Plus1> E2;
+  std::array<float, kFftLengthBy2Plus1> Y2;
+
+  ErleEstimator estimator(1.f, 8.f, 1.5f);
+
+  // Verifies that the ERLE estimate is properly increased to higher values.
+  X2.fill(500 * 1000.f * 1000.f);
+  E2.fill(1000.f * 1000.f);
+  Y2.fill(10 * E2[0]);
+  for (size_t k = 0; k < 200; ++k) {
+    estimator.Update(X2, Y2, E2);
+  }
+  VerifyErle(estimator.Erle(), estimator.ErleTimeDomain(), 8.f, 1.5f);
+
+  // Verifies that the ERLE is not immediately decreased when the ERLE in the
+  // data decreases.
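The hold behavior exercised here comes from the counters in erle_estimator.cc above: every qualifying update re-arms a 100-call hold for that bin, and once the hold expires the estimate decays by 3% per call down to the configured minimum. Condensed into a single-bin sketch (the function name is illustrative):

    #include <algorithm>

    // One bin's post-update step, mirroring the hold counters above.
    void HoldThenDecay(int* hold_counter, float* erle, float min_erle) {
      --*hold_counter;
      if (*hold_counter <= 0) {
        *erle = std::max(min_erle, 0.97f * *erle);  // ~3% decay per call.
      }
    }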
+  Y2.fill(0.1f * E2[0]);
+  for (size_t k = 0; k < 98; ++k) {
+    estimator.Update(X2, Y2, E2);
+  }
+  VerifyErle(estimator.Erle(), estimator.ErleTimeDomain(), 8.f, 1.5f);
+
+  // Verifies that the minimum ERLE is eventually achieved.
+  for (size_t k = 0; k < 1000; ++k) {
+    estimator.Update(X2, Y2, E2);
+  }
+  VerifyErle(estimator.Erle(), estimator.ErleTimeDomain(), 1.f, 1.f);
+
+  // Verifies that the ERLE estimate is not updated for low-level render
+  // signals.
+  X2.fill(1000.f * 1000.f);
+  Y2.fill(10 * E2[0]);
+  for (size_t k = 0; k < 200; ++k) {
+    estimator.Update(X2, Y2, E2);
+  }
+  VerifyErle(estimator.Erle(), estimator.ErleTimeDomain(), 1.f, 1.f);
+}
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/fft_data.h b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/fft_data.h
new file mode 100644
index 0000000000..a5c51bf342
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/fft_data.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AEC3_FFT_DATA_H_
+#define MODULES_AUDIO_PROCESSING_AEC3_FFT_DATA_H_
+
+#include "typedefs.h"  // NOLINT(build/include)
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+#include <emmintrin.h>
+#endif
+#include <algorithm>
+#include <array>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/aec3/aec3_common.h"
+
+namespace webrtc {
+
+// Struct that holds the complex data produced from 128 point real-valued FFTs.
+struct FftData {
+  // Copies the data in src.
+  void Assign(const FftData& src) {
+    std::copy(src.re.begin(), src.re.end(), re.begin());
+    std::copy(src.im.begin(), src.im.end(), im.begin());
+    im[0] = im[kFftLengthBy2] = 0;
+  }
+
+  // Clears all the real and imaginary data.
+  void Clear() {
+    re.fill(0.f);
+    im.fill(0.f);
+  }
+
+  // Computes the power spectrum of the data.
+  void Spectrum(Aec3Optimization optimization,
+                std::array<float, kFftLengthBy2Plus1>* power_spectrum) const {
+    RTC_DCHECK(power_spectrum);
+    switch (optimization) {
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+      case Aec3Optimization::kSse2: {
+        constexpr int kNumFourBinBands = kFftLengthBy2 / 4;
+        constexpr int kLimit = kNumFourBinBands * 4;
+        for (size_t k = 0; k < kLimit; k += 4) {
+          const __m128 r = _mm_loadu_ps(&re[k]);
+          const __m128 i = _mm_loadu_ps(&im[k]);
+          const __m128 ii = _mm_mul_ps(i, i);
+          const __m128 rr = _mm_mul_ps(r, r);
+          const __m128 rrii = _mm_add_ps(rr, ii);
+          _mm_storeu_ps(&(*power_spectrum)[k], rrii);
+        }
+        (*power_spectrum)[kFftLengthBy2] =
+            re[kFftLengthBy2] * re[kFftLengthBy2] +
+            im[kFftLengthBy2] * im[kFftLengthBy2];
+      } break;
+#endif
+      default:
+        std::transform(re.begin(), re.end(), im.begin(),
+                       power_spectrum->begin(),
+                       [](float a, float b) { return a * a + b * b; });
+    }
+  }
+
+  // Copies the data from an interleaved array.
+  void CopyFromPackedArray(const std::array<float, kFftLength>& v) {
+    re[0] = v[0];
+    re[kFftLengthBy2] = v[1];
+    im[0] = im[kFftLengthBy2] = 0;
+    for (size_t k = 1, j = 2; k < kFftLengthBy2; ++k) {
+      re[k] = v[j++];
+      im[k] = v[j++];
+    }
+  }
+
+  // Copies the data into an interleaved array.
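Concretely, the interleaved layout used by CopyFromPackedArray() above and CopyToPackedArray() below is [re(0), re(64), re(1), im(1), ..., re(63), im(63)]: the DC and Nyquist bins of a real FFT have zero imaginary parts, so their two spare slots carry re(64). A minimal sketch of packing such an array:

    #include <array>
    #include <cstdio>

    int main() {
      // 65 complex bins of a 128-point real FFT packed into 128 floats.
      std::array<float, 65> re{}, im{};
      re[0] = 1.f;   // DC; im[0] is implicitly 0.
      re[64] = 2.f;  // Nyquist; im[64] is implicitly 0.
      re[1] = 3.f;
      im[1] = 4.f;

      std::array<float, 128> packed{};
      packed[0] = re[0];
      packed[1] = re[64];  // The Nyquist real part rides in the second slot.
      for (size_t k = 1, j = 2; k < 64; ++k) {
        packed[j++] = re[k];
        packed[j++] = im[k];
      }
      std::printf("packed[0..3] = %g %g %g %g\n",  // Prints: 1 2 3 4.
                  packed[0], packed[1], packed[2], packed[3]);
      return 0;
    }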
+ void CopyToPackedArray(std::array<float, kFftLength>* v) const { + RTC_DCHECK(v); + (*v)[0] = re[0]; + (*v)[1] = re[kFftLengthBy2]; + for (size_t k = 1, j = 2; k < kFftLengthBy2; ++k) { + (*v)[j++] = re[k]; + (*v)[j++] = im[k]; + } + } + + std::array<float, kFftLengthBy2Plus1> re; + std::array<float, kFftLengthBy2Plus1> im; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_FFT_DATA_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/fft_data_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/fft_data_unittest.cc new file mode 100644 index 0000000000..d969744c04 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/fft_data_unittest.cc @@ -0,0 +1,163 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/fft_data.h" + +#include "system_wrappers/include/cpu_features_wrapper.h" +#include "test/gtest.h" +#include "typedefs.h" // NOLINT(build/include) + +namespace webrtc { + +#if defined(WEBRTC_ARCH_X86_FAMILY) +// Verifies that the optimized methods are bitexact to their reference +// counterparts. +TEST(FftData, TestOptimizations) { + if (WebRtc_GetCPUInfo(kSSE2) != 0) { + FftData x; + + for (size_t k = 0; k < x.re.size(); ++k) { + x.re[k] = k + 1; + } + + x.im[0] = x.im[x.im.size() - 1] = 0.f; + for (size_t k = 1; k < x.im.size() - 1; ++k) { + x.im[k] = 2.f * (k + 1); + } + + std::array<float, kFftLengthBy2Plus1> spectrum; + std::array<float, kFftLengthBy2Plus1> spectrum_sse2; + x.Spectrum(Aec3Optimization::kNone, &spectrum); + x.Spectrum(Aec3Optimization::kSse2, &spectrum_sse2); + EXPECT_EQ(spectrum, spectrum_sse2); + } +} +#endif + +#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) + +// Verifies the check for null output in CopyToPackedArray. +TEST(FftData, NonNullCopyToPackedArrayOutput) { + EXPECT_DEATH(FftData().CopyToPackedArray(nullptr), ""); +} + +// Verifies the check for null output in Spectrum. +TEST(FftData, NonNullSpectrumOutput) { + EXPECT_DEATH(FftData().Spectrum(Aec3Optimization::kNone, nullptr), ""); +} + +#endif + +// Verifies that the Assign method properly copies the data from the source and +// ensures that the imaginary components for the DC and Nyquist bins are 0. +TEST(FftData, Assign) { + FftData x; + FftData y; + + x.re.fill(1.f); + x.im.fill(2.f); + y.Assign(x); + EXPECT_EQ(x.re, y.re); + EXPECT_EQ(0.f, y.im[0]); + EXPECT_EQ(0.f, y.im[x.im.size() - 1]); + for (size_t k = 1; k < x.im.size() - 1; ++k) { + EXPECT_EQ(x.im[k], y.im[k]); + } +} + +// Verifies that the Clear method properly clears all the data. +TEST(FftData, Clear) { + FftData x_ref; + FftData x; + + x_ref.re.fill(0.f); + x_ref.im.fill(0.f); + + x.re.fill(1.f); + x.im.fill(2.f); + x.Clear(); + + EXPECT_EQ(x_ref.re, x.re); + EXPECT_EQ(x_ref.im, x.im); +} + +// Verifies that the spectrum is correctly computed. 
+TEST(FftData, Spectrum) {
+  FftData x;
+
+  for (size_t k = 0; k < x.re.size(); ++k) {
+    x.re[k] = k + 1;
+  }
+
+  x.im[0] = x.im[x.im.size() - 1] = 0.f;
+  for (size_t k = 1; k < x.im.size() - 1; ++k) {
+    x.im[k] = 2.f * (k + 1);
+  }
+
+  std::array<float, kFftLengthBy2Plus1> spectrum;
+  x.Spectrum(Aec3Optimization::kNone, &spectrum);
+
+  EXPECT_EQ(x.re[0] * x.re[0], spectrum[0]);
+  EXPECT_EQ(x.re[spectrum.size() - 1] * x.re[spectrum.size() - 1],
+            spectrum[spectrum.size() - 1]);
+  for (size_t k = 1; k < spectrum.size() - 1; ++k) {
+    EXPECT_EQ(x.re[k] * x.re[k] + x.im[k] * x.im[k], spectrum[k]);
+  }
+}
+
+// Verifies that the functionality in CopyToPackedArray works as intended.
+TEST(FftData, CopyToPackedArray) {
+  FftData x;
+  std::array<float, kFftLength> x_packed;
+
+  for (size_t k = 0; k < x.re.size(); ++k) {
+    x.re[k] = k + 1;
+  }
+
+  x.im[0] = x.im[x.im.size() - 1] = 0.f;
+  for (size_t k = 1; k < x.im.size() - 1; ++k) {
+    x.im[k] = 2.f * (k + 1);
+  }
+
+  x.CopyToPackedArray(&x_packed);
+
+  EXPECT_EQ(x.re[0], x_packed[0]);
+  EXPECT_EQ(x.re[x.re.size() - 1], x_packed[1]);
+  for (size_t k = 1; k < x_packed.size() / 2; ++k) {
+    EXPECT_EQ(x.re[k], x_packed[2 * k]);
+    EXPECT_EQ(x.im[k], x_packed[2 * k + 1]);
+  }
+}
+
+// Verifies that the functionality in CopyFromPackedArray works as intended
+// (relying on the functionality in CopyToPackedArray having been verified in
+// the test above).
+TEST(FftData, CopyFromPackedArray) {
+  FftData x_ref;
+  FftData x;
+  std::array<float, kFftLength> x_packed;
+
+  for (size_t k = 0; k < x_ref.re.size(); ++k) {
+    x_ref.re[k] = k + 1;
+  }
+
+  x_ref.im[0] = x_ref.im[x_ref.im.size() - 1] = 0.f;
+  for (size_t k = 1; k < x_ref.im.size() - 1; ++k) {
+    x_ref.im[k] = 2.f * (k + 1);
+  }
+
+  x_ref.CopyToPackedArray(&x_packed);
+  x.CopyFromPackedArray(x_packed);
+
+  EXPECT_EQ(x_ref.re, x.re);
+  EXPECT_EQ(x_ref.im, x.im);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/frame_blocker.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/frame_blocker.cc
new file mode 100644
index 0000000000..0a0c0e2fae
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/frame_blocker.cc
@@ -0,0 +1,71 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */ + +#include "modules/audio_processing/aec3/frame_blocker.h" + +#include <algorithm> + +#include "rtc_base/checks.h" + +namespace webrtc { + +FrameBlocker::FrameBlocker(size_t num_bands) + : num_bands_(num_bands), buffer_(num_bands_) { + for (auto& b : buffer_) { + b.reserve(kBlockSize); + RTC_DCHECK(b.empty()); + } +} + +FrameBlocker::~FrameBlocker() = default; + +void FrameBlocker::InsertSubFrameAndExtractBlock( + const std::vector<rtc::ArrayView<float>>& sub_frame, + std::vector<std::vector<float>>* block) { + RTC_DCHECK(block); + RTC_DCHECK_EQ(num_bands_, block->size()); + RTC_DCHECK_EQ(num_bands_, sub_frame.size()); + for (size_t i = 0; i < num_bands_; ++i) { + RTC_DCHECK_GE(kBlockSize - 16, buffer_[i].size()); + RTC_DCHECK_EQ(kBlockSize, (*block)[i].size()); + RTC_DCHECK_EQ(kSubFrameLength, sub_frame[i].size()); + const int samples_to_block = kBlockSize - buffer_[i].size(); + (*block)[i].clear(); + (*block)[i].insert((*block)[i].begin(), buffer_[i].begin(), + buffer_[i].end()); + (*block)[i].insert((*block)[i].begin() + buffer_[i].size(), + sub_frame[i].begin(), + sub_frame[i].begin() + samples_to_block); + buffer_[i].clear(); + buffer_[i].insert(buffer_[i].begin(), + sub_frame[i].begin() + samples_to_block, + sub_frame[i].end()); + } +} + +bool FrameBlocker::IsBlockAvailable() const { + return kBlockSize == buffer_[0].size(); +} + +void FrameBlocker::ExtractBlock(std::vector<std::vector<float>>* block) { + RTC_DCHECK(block); + RTC_DCHECK_EQ(num_bands_, block->size()); + RTC_DCHECK(IsBlockAvailable()); + for (size_t i = 0; i < num_bands_; ++i) { + RTC_DCHECK_EQ(kBlockSize, buffer_[i].size()); + RTC_DCHECK_EQ(kBlockSize, (*block)[i].size()); + (*block)[i].clear(); + (*block)[i].insert((*block)[i].begin(), buffer_[i].begin(), + buffer_[i].end()); + buffer_[i].clear(); + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/frame_blocker.h b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/frame_blocker.h new file mode 100644 index 0000000000..08e1e1dd23 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/frame_blocker.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_FRAME_BLOCKER_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_FRAME_BLOCKER_H_ + +#include <stddef.h> +#include <vector> + +#include "api/array_view.h" +#include "modules/audio_processing/aec3/aec3_common.h" +#include "rtc_base/constructormagic.h" + +namespace webrtc { + +// Class for producing 64 sample multiband blocks from frames consisting of 1 or +// 2 subframes of 80 samples. +class FrameBlocker { + public: + explicit FrameBlocker(size_t num_bands); + ~FrameBlocker(); + // Inserts one 80 sample multiband subframe from the multiband frame and + // extracts one 64 sample multiband block. + void InsertSubFrameAndExtractBlock( + const std::vector<rtc::ArrayView<float>>& sub_frame, + std::vector<std::vector<float>>* block); + // Reports whether a multiband block of 64 samples is available for + // extraction. + bool IsBlockAvailable() const; + // Extracts a multiband block of 64 samples. 
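+  // Each 80 sample subframe leaves 16 samples in the internal buffer, so a
+  // full 64 sample block becomes available after every fourth call to
+  // InsertSubFrameAndExtractBlock.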
+ void ExtractBlock(std::vector<std::vector<float>>* block); + + private: + const size_t num_bands_; + std::vector<std::vector<float>> buffer_; + + RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(FrameBlocker); +}; +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_FRAME_BLOCKER_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/frame_blocker_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/frame_blocker_unittest.cc new file mode 100644 index 0000000000..6e73d4bc8f --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/frame_blocker_unittest.cc @@ -0,0 +1,341 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/frame_blocker.h" + +#include <sstream> +#include <string> +#include <vector> + +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/block_framer.h" +#include "test/gtest.h" + +namespace webrtc { +namespace { + +float ComputeSampleValue(size_t chunk_counter, + size_t chunk_size, + size_t band, + size_t sample_index, + int offset) { + float value = + static_cast<int>(chunk_counter * chunk_size + sample_index) + offset; + return value > 0 ? 5000 * band + value : 0; +} + +void FillSubFrame(size_t sub_frame_counter, + int offset, + std::vector<std::vector<float>>* sub_frame) { + for (size_t k = 0; k < sub_frame->size(); ++k) { + for (size_t i = 0; i < (*sub_frame)[0].size(); ++i) { + (*sub_frame)[k][i] = + ComputeSampleValue(sub_frame_counter, kSubFrameLength, k, i, offset); + } + } +} + +void FillSubFrameView(size_t sub_frame_counter, + int offset, + std::vector<std::vector<float>>* sub_frame, + std::vector<rtc::ArrayView<float>>* sub_frame_view) { + FillSubFrame(sub_frame_counter, offset, sub_frame); + for (size_t k = 0; k < sub_frame_view->size(); ++k) { + (*sub_frame_view)[k] = + rtc::ArrayView<float>(&(*sub_frame)[k][0], (*sub_frame)[k].size()); + } +} + +bool VerifySubFrame(size_t sub_frame_counter, + int offset, + const std::vector<rtc::ArrayView<float>>& sub_frame_view) { + std::vector<std::vector<float>> reference_sub_frame( + sub_frame_view.size(), std::vector<float>(sub_frame_view[0].size(), 0.f)); + FillSubFrame(sub_frame_counter, offset, &reference_sub_frame); + for (size_t k = 0; k < sub_frame_view.size(); ++k) { + for (size_t i = 0; i < sub_frame_view[k].size(); ++i) { + if (reference_sub_frame[k][i] != sub_frame_view[k][i]) { + return false; + } + } + } + return true; +} + +bool VerifyBlock(size_t block_counter, + int offset, + const std::vector<std::vector<float>>& block) { + for (size_t k = 0; k < block.size(); ++k) { + for (size_t i = 0; i < block[k].size(); ++i) { + const float reference_value = + ComputeSampleValue(block_counter, kBlockSize, k, i, offset); + if (reference_value != block[k][i]) { + return false; + } + } + } + return true; +} + +// Verifies that the FrameBlocker properly forms blocks out of the frames. 
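+// Feeds deterministic, band-dependent ramp signals through the blocker and
+// checks every produced block sample by sample against the reference values.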
+void RunBlockerTest(int sample_rate_hz) {
+  constexpr size_t kNumSubFramesToProcess = 20;
+  const size_t num_bands = NumBandsForRate(sample_rate_hz);
+
+  std::vector<std::vector<float>> block(num_bands,
+                                        std::vector<float>(kBlockSize, 0.f));
+  std::vector<std::vector<float>> input_sub_frame(
+      num_bands, std::vector<float>(kSubFrameLength, 0.f));
+  std::vector<rtc::ArrayView<float>> input_sub_frame_view(num_bands);
+  FrameBlocker blocker(num_bands);
+
+  size_t block_counter = 0;
+  for (size_t sub_frame_index = 0; sub_frame_index < kNumSubFramesToProcess;
+       ++sub_frame_index) {
+    FillSubFrameView(sub_frame_index, 0, &input_sub_frame,
+                     &input_sub_frame_view);
+
+    blocker.InsertSubFrameAndExtractBlock(input_sub_frame_view, &block);
+    EXPECT_TRUE(VerifyBlock(block_counter++, 0, block));
+
+    if ((sub_frame_index + 1) % 4 == 0) {
+      EXPECT_TRUE(blocker.IsBlockAvailable());
+    } else {
+      EXPECT_FALSE(blocker.IsBlockAvailable());
+    }
+    if (blocker.IsBlockAvailable()) {
+      blocker.ExtractBlock(&block);
+      EXPECT_TRUE(VerifyBlock(block_counter++, 0, block));
+    }
+  }
+}
+
+// Verifies that the FrameBlocker and BlockFramer work well together and
+// produce the expected output.
+void RunBlockerAndFramerTest(int sample_rate_hz) {
+  const size_t kNumSubFramesToProcess = 20;
+  const size_t num_bands = NumBandsForRate(sample_rate_hz);
+
+  std::vector<std::vector<float>> block(num_bands,
+                                        std::vector<float>(kBlockSize, 0.f));
+  std::vector<std::vector<float>> input_sub_frame(
+      num_bands, std::vector<float>(kSubFrameLength, 0.f));
+  std::vector<std::vector<float>> output_sub_frame(
+      num_bands, std::vector<float>(kSubFrameLength, 0.f));
+  std::vector<rtc::ArrayView<float>> output_sub_frame_view(num_bands);
+  std::vector<rtc::ArrayView<float>> input_sub_frame_view(num_bands);
+  FrameBlocker blocker(num_bands);
+  BlockFramer framer(num_bands);
+
+  for (size_t sub_frame_index = 0; sub_frame_index < kNumSubFramesToProcess;
+       ++sub_frame_index) {
+    FillSubFrameView(sub_frame_index, 0, &input_sub_frame,
+                     &input_sub_frame_view);
+    FillSubFrameView(sub_frame_index, 0, &output_sub_frame,
+                     &output_sub_frame_view);
+
+    blocker.InsertSubFrameAndExtractBlock(input_sub_frame_view, &block);
+    framer.InsertBlockAndExtractSubFrame(block, &output_sub_frame_view);
+
+    if ((sub_frame_index + 1) % 4 == 0) {
+      EXPECT_TRUE(blocker.IsBlockAvailable());
+    } else {
+      EXPECT_FALSE(blocker.IsBlockAvailable());
+    }
+    if (blocker.IsBlockAvailable()) {
+      blocker.ExtractBlock(&block);
+      framer.InsertBlock(block);
+    }
+    EXPECT_TRUE(VerifySubFrame(sub_frame_index, -64, output_sub_frame_view));
+  }
+}
+
+#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
+// Verifies that the FrameBlocker crashes if the InsertSubFrameAndExtractBlock
+// method is called for inputs with the wrong number of bands or band lengths.
+void RunWronglySizedInsertAndExtractParametersTest(int sample_rate_hz, + size_t num_block_bands, + size_t block_length, + size_t num_sub_frame_bands, + size_t sub_frame_length) { + const size_t correct_num_bands = NumBandsForRate(sample_rate_hz); + + std::vector<std::vector<float>> block(num_block_bands, + std::vector<float>(block_length, 0.f)); + std::vector<std::vector<float>> input_sub_frame( + num_sub_frame_bands, std::vector<float>(sub_frame_length, 0.f)); + std::vector<rtc::ArrayView<float>> input_sub_frame_view( + input_sub_frame.size()); + FillSubFrameView(0, 0, &input_sub_frame, &input_sub_frame_view); + FrameBlocker blocker(correct_num_bands); + EXPECT_DEATH( + blocker.InsertSubFrameAndExtractBlock(input_sub_frame_view, &block), ""); +} + +// Verifies that the FrameBlocker crashes if the ExtractBlock method is called +// for inputs with the wrong number of bands or band lengths. +void RunWronglySizedExtractParameterTest(int sample_rate_hz, + size_t num_block_bands, + size_t block_length) { + const size_t correct_num_bands = NumBandsForRate(sample_rate_hz); + + std::vector<std::vector<float>> correct_block( + correct_num_bands, std::vector<float>(kBlockSize, 0.f)); + std::vector<std::vector<float>> wrong_block( + num_block_bands, std::vector<float>(block_length, 0.f)); + std::vector<std::vector<float>> input_sub_frame( + correct_num_bands, std::vector<float>(kSubFrameLength, 0.f)); + std::vector<rtc::ArrayView<float>> input_sub_frame_view( + input_sub_frame.size()); + FillSubFrameView(0, 0, &input_sub_frame, &input_sub_frame_view); + FrameBlocker blocker(correct_num_bands); + blocker.InsertSubFrameAndExtractBlock(input_sub_frame_view, &correct_block); + blocker.InsertSubFrameAndExtractBlock(input_sub_frame_view, &correct_block); + blocker.InsertSubFrameAndExtractBlock(input_sub_frame_view, &correct_block); + blocker.InsertSubFrameAndExtractBlock(input_sub_frame_view, &correct_block); + + EXPECT_DEATH(blocker.ExtractBlock(&wrong_block), ""); +} + +// Verifies that the FrameBlocker crashes if the ExtractBlock method is called +// after a wrong number of previous InsertSubFrameAndExtractBlock method calls +// have been made. 
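+// Four insert calls fill the internal buffer to exactly one block, so any
+// other number of preceding calls must trigger the availability DCHECK in
+// ExtractBlock.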
+void RunWrongExtractOrderTest(int sample_rate_hz,
+                              size_t num_preceding_api_calls) {
+  const size_t correct_num_bands = NumBandsForRate(sample_rate_hz);
+
+  std::vector<std::vector<float>> block(correct_num_bands,
+                                        std::vector<float>(kBlockSize, 0.f));
+  std::vector<std::vector<float>> input_sub_frame(
+      correct_num_bands, std::vector<float>(kSubFrameLength, 0.f));
+  std::vector<rtc::ArrayView<float>> input_sub_frame_view(
+      input_sub_frame.size());
+  FillSubFrameView(0, 0, &input_sub_frame, &input_sub_frame_view);
+  FrameBlocker blocker(correct_num_bands);
+  for (size_t k = 0; k < num_preceding_api_calls; ++k) {
+    blocker.InsertSubFrameAndExtractBlock(input_sub_frame_view, &block);
+  }
+
+  EXPECT_DEATH(blocker.ExtractBlock(&block), "");
+}
+#endif
+
+std::string ProduceDebugText(int sample_rate_hz) {
+  std::ostringstream ss;
+  ss << "Sample rate: " << sample_rate_hz;
+  return ss.str();
+}
+
+}  // namespace
+
+#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
+TEST(FrameBlocker, WrongNumberOfBandsInBlockForInsertSubFrameAndExtractBlock) {
+  for (auto rate : {8000, 16000, 32000, 48000}) {
+    SCOPED_TRACE(ProduceDebugText(rate));
+    const size_t correct_num_bands = NumBandsForRate(rate);
+    const size_t wrong_num_bands = (correct_num_bands % 3) + 1;
+    RunWronglySizedInsertAndExtractParametersTest(
+        rate, wrong_num_bands, kBlockSize, correct_num_bands, kSubFrameLength);
+  }
+}
+
+TEST(FrameBlocker,
+     WrongNumberOfBandsInSubFrameForInsertSubFrameAndExtractBlock) {
+  for (auto rate : {8000, 16000, 32000, 48000}) {
+    SCOPED_TRACE(ProduceDebugText(rate));
+    const size_t correct_num_bands = NumBandsForRate(rate);
+    const size_t wrong_num_bands = (correct_num_bands % 3) + 1;
+    RunWronglySizedInsertAndExtractParametersTest(
+        rate, correct_num_bands, kBlockSize, wrong_num_bands, kSubFrameLength);
+  }
+}
+
+TEST(FrameBlocker,
+     WrongNumberOfSamplesInBlockForInsertSubFrameAndExtractBlock) {
+  for (auto rate : {8000, 16000, 32000, 48000}) {
+    SCOPED_TRACE(ProduceDebugText(rate));
+    const size_t correct_num_bands = NumBandsForRate(rate);
+    RunWronglySizedInsertAndExtractParametersTest(
+        rate, correct_num_bands, kBlockSize - 1, correct_num_bands,
+        kSubFrameLength);
+  }
+}
+
+TEST(FrameBlocker,
+     WrongNumberOfSamplesInSubFrameForInsertSubFrameAndExtractBlock) {
+  for (auto rate : {8000, 16000, 32000, 48000}) {
+    SCOPED_TRACE(ProduceDebugText(rate));
+    const size_t correct_num_bands = NumBandsForRate(rate);
+    RunWronglySizedInsertAndExtractParametersTest(rate, correct_num_bands,
+                                                  kBlockSize, correct_num_bands,
+                                                  kSubFrameLength - 1);
+  }
+}
+
+TEST(FrameBlocker, WrongNumberOfBandsInBlockForExtractBlock) {
+  for (auto rate : {8000, 16000, 32000, 48000}) {
+    SCOPED_TRACE(ProduceDebugText(rate));
+    const size_t correct_num_bands = NumBandsForRate(rate);
+    const size_t wrong_num_bands = (correct_num_bands % 3) + 1;
+    RunWronglySizedExtractParameterTest(rate, wrong_num_bands, kBlockSize);
+  }
+}
+
+TEST(FrameBlocker, WrongNumberOfSamplesInBlockForExtractBlock) {
+  for (auto rate : {8000, 16000, 32000, 48000}) {
+    SCOPED_TRACE(ProduceDebugText(rate));
+    const size_t correct_num_bands = NumBandsForRate(rate);
+    RunWronglySizedExtractParameterTest(rate, correct_num_bands,
+                                        kBlockSize - 1);
+  }
+}
+
+TEST(FrameBlocker, WrongNumberOfPrecedingApiCallsForExtractBlock) {
+  for (auto rate : {8000, 16000, 32000, 48000}) {
+    for (size_t num_calls = 0; num_calls < 4; ++num_calls) {
+      std::ostringstream ss;
+      ss << "Sample rate: " << rate;
+      ss << ", Num preceding InsertSubFrameAndExtractBlock calls: "
+         << num_calls;
+
+      SCOPED_TRACE(ss.str());
+      RunWrongExtractOrderTest(rate, num_calls);
+    }
+  }
+}
+
+// Verifies that the verification for a null block pointer works.
+TEST(FrameBlocker, NullBlockParameter) {
+  std::vector<std::vector<float>> sub_frame(
+      1, std::vector<float>(kSubFrameLength, 0.f));
+  std::vector<rtc::ArrayView<float>> sub_frame_view(sub_frame.size());
+  FillSubFrameView(0, 0, &sub_frame, &sub_frame_view);
+  EXPECT_DEATH(
+      FrameBlocker(1).InsertSubFrameAndExtractBlock(sub_frame_view, nullptr),
+      "");
+}
+
+#endif
+
+TEST(FrameBlocker, BlockBitexactness) {
+  for (auto rate : {8000, 16000, 32000, 48000}) {
+    SCOPED_TRACE(ProduceDebugText(rate));
+    RunBlockerTest(rate);
+  }
+}
+
+TEST(FrameBlocker, BlockerAndFramer) {
+  for (auto rate : {8000, 16000, 32000, 48000}) {
+    SCOPED_TRACE(ProduceDebugText(rate));
+    RunBlockerAndFramerTest(rate);
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/main_filter_update_gain.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/main_filter_update_gain.cc
new file mode 100644
index 0000000000..1dd2a20b27
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/main_filter_update_gain.cc
@@ -0,0 +1,118 @@
+/*
+ *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/main_filter_update_gain.h"
+
+#include <algorithm>
+#include <functional>
+
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "rtc_base/atomicops.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+namespace {
+
+constexpr float kHErrorInitial = 10000.f;
+constexpr int kPoorExcitationCounterInitial = 1000;
+
+}  // namespace
+
+int MainFilterUpdateGain::instance_count_ = 0;
+
+MainFilterUpdateGain::MainFilterUpdateGain()
+    : data_dumper_(
+          new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),
+      poor_excitation_counter_(kPoorExcitationCounterInitial) {
+  H_error_.fill(kHErrorInitial);
+}
+
+MainFilterUpdateGain::~MainFilterUpdateGain() {}
+
+void MainFilterUpdateGain::HandleEchoPathChange() {
+  H_error_.fill(kHErrorInitial);
+  poor_excitation_counter_ = kPoorExcitationCounterInitial;
+  call_counter_ = 0;
+}
+
+void MainFilterUpdateGain::Compute(
+    const RenderBuffer& render_buffer,
+    const RenderSignalAnalyzer& render_signal_analyzer,
+    const SubtractorOutput& subtractor_output,
+    const AdaptiveFirFilter& filter,
+    bool saturated_capture_signal,
+    FftData* gain_fft) {
+  RTC_DCHECK(gain_fft);
+  // Introducing shorter notation to improve readability.
+  const FftData& E_main = subtractor_output.E_main;
+  const auto& E2_main = subtractor_output.E2_main;
+  const auto& E2_shadow = subtractor_output.E2_shadow;
+  FftData* G = gain_fft;
+  const size_t size_partitions = filter.SizePartitions();
+  const auto& X2 = render_buffer.SpectralSum(size_partitions);
+  const auto& erl = filter.Erl();
+
+  ++call_counter_;
+
+  if (render_signal_analyzer.PoorSignalExcitation()) {
+    poor_excitation_counter_ = 0;
+  }
+
+  // Do not update the filter if the render is not sufficiently excited.
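+  // Adaptation is also halted while the capture signal is saturated and
+  // during the first size_partitions calls after startup or an echo path
+  // change, as the error estimates cannot yet be trusted then.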
+ if (++poor_excitation_counter_ < size_partitions || + saturated_capture_signal || call_counter_ <= size_partitions) { + G->re.fill(0.f); + G->im.fill(0.f); + } else { + // Corresponds to WGN of power -39 dBFS. + constexpr float kNoiseGatePower = 220075344.f; + std::array<float, kFftLengthBy2Plus1> mu; + // mu = H_error / (0.5* H_error* X2 + n * E2). + for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { + mu[k] = X2[k] > kNoiseGatePower + ? H_error_[k] / (0.5f * H_error_[k] * X2[k] + + size_partitions * E2_main[k]) + : 0.f; + } + + // Avoid updating the filter close to narrow bands in the render signals. + render_signal_analyzer.MaskRegionsAroundNarrowBands(&mu); + + // H_error = H_error - 0.5 * mu * X2 * H_error. + for (size_t k = 0; k < H_error_.size(); ++k) { + H_error_[k] -= 0.5f * mu[k] * X2[k] * H_error_[k]; + } + + // G = mu * E. + std::transform(mu.begin(), mu.end(), E_main.re.begin(), G->re.begin(), + std::multiplies<float>()); + std::transform(mu.begin(), mu.end(), E_main.im.begin(), G->im.begin(), + std::multiplies<float>()); + } + + // H_error = H_error + factor * erl. + std::array<float, kFftLengthBy2Plus1> H_error_increase; + constexpr float kErlScaleAccurate = 1.f / 100.0f; + constexpr float kErlScaleInaccurate = 1.f / 60.0f; + std::transform(E2_shadow.begin(), E2_shadow.end(), E2_main.begin(), + H_error_increase.begin(), [&](float a, float b) { + return a >= b ? kErlScaleAccurate : kErlScaleInaccurate; + }); + std::transform(erl.begin(), erl.end(), H_error_increase.begin(), + H_error_increase.begin(), std::multiplies<float>()); + std::transform(H_error_.begin(), H_error_.end(), H_error_increase.begin(), + H_error_.begin(), + [&](float a, float b) { return std::max(a + b, 0.1f); }); + + data_dumper_->DumpRaw("aec3_main_gain_H_error", H_error_); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/main_filter_update_gain.h b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/main_filter_update_gain.h new file mode 100644 index 0000000000..756a5d0e5d --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/main_filter_update_gain.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_MAIN_FILTER_UPDATE_GAIN_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_MAIN_FILTER_UPDATE_GAIN_H_ + +#include <memory> +#include <vector> + +#include "modules/audio_processing/aec3/adaptive_fir_filter.h" +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/render_buffer.h" +#include "modules/audio_processing/aec3/render_signal_analyzer.h" +#include "modules/audio_processing/aec3/subtractor_output.h" +#include "rtc_base/constructormagic.h" + +namespace webrtc { + +class ApmDataDumper; + +// Provides functionality for computing the adaptive gain for the main filter. +class MainFilterUpdateGain { + public: + MainFilterUpdateGain(); + ~MainFilterUpdateGain(); + + // Takes action in the case of a known echo path change. + void HandleEchoPathChange(); + + // Computes the gain. 
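+  // The gain is zero while the render signal is insufficiently exciting or
+  // the capture signal is saturated; otherwise it is computed in an
+  // NLMS-like manner as mu * E, with mu normalized by the render power X2
+  // and the error power E2.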
+ void Compute(const RenderBuffer& render_buffer, + const RenderSignalAnalyzer& render_signal_analyzer, + const SubtractorOutput& subtractor_output, + const AdaptiveFirFilter& filter, + bool saturated_capture_signal, + FftData* gain_fft); + + private: + static int instance_count_; + std::unique_ptr<ApmDataDumper> data_dumper_; + std::array<float, kFftLengthBy2Plus1> H_error_; + size_t poor_excitation_counter_; + size_t call_counter_ = 0; + RTC_DISALLOW_COPY_AND_ASSIGN(MainFilterUpdateGain); +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_MAIN_FILTER_UPDATE_GAIN_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc new file mode 100644 index 0000000000..203731a929 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc @@ -0,0 +1,291 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/main_filter_update_gain.h" + +#include <algorithm> +#include <numeric> +#include <string> + +#include "modules/audio_processing/aec3/adaptive_fir_filter.h" +#include "modules/audio_processing/aec3/aec_state.h" +#include "modules/audio_processing/aec3/render_buffer.h" +#include "modules/audio_processing/aec3/render_signal_analyzer.h" +#include "modules/audio_processing/aec3/shadow_filter_update_gain.h" +#include "modules/audio_processing/aec3/subtractor_output.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "modules/audio_processing/test/echo_canceller_test_tools.h" +#include "rtc_base/numerics/safe_minmax.h" +#include "rtc_base/random.h" +#include "test/gtest.h" + +namespace webrtc { +namespace { + +// Method for performing the simulations needed to test the main filter update +// gain functionality. 
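+// The simulation feeds random render blocks through a delayed echo path,
+// adapts both the main and the shadow filter, and returns the last error
+// block, capture block and computed gain for inspection.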
+void RunFilterUpdateTest(int num_blocks_to_process, + size_t delay_samples, + const std::vector<int>& blocks_with_echo_path_changes, + const std::vector<int>& blocks_with_saturation, + bool use_silent_render_in_second_half, + std::array<float, kBlockSize>* e_last_block, + std::array<float, kBlockSize>* y_last_block, + FftData* G_last_block) { + ApmDataDumper data_dumper(42); + AdaptiveFirFilter main_filter(9, DetectOptimization(), &data_dumper); + AdaptiveFirFilter shadow_filter(9, DetectOptimization(), &data_dumper); + Aec3Fft fft; + RenderBuffer render_buffer( + Aec3Optimization::kNone, 3, main_filter.SizePartitions(), + std::vector<size_t>(1, main_filter.SizePartitions())); + std::array<float, kBlockSize> x_old; + x_old.fill(0.f); + ShadowFilterUpdateGain shadow_gain; + MainFilterUpdateGain main_gain; + Random random_generator(42U); + std::vector<std::vector<float>> x(3, std::vector<float>(kBlockSize, 0.f)); + std::vector<float> y(kBlockSize, 0.f); + AecState aec_state(EchoCanceller3Config{}); + RenderSignalAnalyzer render_signal_analyzer; + std::array<float, kFftLength> s_scratch; + std::array<float, kBlockSize> s; + FftData S; + FftData G; + SubtractorOutput output; + output.Reset(); + FftData& E_main = output.E_main; + FftData E_shadow; + std::array<float, kFftLengthBy2Plus1> Y2; + std::array<float, kFftLengthBy2Plus1>& E2_main = output.E2_main; + std::array<float, kBlockSize>& e_main = output.e_main; + std::array<float, kBlockSize>& e_shadow = output.e_shadow; + Y2.fill(0.f); + + constexpr float kScale = 1.0f / kFftLengthBy2; + + DelayBuffer<float> delay_buffer(delay_samples); + for (int k = 0; k < num_blocks_to_process; ++k) { + // Handle echo path changes. + if (std::find(blocks_with_echo_path_changes.begin(), + blocks_with_echo_path_changes.end(), + k) != blocks_with_echo_path_changes.end()) { + main_filter.HandleEchoPathChange(); + } + + // Handle saturation. + const bool saturation = + std::find(blocks_with_saturation.begin(), blocks_with_saturation.end(), + k) != blocks_with_saturation.end(); + + // Create the render signal. + if (use_silent_render_in_second_half && k > num_blocks_to_process / 2) { + std::fill(x[0].begin(), x[0].end(), 0.f); + } else { + RandomizeSampleVector(&random_generator, x[0]); + } + delay_buffer.Delay(x[0], y); + render_buffer.Insert(x); + render_signal_analyzer.Update(render_buffer, aec_state.FilterDelay()); + + // Apply the main filter. + main_filter.Filter(render_buffer, &S); + fft.Ifft(S, &s_scratch); + std::transform(y.begin(), y.end(), s_scratch.begin() + kFftLengthBy2, + e_main.begin(), + [&](float a, float b) { return a - b * kScale; }); + std::for_each(e_main.begin(), e_main.end(), + [](float& a) { a = rtc::SafeClamp(a, -32768.f, 32767.f); }); + fft.ZeroPaddedFft(e_main, &E_main); + for (size_t k = 0; k < kBlockSize; ++k) { + s[k] = kScale * s_scratch[k + kFftLengthBy2]; + } + + // Apply the shadow filter. + shadow_filter.Filter(render_buffer, &S); + fft.Ifft(S, &s_scratch); + std::transform(y.begin(), y.end(), s_scratch.begin() + kFftLengthBy2, + e_shadow.begin(), + [&](float a, float b) { return a - b * kScale; }); + std::for_each(e_shadow.begin(), e_shadow.end(), + [](float& a) { a = rtc::SafeClamp(a, -32768.f, 32767.f); }); + fft.ZeroPaddedFft(e_shadow, &E_shadow); + + // Compute spectra for future use. + E_main.Spectrum(Aec3Optimization::kNone, &output.E2_main); + E_shadow.Spectrum(Aec3Optimization::kNone, &output.E2_shadow); + + // Adapt the shadow filter. 
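+    // The shadow filter is adapted with its own gain; MainFilterUpdateGain
+    // compares its error spectrum E2_shadow against E2_main to decide how
+    // fast the filter error estimate H_error may grow.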
+ shadow_gain.Compute(render_buffer, render_signal_analyzer, E_shadow, + shadow_filter.SizePartitions(), saturation, &G); + shadow_filter.Adapt(render_buffer, G); + + // Adapt the main filter + main_gain.Compute(render_buffer, render_signal_analyzer, output, + main_filter, saturation, &G); + main_filter.Adapt(render_buffer, G); + + // Update the delay. + aec_state.HandleEchoPathChange(EchoPathVariability(false, false)); + aec_state.Update(main_filter.FilterFrequencyResponse(), + main_filter.FilterImpulseResponse(), true, rtc::nullopt, + render_buffer, E2_main, Y2, x[0], s, false); + } + + std::copy(e_main.begin(), e_main.end(), e_last_block->begin()); + std::copy(y.begin(), y.end(), y_last_block->begin()); + std::copy(G.re.begin(), G.re.end(), G_last_block->re.begin()); + std::copy(G.im.begin(), G.im.end(), G_last_block->im.begin()); +} + +std::string ProduceDebugText(size_t delay) { + std::ostringstream ss; + ss << "Delay: " << delay; + return ss.str(); +} + +} // namespace + +#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) + +// Verifies that the check for non-null output gain parameter works. +TEST(MainFilterUpdateGain, NullDataOutputGain) { + ApmDataDumper data_dumper(42); + AdaptiveFirFilter filter(9, DetectOptimization(), &data_dumper); + RenderBuffer render_buffer(Aec3Optimization::kNone, 3, + filter.SizePartitions(), + std::vector<size_t>(1, filter.SizePartitions())); + RenderSignalAnalyzer analyzer; + SubtractorOutput output; + MainFilterUpdateGain gain; + EXPECT_DEATH( + gain.Compute(render_buffer, analyzer, output, filter, false, nullptr), + ""); +} + +#endif + +// Verifies that the gain formed causes the filter using it to converge. +TEST(MainFilterUpdateGain, GainCausesFilterToConverge) { + std::vector<int> blocks_with_echo_path_changes; + std::vector<int> blocks_with_saturation; + for (size_t delay_samples : {0, 64, 150, 200, 301}) { + SCOPED_TRACE(ProduceDebugText(delay_samples)); + + std::array<float, kBlockSize> e; + std::array<float, kBlockSize> y; + FftData G; + + RunFilterUpdateTest(500, delay_samples, blocks_with_echo_path_changes, + blocks_with_saturation, false, &e, &y, &G); + + // Verify that the main filter is able to perform well. + EXPECT_LT(1000 * std::inner_product(e.begin(), e.end(), e.begin(), 0.f), + std::inner_product(y.begin(), y.end(), y.begin(), 0.f)); + } +} + +// Verifies that the magnitude of the gain on average decreases for a +// persistently exciting signal. 
+TEST(MainFilterUpdateGain, DecreasingGain) { + std::vector<int> blocks_with_echo_path_changes; + std::vector<int> blocks_with_saturation; + + std::array<float, kBlockSize> e; + std::array<float, kBlockSize> y; + FftData G_a; + FftData G_b; + FftData G_c; + std::array<float, kFftLengthBy2Plus1> G_a_power; + std::array<float, kFftLengthBy2Plus1> G_b_power; + std::array<float, kFftLengthBy2Plus1> G_c_power; + + RunFilterUpdateTest(100, 65, blocks_with_echo_path_changes, + blocks_with_saturation, false, &e, &y, &G_a); + RunFilterUpdateTest(200, 65, blocks_with_echo_path_changes, + blocks_with_saturation, false, &e, &y, &G_b); + RunFilterUpdateTest(300, 65, blocks_with_echo_path_changes, + blocks_with_saturation, false, &e, &y, &G_c); + + G_a.Spectrum(Aec3Optimization::kNone, &G_a_power); + G_b.Spectrum(Aec3Optimization::kNone, &G_b_power); + G_c.Spectrum(Aec3Optimization::kNone, &G_c_power); + + EXPECT_GT(std::accumulate(G_a_power.begin(), G_a_power.end(), 0.), + std::accumulate(G_b_power.begin(), G_b_power.end(), 0.)); + + EXPECT_GT(std::accumulate(G_b_power.begin(), G_b_power.end(), 0.), + std::accumulate(G_c_power.begin(), G_c_power.end(), 0.)); +} + +// Verifies that the gain is zero when there is saturation and that the internal +// error estimates cause the gain to increase after a period of saturation. +TEST(MainFilterUpdateGain, SaturationBehavior) { + std::vector<int> blocks_with_echo_path_changes; + std::vector<int> blocks_with_saturation; + for (int k = 99; k < 200; ++k) { + blocks_with_saturation.push_back(k); + } + + std::array<float, kBlockSize> e; + std::array<float, kBlockSize> y; + FftData G_a; + FftData G_b; + FftData G_a_ref; + G_a_ref.re.fill(0.f); + G_a_ref.im.fill(0.f); + + std::array<float, kFftLengthBy2Plus1> G_a_power; + std::array<float, kFftLengthBy2Plus1> G_b_power; + + RunFilterUpdateTest(100, 65, blocks_with_echo_path_changes, + blocks_with_saturation, false, &e, &y, &G_a); + + EXPECT_EQ(G_a_ref.re, G_a.re); + EXPECT_EQ(G_a_ref.im, G_a.im); + + RunFilterUpdateTest(99, 65, blocks_with_echo_path_changes, + blocks_with_saturation, false, &e, &y, &G_a); + RunFilterUpdateTest(201, 65, blocks_with_echo_path_changes, + blocks_with_saturation, false, &e, &y, &G_b); + + G_a.Spectrum(Aec3Optimization::kNone, &G_a_power); + G_b.Spectrum(Aec3Optimization::kNone, &G_b_power); + + EXPECT_LT(std::accumulate(G_a_power.begin(), G_a_power.end(), 0.), + std::accumulate(G_b_power.begin(), G_b_power.end(), 0.)); +} + +// Verifies that the gain increases after an echo path change. 
+TEST(MainFilterUpdateGain, EchoPathChangeBehavior) { + std::vector<int> blocks_with_echo_path_changes; + std::vector<int> blocks_with_saturation; + blocks_with_echo_path_changes.push_back(99); + + std::array<float, kBlockSize> e; + std::array<float, kBlockSize> y; + FftData G_a; + FftData G_b; + std::array<float, kFftLengthBy2Plus1> G_a_power; + std::array<float, kFftLengthBy2Plus1> G_b_power; + + RunFilterUpdateTest(99, 65, blocks_with_echo_path_changes, + blocks_with_saturation, false, &e, &y, &G_a); + RunFilterUpdateTest(100, 65, blocks_with_echo_path_changes, + blocks_with_saturation, false, &e, &y, &G_b); + + G_a.Spectrum(Aec3Optimization::kNone, &G_a_power); + G_b.Spectrum(Aec3Optimization::kNone, &G_b_power); + + EXPECT_LT(std::accumulate(G_a_power.begin(), G_a_power.end(), 0.), + std::accumulate(G_b_power.begin(), G_b_power.end(), 0.)); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/matched_filter.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/matched_filter.cc new file mode 100644 index 0000000000..4906bf9db7 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/matched_filter.cc @@ -0,0 +1,443 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include "modules/audio_processing/aec3/matched_filter.h" + +#if defined(WEBRTC_HAS_NEON) +#include <arm_neon.h> +#endif +#include "typedefs.h" // NOLINT(build/include) +#if defined(WEBRTC_ARCH_X86_FAMILY) +#include <emmintrin.h> +#endif +#include <algorithm> +#include <numeric> + +#include "modules/audio_processing/include/audio_processing.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/logging.h" + +namespace webrtc { +namespace aec3 { + +#if defined(WEBRTC_HAS_NEON) + +void MatchedFilterCore_NEON(size_t x_start_index, + float x2_sum_threshold, + rtc::ArrayView<const float> x, + rtc::ArrayView<const float> y, + rtc::ArrayView<float> h, + bool* filters_updated, + float* error_sum) { + const int h_size = static_cast<int>(h.size()); + const int x_size = static_cast<int>(x.size()); + RTC_DCHECK_EQ(0, h_size % 4); + + // Process for all samples in the sub-block. + for (size_t i = 0; i < y.size(); ++i) { + // Apply the matched filter as filter * x, and compute x * x. + + RTC_DCHECK_GT(x_size, x_start_index); + const float* x_p = &x[x_start_index]; + const float* h_p = &h[0]; + + // Initialize values for the accumulation. + float32x4_t s_128 = vdupq_n_f32(0); + float32x4_t x2_sum_128 = vdupq_n_f32(0); + float x2_sum = 0.f; + float s = 0; + + // Compute loop chunk sizes until, and after, the wraparound of the circular + // buffer for x. + const int chunk1 = + std::min(h_size, static_cast<int>(x_size - x_start_index)); + + // Perform the loop in two chunks. + const int chunk2 = h_size - chunk1; + for (int limit : {chunk1, chunk2}) { + // Perform 128 bit vector operations. + const int limit_by_4 = limit >> 2; + for (int k = limit_by_4; k > 0; --k, h_p += 4, x_p += 4) { + // Load the data into 128 bit vectors. + const float32x4_t x_k = vld1q_f32(x_p); + const float32x4_t h_k = vld1q_f32(h_p); + // Compute and accumulate x * x and h * x. 
+ x2_sum_128 = vmlaq_f32(x2_sum_128, x_k, x_k); + s_128 = vmlaq_f32(s_128, h_k, x_k); + } + + // Perform non-vector operations for any remaining items. + for (int k = limit - limit_by_4 * 4; k > 0; --k, ++h_p, ++x_p) { + const float x_k = *x_p; + x2_sum += x_k * x_k; + s += *h_p * x_k; + } + + x_p = &x[0]; + } + + // Combine the accumulated vector and scalar values. + float* v = reinterpret_cast<float*>(&x2_sum_128); + x2_sum += v[0] + v[1] + v[2] + v[3]; + v = reinterpret_cast<float*>(&s_128); + s += v[0] + v[1] + v[2] + v[3]; + + // Compute the matched filter error. + const float e = std::min(32767.f, std::max(-32768.f, y[i] - s)); + *error_sum += e * e; + + // Update the matched filter estimate in an NLMS manner. + if (x2_sum > x2_sum_threshold) { + RTC_DCHECK_LT(0.f, x2_sum); + const float alpha = 0.7f * e / x2_sum; + const float32x4_t alpha_128 = vmovq_n_f32(alpha); + + // filter = filter + 0.7 * (y - filter * x) / x * x. + float* h_p = &h[0]; + x_p = &x[x_start_index]; + + // Perform the loop in two chunks. + for (int limit : {chunk1, chunk2}) { + // Perform 128 bit vector operations. + const int limit_by_4 = limit >> 2; + for (int k = limit_by_4; k > 0; --k, h_p += 4, x_p += 4) { + // Load the data into 128 bit vectors. + float32x4_t h_k = vld1q_f32(h_p); + const float32x4_t x_k = vld1q_f32(x_p); + // Compute h = h + alpha * x. + h_k = vmlaq_f32(h_k, alpha_128, x_k); + + // Store the result. + vst1q_f32(h_p, h_k); + } + + // Perform non-vector operations for any remaining items. + for (int k = limit - limit_by_4 * 4; k > 0; --k, ++h_p, ++x_p) { + *h_p += alpha * *x_p; + } + + x_p = &x[0]; + } + + *filters_updated = true; + } + + x_start_index = x_start_index > 0 ? x_start_index - 1 : x_size - 1; + } +} + +#endif + +#if defined(WEBRTC_ARCH_X86_FAMILY) + +void MatchedFilterCore_SSE2(size_t x_start_index, + float x2_sum_threshold, + rtc::ArrayView<const float> x, + rtc::ArrayView<const float> y, + rtc::ArrayView<float> h, + bool* filters_updated, + float* error_sum) { + const int h_size = static_cast<int>(h.size()); + const int x_size = static_cast<int>(x.size()); + RTC_DCHECK_EQ(0, h_size % 4); + + // Process for all samples in the sub-block. + for (size_t i = 0; i < y.size(); ++i) { + // Apply the matched filter as filter * x, and compute x * x. + + RTC_DCHECK_GT(x_size, x_start_index); + const float* x_p = &x[x_start_index]; + const float* h_p = &h[0]; + + // Initialize values for the accumulation. + __m128 s_128 = _mm_set1_ps(0); + __m128 x2_sum_128 = _mm_set1_ps(0); + float x2_sum = 0.f; + float s = 0; + + // Compute loop chunk sizes until, and after, the wraparound of the circular + // buffer for x. + const int chunk1 = + std::min(h_size, static_cast<int>(x_size - x_start_index)); + + // Perform the loop in two chunks. + const int chunk2 = h_size - chunk1; + for (int limit : {chunk1, chunk2}) { + // Perform 128 bit vector operations. + const int limit_by_4 = limit >> 2; + for (int k = limit_by_4; k > 0; --k, h_p += 4, x_p += 4) { + // Load the data into 128 bit vectors. + const __m128 x_k = _mm_loadu_ps(x_p); + const __m128 h_k = _mm_loadu_ps(h_p); + const __m128 xx = _mm_mul_ps(x_k, x_k); + // Compute and accumulate x * x and h * x. + x2_sum_128 = _mm_add_ps(x2_sum_128, xx); + const __m128 hx = _mm_mul_ps(h_k, x_k); + s_128 = _mm_add_ps(s_128, hx); + } + + // Perform non-vector operations for any remaining items. 
+ for (int k = limit - limit_by_4 * 4; k > 0; --k, ++h_p, ++x_p) { + const float x_k = *x_p; + x2_sum += x_k * x_k; + s += *h_p * x_k; + } + + x_p = &x[0]; + } + + // Combine the accumulated vector and scalar values. + float* v = reinterpret_cast<float*>(&x2_sum_128); + x2_sum += v[0] + v[1] + v[2] + v[3]; + v = reinterpret_cast<float*>(&s_128); + s += v[0] + v[1] + v[2] + v[3]; + + // Compute the matched filter error. + const float e = std::min(32767.f, std::max(-32768.f, y[i] - s)); + *error_sum += e * e; + + // Update the matched filter estimate in an NLMS manner. + if (x2_sum > x2_sum_threshold) { + RTC_DCHECK_LT(0.f, x2_sum); + const float alpha = 0.7f * e / x2_sum; + const __m128 alpha_128 = _mm_set1_ps(alpha); + + // filter = filter + 0.7 * (y - filter * x) / x * x. + float* h_p = &h[0]; + x_p = &x[x_start_index]; + + // Perform the loop in two chunks. + for (int limit : {chunk1, chunk2}) { + // Perform 128 bit vector operations. + const int limit_by_4 = limit >> 2; + for (int k = limit_by_4; k > 0; --k, h_p += 4, x_p += 4) { + // Load the data into 128 bit vectors. + __m128 h_k = _mm_loadu_ps(h_p); + const __m128 x_k = _mm_loadu_ps(x_p); + + // Compute h = h + alpha * x. + const __m128 alpha_x = _mm_mul_ps(alpha_128, x_k); + h_k = _mm_add_ps(h_k, alpha_x); + + // Store the result. + _mm_storeu_ps(h_p, h_k); + } + + // Perform non-vector operations for any remaining items. + for (int k = limit - limit_by_4 * 4; k > 0; --k, ++h_p, ++x_p) { + *h_p += alpha * *x_p; + } + + x_p = &x[0]; + } + + *filters_updated = true; + } + + x_start_index = x_start_index > 0 ? x_start_index - 1 : x_size - 1; + } +} +#endif + +void MatchedFilterCore(size_t x_start_index, + float x2_sum_threshold, + rtc::ArrayView<const float> x, + rtc::ArrayView<const float> y, + rtc::ArrayView<float> h, + bool* filters_updated, + float* error_sum) { + // Process for all samples in the sub-block. + for (size_t i = 0; i < y.size(); ++i) { + // Apply the matched filter as filter * x, and compute x * x. + float x2_sum = 0.f; + float s = 0; + size_t x_index = x_start_index; + for (size_t k = 0; k < h.size(); ++k) { + x2_sum += x[x_index] * x[x_index]; + s += h[k] * x[x_index]; + x_index = x_index < (x.size() - 1) ? x_index + 1 : 0; + } + + // Compute the matched filter error. + const float e = std::min(32767.f, std::max(-32768.f, y[i] - s)); + (*error_sum) += e * e; + + // Update the matched filter estimate in an NLMS manner. + if (x2_sum > x2_sum_threshold) { + RTC_DCHECK_LT(0.f, x2_sum); + const float alpha = 0.7f * e / x2_sum; + + // filter = filter + 0.7 * (y - filter * x) / x * x. + size_t x_index = x_start_index; + for (size_t k = 0; k < h.size(); ++k) { + h[k] += alpha * x[x_index]; + x_index = x_index < (x.size() - 1) ? x_index + 1 : 0; + } + *filters_updated = true; + } + + x_start_index = x_start_index > 0 ? 
x_start_index - 1 : x.size() - 1; + } +} + +} // namespace aec3 + +MatchedFilter::MatchedFilter(ApmDataDumper* data_dumper, + Aec3Optimization optimization, + size_t sub_block_size, + size_t window_size_sub_blocks, + int num_matched_filters, + size_t alignment_shift_sub_blocks, + float excitation_limit) + : data_dumper_(data_dumper), + optimization_(optimization), + sub_block_size_(sub_block_size), + filter_intra_lag_shift_(alignment_shift_sub_blocks * sub_block_size_), + filters_( + num_matched_filters, + std::vector<float>(window_size_sub_blocks * sub_block_size_, 0.f)), + lag_estimates_(num_matched_filters), + filters_offsets_(num_matched_filters, 0), + excitation_limit_(excitation_limit) { + RTC_DCHECK(data_dumper); + RTC_DCHECK_LT(0, window_size_sub_blocks); + RTC_DCHECK((kBlockSize % sub_block_size) == 0); + RTC_DCHECK((sub_block_size % 4) == 0); +} + +MatchedFilter::~MatchedFilter() = default; + +void MatchedFilter::Reset() { + for (auto& f : filters_) { + std::fill(f.begin(), f.end(), 0.f); + } + + for (auto& l : lag_estimates_) { + l = MatchedFilter::LagEstimate(); + } +} + +void MatchedFilter::Update(const DownsampledRenderBuffer& render_buffer, + rtc::ArrayView<const float> capture) { + RTC_DCHECK_EQ(sub_block_size_, capture.size()); + auto& y = capture; + + const float x2_sum_threshold = + filters_[0].size() * excitation_limit_ * excitation_limit_; + + // Apply all matched filters. + size_t alignment_shift = 0; + for (size_t n = 0; n < filters_.size(); ++n) { + float error_sum = 0.f; + bool filters_updated = false; + + size_t x_start_index = + (render_buffer.position + alignment_shift + sub_block_size_ - 1) % + render_buffer.buffer.size(); + + switch (optimization_) { +#if defined(WEBRTC_ARCH_X86_FAMILY) + case Aec3Optimization::kSse2: + aec3::MatchedFilterCore_SSE2(x_start_index, x2_sum_threshold, + render_buffer.buffer, y, filters_[n], + &filters_updated, &error_sum); + break; +#endif +#if defined(WEBRTC_HAS_NEON) + case Aec3Optimization::kNeon: + aec3::MatchedFilterCore_NEON(x_start_index, x2_sum_threshold, + render_buffer.buffer, y, filters_[n], + &filters_updated, &error_sum); + break; +#endif + default: + aec3::MatchedFilterCore(x_start_index, x2_sum_threshold, + render_buffer.buffer, y, filters_[n], + &filters_updated, &error_sum); + } + + // Compute anchor for the matched filter error. + const float error_sum_anchor = + std::inner_product(y.begin(), y.end(), y.begin(), 0.f); + + // Estimate the lag in the matched filter as the distance to the portion in + // the filter that contributes the most to the matched filter output. This + // is detected as the peak of the matched filter. + const size_t lag_estimate = std::distance( + filters_[n].begin(), + std::max_element( + filters_[n].begin(), filters_[n].end(), + [](float a, float b) -> bool { return a * a < b * b; })); + + // Update the lag estimates for the matched filter. 
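+    // An estimate is flagged as reliable only when the peak lies well inside
+    // the filter and the matched filter error is clearly below that of a
+    // zero filter (the error anchor).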
+ const float kMatchingFilterThreshold = 0.2f; + lag_estimates_[n] = LagEstimate( + error_sum_anchor - error_sum, + (lag_estimate > 2 && lag_estimate < (filters_[n].size() - 10) && + error_sum < kMatchingFilterThreshold * error_sum_anchor), + lag_estimate + alignment_shift, filters_updated); + + RTC_DCHECK_GE(10, filters_.size()); + switch (n) { + case 0: + data_dumper_->DumpRaw("aec3_correlator_0_h", filters_[0]); + break; + case 1: + data_dumper_->DumpRaw("aec3_correlator_1_h", filters_[1]); + break; + case 2: + data_dumper_->DumpRaw("aec3_correlator_2_h", filters_[2]); + break; + case 3: + data_dumper_->DumpRaw("aec3_correlator_3_h", filters_[3]); + break; + case 4: + data_dumper_->DumpRaw("aec3_correlator_4_h", filters_[4]); + break; + case 5: + data_dumper_->DumpRaw("aec3_correlator_5_h", filters_[5]); + break; + case 6: + data_dumper_->DumpRaw("aec3_correlator_6_h", filters_[6]); + break; + case 7: + data_dumper_->DumpRaw("aec3_correlator_7_h", filters_[7]); + break; + case 8: + data_dumper_->DumpRaw("aec3_correlator_8_h", filters_[8]); + break; + case 9: + data_dumper_->DumpRaw("aec3_correlator_9_h", filters_[9]); + break; + default: + RTC_NOTREACHED(); + } + + alignment_shift += filter_intra_lag_shift_; + } +} + +void MatchedFilter::LogFilterProperties(int sample_rate_hz, + size_t shift, + size_t downsampling_factor) const { + size_t alignment_shift = 0; + const int fs_by_1000 = LowestBandRate(sample_rate_hz) / 1000; + for (size_t k = 0; k < filters_.size(); ++k) { + int start = static_cast<int>(alignment_shift * downsampling_factor); + int end = static_cast<int>((alignment_shift + filters_[k].size()) * + downsampling_factor); + RTC_LOG(LS_INFO) << "Filter " << k << ": start: " + << (start - static_cast<int>(shift)) / fs_by_1000 + << " ms, end: " + << (end - static_cast<int>(shift)) / fs_by_1000 << " ms."; + alignment_shift += filter_intra_lag_shift_; + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/matched_filter.h b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/matched_filter.h new file mode 100644 index 0000000000..c9bdc462e8 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/matched_filter.h @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_MATCHED_FILTER_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_MATCHED_FILTER_H_ + +#include <array> +#include <memory> +#include <vector> + +#include "api/optional.h" +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/downsampled_render_buffer.h" +#include "rtc_base/constructormagic.h" + +namespace webrtc { +namespace aec3 { + +#if defined(WEBRTC_HAS_NEON) + +// Filter core for the matched filter that is optimized for NEON. +void MatchedFilterCore_NEON(size_t x_start_index, + float x2_sum_threshold, + rtc::ArrayView<const float> x, + rtc::ArrayView<const float> y, + rtc::ArrayView<float> h, + bool* filters_updated, + float* error_sum); + +#endif + +#if defined(WEBRTC_ARCH_X86_FAMILY) + +// Filter core for the matched filter that is optimized for SSE2. 
+void MatchedFilterCore_SSE2(size_t x_start_index, + float x2_sum_threshold, + rtc::ArrayView<const float> x, + rtc::ArrayView<const float> y, + rtc::ArrayView<float> h, + bool* filters_updated, + float* error_sum); + +#endif + +// Filter core for the matched filter. +void MatchedFilterCore(size_t x_start_index, + float x2_sum_threshold, + rtc::ArrayView<const float> x, + rtc::ArrayView<const float> y, + rtc::ArrayView<float> h, + bool* filters_updated, + float* error_sum); + +} // namespace aec3 + +class ApmDataDumper; + +// Produces recursively updated cross-correlation estimates for several signal +// shifts where the intra-shift spacing is uniform. +class MatchedFilter { + public: + // Stores properties for the lag estimate corresponding to a particular signal + // shift. + struct LagEstimate { + LagEstimate() = default; + LagEstimate(float accuracy, bool reliable, size_t lag, bool updated) + : accuracy(accuracy), reliable(reliable), lag(lag), updated(updated) {} + + float accuracy = 0.f; + bool reliable = false; + size_t lag = 0; + bool updated = false; + }; + + MatchedFilter(ApmDataDumper* data_dumper, + Aec3Optimization optimization, + size_t sub_block_size, + size_t window_size_sub_blocks, + int num_matched_filters, + size_t alignment_shift_sub_blocks, + float excitation_limit); + + ~MatchedFilter(); + + // Updates the correlation with the values in the capture buffer. + void Update(const DownsampledRenderBuffer& render_buffer, + rtc::ArrayView<const float> capture); + + // Resets the matched filter. + void Reset(); + + // Returns the current lag estimates. + rtc::ArrayView<const MatchedFilter::LagEstimate> GetLagEstimates() const { + return lag_estimates_; + } + + // Returns the maximum filter lag. + size_t GetMaxFilterLag() const { + return filters_.size() * filter_intra_lag_shift_ + filters_[0].size(); + } + + // Log matched filter properties. + void LogFilterProperties(int sample_rate_hz, + size_t shift, + size_t downsampling_factor) const; + + private: + ApmDataDumper* const data_dumper_; + const Aec3Optimization optimization_; + const size_t sub_block_size_; + const size_t filter_intra_lag_shift_; + std::vector<std::vector<float>> filters_; + std::vector<LagEstimate> lag_estimates_; + std::vector<size_t> filters_offsets_; + const float excitation_limit_; + + RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(MatchedFilter); +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_MATCHED_FILTER_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/matched_filter_lag_aggregator.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/matched_filter_lag_aggregator.cc new file mode 100644 index 0000000000..92cb4f7736 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/matched_filter_lag_aggregator.cc @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ +#include "modules/audio_processing/aec3/matched_filter_lag_aggregator.h" + +#include "modules/audio_processing/logging/apm_data_dumper.h" + +namespace webrtc { + +MatchedFilterLagAggregator::MatchedFilterLagAggregator( + ApmDataDumper* data_dumper, + size_t max_filter_lag) + : data_dumper_(data_dumper), histogram_(max_filter_lag + 1, 0) { + RTC_DCHECK(data_dumper); + histogram_data_.fill(0); +} + +MatchedFilterLagAggregator::~MatchedFilterLagAggregator() = default; + +void MatchedFilterLagAggregator::Reset() { + std::fill(histogram_.begin(), histogram_.end(), 0); + histogram_data_.fill(0); + histogram_data_index_ = 0; +} + +rtc::Optional<size_t> MatchedFilterLagAggregator::Aggregate( + rtc::ArrayView<const MatchedFilter::LagEstimate> lag_estimates) { + // Choose the strongest lag estimate as the best one. + float best_accuracy = 0.f; + int best_lag_estimate_index = -1; + for (size_t k = 0; k < lag_estimates.size(); ++k) { + if (lag_estimates[k].updated && lag_estimates[k].reliable) { + if (lag_estimates[k].accuracy > best_accuracy) { + best_accuracy = lag_estimates[k].accuracy; + best_lag_estimate_index = static_cast<int>(k); + } + } + } + + // TODO(peah): Remove this logging once all development is done. + data_dumper_->DumpRaw("aec3_echo_path_delay_estimator_best_index", + best_lag_estimate_index); + + if (best_lag_estimate_index != -1) { + RTC_DCHECK_GT(histogram_.size(), histogram_data_[histogram_data_index_]); + RTC_DCHECK_LE(0, histogram_data_[histogram_data_index_]); + --histogram_[histogram_data_[histogram_data_index_]]; + + histogram_data_[histogram_data_index_] = + lag_estimates[best_lag_estimate_index].lag; + + RTC_DCHECK_GT(histogram_.size(), histogram_data_[histogram_data_index_]); + RTC_DCHECK_LE(0, histogram_data_[histogram_data_index_]); + ++histogram_[histogram_data_[histogram_data_index_]]; + + histogram_data_index_ = + (histogram_data_index_ + 1) % histogram_data_.size(); + + const int candidate = + std::distance(histogram_.begin(), + std::max_element(histogram_.begin(), histogram_.end())); + + if (histogram_[candidate] > 25) { + return candidate; + } + } + return rtc::nullopt; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/matched_filter_lag_aggregator.h b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/matched_filter_lag_aggregator.h new file mode 100644 index 0000000000..c5dd24700e --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/matched_filter_lag_aggregator.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_MATCHED_FILTER_LAG_AGGREGATOR_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_MATCHED_FILTER_LAG_AGGREGATOR_H_ + +#include <vector> + +#include "api/optional.h" +#include "modules/audio_processing/aec3/matched_filter.h" +#include "rtc_base/constructormagic.h" + +namespace webrtc { + +class ApmDataDumper; + +// Aggregates lag estimates produced by the MatchedFilter class into a single +// reliable combined lag estimate. 
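+// The most accurate reliable estimate from each update is entered into a
+// histogram over the last 250 updates, and an aggregated lag is reported
+// once a candidate has been observed more than 25 times.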
+class MatchedFilterLagAggregator { + public: + MatchedFilterLagAggregator(ApmDataDumper* data_dumper, size_t max_filter_lag); + ~MatchedFilterLagAggregator(); + + // Resets the aggregator. + void Reset(); + + // Aggregates the provided lag estimates. + rtc::Optional<size_t> Aggregate( + rtc::ArrayView<const MatchedFilter::LagEstimate> lag_estimates); + + private: + ApmDataDumper* const data_dumper_; + std::vector<int> histogram_; + std::array<int, 250> histogram_data_; + int histogram_data_index_ = 0; + + RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(MatchedFilterLagAggregator); +}; +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_MATCHED_FILTER_LAG_AGGREGATOR_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/matched_filter_lag_aggregator_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/matched_filter_lag_aggregator_unittest.cc new file mode 100644 index 0000000000..985ed43427 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/matched_filter_lag_aggregator_unittest.cc @@ -0,0 +1,126 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/matched_filter_lag_aggregator.h" + +#include <sstream> +#include <string> +#include <vector> + +#include "api/array_view.h" +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "test/gtest.h" + +namespace webrtc { +namespace { + +constexpr size_t kNumLagsBeforeDetection = 25; + +} // namespace + +// Verifies that the most accurate lag estimate is chosen. +TEST(MatchedFilterLagAggregator, MostAccurateLagChosen) { + constexpr size_t kLag1 = 5; + constexpr size_t kLag2 = 10; + ApmDataDumper data_dumper(0); + std::vector<MatchedFilter::LagEstimate> lag_estimates(2); + MatchedFilterLagAggregator aggregator(&data_dumper, std::max(kLag1, kLag2)); + lag_estimates[0] = MatchedFilter::LagEstimate(1.f, true, kLag1, true); + lag_estimates[1] = MatchedFilter::LagEstimate(0.5f, true, kLag2, true); + + for (size_t k = 0; k < kNumLagsBeforeDetection; ++k) { + EXPECT_FALSE(aggregator.Aggregate(lag_estimates)); + } + + rtc::Optional<size_t> aggregated_lag = aggregator.Aggregate(lag_estimates); + EXPECT_TRUE(aggregated_lag); + EXPECT_EQ(kLag1, *aggregated_lag); + + lag_estimates[0] = MatchedFilter::LagEstimate(0.5f, true, kLag1, true); + lag_estimates[1] = MatchedFilter::LagEstimate(1.f, true, kLag2, true); + + for (size_t k = 0; k < kNumLagsBeforeDetection; ++k) { + aggregated_lag = aggregator.Aggregate(lag_estimates); + EXPECT_TRUE(aggregated_lag); + EXPECT_EQ(kLag1, *aggregated_lag); + } + + aggregated_lag = aggregator.Aggregate(lag_estimates); + aggregated_lag = aggregator.Aggregate(lag_estimates); + EXPECT_TRUE(aggregated_lag); + EXPECT_EQ(kLag2, *aggregated_lag); +} + +// Verifies that varying lag estimates causes lag estimates to not be deemed +// reliable. 
+TEST(MatchedFilterLagAggregator,
+     LagEstimateInvarianceRequiredForAggregatedLag) {
+  ApmDataDumper data_dumper(0);
+  std::vector<MatchedFilter::LagEstimate> lag_estimates(1);
+  MatchedFilterLagAggregator aggregator(&data_dumper, 100);
+  for (size_t k = 0; k < kNumLagsBeforeDetection * 100; ++k) {
+    lag_estimates[0] = MatchedFilter::LagEstimate(1.f, true, k % 100, true);
+    rtc::Optional<size_t> aggregated_lag = aggregator.Aggregate(lag_estimates);
+    EXPECT_FALSE(aggregated_lag);
+  }
+}
+
+// Verifies that lag estimate updates are required to produce an updated lag
+// aggregate.
+TEST(MatchedFilterLagAggregator,
+     DISABLED_LagEstimateUpdatesRequiredForAggregatedLag) {
+  constexpr size_t kLag = 5;
+  ApmDataDumper data_dumper(0);
+  std::vector<MatchedFilter::LagEstimate> lag_estimates(1);
+  MatchedFilterLagAggregator aggregator(&data_dumper, kLag);
+  for (size_t k = 0; k < kNumLagsBeforeDetection * 10; ++k) {
+    lag_estimates[0] = MatchedFilter::LagEstimate(1.f, true, kLag, false);
+    rtc::Optional<size_t> aggregated_lag = aggregator.Aggregate(lag_estimates);
+    EXPECT_FALSE(aggregated_lag);
+  }
+}
+
+// Verifies that an aggregated lag is persistent if the lag estimates do not
+// change and that an aggregated lag is not produced without gaining lag
+// estimate confidence.
+TEST(MatchedFilterLagAggregator, DISABLED_PersistentAggregatedLag) {
+  constexpr size_t kLag1 = 5;
+  constexpr size_t kLag2 = 10;
+  ApmDataDumper data_dumper(0);
+  std::vector<MatchedFilter::LagEstimate> lag_estimates(1);
+  MatchedFilterLagAggregator aggregator(&data_dumper, std::max(kLag1, kLag2));
+  rtc::Optional<size_t> aggregated_lag;
+  for (size_t k = 0; k < kNumLagsBeforeDetection; ++k) {
+    lag_estimates[0] = MatchedFilter::LagEstimate(1.f, true, kLag1, true);
+    aggregated_lag = aggregator.Aggregate(lag_estimates);
+  }
+  EXPECT_TRUE(aggregated_lag);
+  EXPECT_EQ(kLag1, *aggregated_lag);
+
+  for (size_t k = 0; k < kNumLagsBeforeDetection * 40; ++k) {
+    lag_estimates[0] = MatchedFilter::LagEstimate(1.f, false, kLag2, true);
+    aggregated_lag = aggregator.Aggregate(lag_estimates);
+    EXPECT_TRUE(aggregated_lag);
+    EXPECT_EQ(kLag1, *aggregated_lag);
+  }
+}
+
+#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
+
+// Verifies the check for non-null data dumper.
+TEST(MatchedFilterLagAggregator, NullDataDumper) {
+  EXPECT_DEATH(MatchedFilterLagAggregator(nullptr, 10), "");
+}
+
+#endif
+
+} // namespace webrtc
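For orientation between the two test files: the voting scheme that the tests above exercise can be condensed into a short standalone sketch. This is an editorial illustration, not part of the upstream diff; the class name LagVoteSketch and the method Vote() are invented for the example, while the window of 250 recent winners and the greater-than-25 vote threshold mirror matched_filter_lag_aggregator.cc above.

#include <algorithm>
#include <array>
#include <vector>

// Minimal sketch of the histogram voting in MatchedFilterLagAggregator.
class LagVoteSketch {
 public:
  explicit LagVoteSketch(size_t max_lag) : histogram_(max_lag + 1, 0) {}

  // Feeds one winning lag and returns the aggregated lag, or -1 if no lag
  // has yet collected more than 25 of the last 250 votes. As in the code
  // above, bin 0 temporarily goes negative while the vote window warms up.
  int Vote(size_t lag) {
    --histogram_[recent_[index_]];            // Retire the oldest vote.
    recent_[index_] = static_cast<int>(lag);  // Record the new winner.
    ++histogram_[recent_[index_]];            // Cast its vote.
    index_ = (index_ + 1) % recent_.size();
    auto best = std::max_element(histogram_.begin(), histogram_.end());
    return *best > 25 ? static_cast<int>(best - histogram_.begin()) : -1;
  }

 private:
  std::vector<int> histogram_;
  std::array<int, 250> recent_{};  // Ring buffer of the last 250 winners.
  size_t index_ = 0;
};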
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/matched_filter_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/matched_filter_unittest.cc
new file mode 100644
index 0000000000..06004190ae
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/matched_filter_unittest.cc
@@ -0,0 +1,377 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/matched_filter.h"
+
+#include "typedefs.h" // NOLINT(build/include)
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+#include <emmintrin.h>
+#endif
+#include <algorithm>
+#include <sstream>
+#include <string>
+
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "modules/audio_processing/aec3/decimator.h"
+#include "modules/audio_processing/aec3/render_delay_buffer.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "modules/audio_processing/test/echo_canceller_test_tools.h"
+#include "rtc_base/random.h"
+#include "system_wrappers/include/cpu_features_wrapper.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace aec3 {
+namespace {
+
+std::string ProduceDebugText(size_t delay, size_t down_sampling_factor) {
+  std::ostringstream ss;
+  ss << "Delay: " << delay;
+  ss << ", Down sampling factor: " << down_sampling_factor;
+  return ss.str();
+}
+
+constexpr size_t kNumMatchedFilters = 10;
+constexpr size_t kDownSamplingFactors[] = {2, 4, 8};
+constexpr size_t kWindowSizeSubBlocks = 32;
+constexpr size_t kAlignmentShiftSubBlocks = kWindowSizeSubBlocks * 3 / 4;
+
+} // namespace
+
+#if defined(WEBRTC_HAS_NEON)
+// Verifies that the optimized methods for NEON are similar to their reference
+// counterparts.
+TEST(MatchedFilter, TestNeonOptimizations) {
+  Random random_generator(42U);
+  for (auto down_sampling_factor : kDownSamplingFactors) {
+    const size_t sub_block_size = kBlockSize / down_sampling_factor;
+
+    std::vector<float> x(2000);
+    RandomizeSampleVector(&random_generator, x);
+    std::vector<float> y(sub_block_size);
+    std::vector<float> h_NEON(512);
+    std::vector<float> h(512);
+    int x_index = 0;
+    for (int k = 0; k < 1000; ++k) {
+      RandomizeSampleVector(&random_generator, y);
+
+      bool filters_updated = false;
+      float error_sum = 0.f;
+      bool filters_updated_NEON = false;
+      float error_sum_NEON = 0.f;
+
+      MatchedFilterCore_NEON(x_index, h.size() * 150.f * 150.f, x, y, h_NEON,
+                             &filters_updated_NEON, &error_sum_NEON);
+
+      MatchedFilterCore(x_index, h.size() * 150.f * 150.f, x, y, h,
+                        &filters_updated, &error_sum);
+
+      EXPECT_EQ(filters_updated, filters_updated_NEON);
+      EXPECT_NEAR(error_sum, error_sum_NEON, error_sum / 100000.f);
+
+      for (size_t j = 0; j < h.size(); ++j) {
+        EXPECT_NEAR(h[j], h_NEON[j], 0.00001f);
+      }
+
+      x_index = (x_index + sub_block_size) % x.size();
+    }
+  }
+}
+#endif
+
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+// Verifies that the optimized methods for SSE2 are similar to their reference
+// counterparts.
+TEST(MatchedFilter, TestSse2Optimizations) { + bool use_sse2 = (WebRtc_GetCPUInfo(kSSE2) != 0); + if (use_sse2) { + Random random_generator(42U); + for (auto down_sampling_factor : kDownSamplingFactors) { + const size_t sub_block_size = kBlockSize / down_sampling_factor; + std::vector<float> x(2000); + RandomizeSampleVector(&random_generator, x); + std::vector<float> y(sub_block_size); + std::vector<float> h_SSE2(512); + std::vector<float> h(512); + int x_index = 0; + for (int k = 0; k < 1000; ++k) { + RandomizeSampleVector(&random_generator, y); + + bool filters_updated = false; + float error_sum = 0.f; + bool filters_updated_SSE2 = false; + float error_sum_SSE2 = 0.f; + + MatchedFilterCore_SSE2(x_index, h.size() * 150.f * 150.f, x, y, h_SSE2, + &filters_updated_SSE2, &error_sum_SSE2); + + MatchedFilterCore(x_index, h.size() * 150.f * 150.f, x, y, h, + &filters_updated, &error_sum); + + EXPECT_EQ(filters_updated, filters_updated_SSE2); + EXPECT_NEAR(error_sum, error_sum_SSE2, error_sum / 100000.f); + + for (size_t j = 0; j < h.size(); ++j) { + EXPECT_NEAR(h[j], h_SSE2[j], 0.00001f); + } + + x_index = (x_index + sub_block_size) % x.size(); + } + } + } +} + +#endif + +// Verifies that the matched filter produces proper lag estimates for +// artificially +// delayed signals. +TEST(MatchedFilter, LagEstimation) { + Random random_generator(42U); + for (auto down_sampling_factor : kDownSamplingFactors) { + const size_t sub_block_size = kBlockSize / down_sampling_factor; + + std::vector<std::vector<float>> render(3, + std::vector<float>(kBlockSize, 0.f)); + std::array<float, kBlockSize> capture; + capture.fill(0.f); + ApmDataDumper data_dumper(0); + for (size_t delay_samples : {5, 64, 150, 200, 800, 1000}) { + SCOPED_TRACE(ProduceDebugText(delay_samples, down_sampling_factor)); + Decimator capture_decimator(down_sampling_factor); + DelayBuffer<float> signal_delay_buffer(down_sampling_factor * + delay_samples); + MatchedFilter filter(&data_dumper, DetectOptimization(), sub_block_size, + kWindowSizeSubBlocks, kNumMatchedFilters, + kAlignmentShiftSubBlocks, 150); + std::unique_ptr<RenderDelayBuffer> render_delay_buffer( + RenderDelayBuffer::Create( + 3, down_sampling_factor, + GetDownSampledBufferSize(down_sampling_factor, + kNumMatchedFilters), + GetRenderDelayBufferSize(down_sampling_factor, + kNumMatchedFilters))); + + // Analyze the correlation between render and capture. + for (size_t k = 0; k < (300 + delay_samples / sub_block_size); ++k) { + RandomizeSampleVector(&random_generator, render[0]); + signal_delay_buffer.Delay(render[0], capture); + render_delay_buffer->Insert(render); + render_delay_buffer->UpdateBuffers(); + std::array<float, kBlockSize> downsampled_capture_data; + rtc::ArrayView<float> downsampled_capture( + downsampled_capture_data.data(), sub_block_size); + capture_decimator.Decimate(capture, downsampled_capture); + filter.Update(render_delay_buffer->GetDownsampledRenderBuffer(), + downsampled_capture); + } + + // Obtain the lag estimates. + auto lag_estimates = filter.GetLagEstimates(); + + // Find which lag estimate should be the most accurate. + rtc::Optional<size_t> expected_most_accurate_lag_estimate; + size_t alignment_shift_sub_blocks = 0; + for (size_t k = 0; k < kNumMatchedFilters; ++k) { + if ((alignment_shift_sub_blocks + 3 * kWindowSizeSubBlocks / 4) * + sub_block_size > + delay_samples) { + expected_most_accurate_lag_estimate = k > 0 ? 
k - 1 : 0; + break; + } + alignment_shift_sub_blocks += kAlignmentShiftSubBlocks; + } + ASSERT_TRUE(expected_most_accurate_lag_estimate); + + // Verify that the expected most accurate lag estimate is the most + // accurate estimate. + for (size_t k = 0; k < kNumMatchedFilters; ++k) { + if (k != *expected_most_accurate_lag_estimate && + k != (*expected_most_accurate_lag_estimate + 1)) { + EXPECT_TRUE( + lag_estimates[*expected_most_accurate_lag_estimate].accuracy > + lag_estimates[k].accuracy || + !lag_estimates[k].reliable || + !lag_estimates[*expected_most_accurate_lag_estimate].reliable); + } + } + + // Verify that all lag estimates are updated as expected for signals + // containing strong noise. + for (auto& le : lag_estimates) { + EXPECT_TRUE(le.updated); + } + + // Verify that the expected most accurate lag estimate is reliable. + EXPECT_TRUE( + lag_estimates[*expected_most_accurate_lag_estimate].reliable || + lag_estimates[std::min(*expected_most_accurate_lag_estimate + 1, + lag_estimates.size() - 1)] + .reliable); + + // Verify that the expected most accurate lag estimate is correct. + if (lag_estimates[*expected_most_accurate_lag_estimate].reliable) { + EXPECT_TRUE(delay_samples == + lag_estimates[*expected_most_accurate_lag_estimate].lag); + } else { + EXPECT_TRUE( + delay_samples == + lag_estimates[std::min(*expected_most_accurate_lag_estimate + 1, + lag_estimates.size() - 1)] + .lag); + } + } + } +} + +// Verifies that the matched filter does not produce reliable and accurate +// estimates for uncorrelated render and capture signals. +TEST(MatchedFilter, LagNotReliableForUncorrelatedRenderAndCapture) { + Random random_generator(42U); + for (auto down_sampling_factor : kDownSamplingFactors) { + const size_t sub_block_size = kBlockSize / down_sampling_factor; + + std::vector<std::vector<float>> render(3, + std::vector<float>(kBlockSize, 0.f)); + std::array<float, kBlockSize> capture_data; + rtc::ArrayView<float> capture(capture_data.data(), sub_block_size); + std::fill(capture.begin(), capture.end(), 0.f); + ApmDataDumper data_dumper(0); + std::unique_ptr<RenderDelayBuffer> render_delay_buffer( + RenderDelayBuffer::Create( + 3, down_sampling_factor, + GetDownSampledBufferSize(down_sampling_factor, kNumMatchedFilters), + GetRenderDelayBufferSize(down_sampling_factor, + kNumMatchedFilters))); + MatchedFilter filter(&data_dumper, DetectOptimization(), sub_block_size, + kWindowSizeSubBlocks, kNumMatchedFilters, + kAlignmentShiftSubBlocks, 150); + + // Analyze the correlation between render and capture. + for (size_t k = 0; k < 100; ++k) { + RandomizeSampleVector(&random_generator, render[0]); + RandomizeSampleVector(&random_generator, capture); + render_delay_buffer->Insert(render); + filter.Update(render_delay_buffer->GetDownsampledRenderBuffer(), capture); + } + + // Obtain the lag estimates. + auto lag_estimates = filter.GetLagEstimates(); + EXPECT_EQ(kNumMatchedFilters, lag_estimates.size()); + + // Verify that no lag estimates are reliable. + for (auto& le : lag_estimates) { + EXPECT_FALSE(le.reliable); + } + } +} + +// Verifies that the matched filter does not produce updated lag estimates for +// render signals of low level. 
+TEST(MatchedFilter, LagNotUpdatedForLowLevelRender) {
+  Random random_generator(42U);
+  for (auto down_sampling_factor : kDownSamplingFactors) {
+    const size_t sub_block_size = kBlockSize / down_sampling_factor;
+
+    std::vector<std::vector<float>> render(3,
+                                           std::vector<float>(kBlockSize, 0.f));
+    std::array<float, kBlockSize> capture;
+    capture.fill(0.f);
+    ApmDataDumper data_dumper(0);
+    MatchedFilter filter(&data_dumper, DetectOptimization(), sub_block_size,
+                         kWindowSizeSubBlocks, kNumMatchedFilters,
+                         kAlignmentShiftSubBlocks, 150);
+    std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
+        RenderDelayBuffer::Create(
+            3, down_sampling_factor,
+            GetDownSampledBufferSize(down_sampling_factor, kNumMatchedFilters),
+            GetRenderDelayBufferSize(down_sampling_factor,
+                                     kNumMatchedFilters)));
+    Decimator capture_decimator(down_sampling_factor);
+
+    // Analyze the correlation between render and capture.
+    for (size_t k = 0; k < 100; ++k) {
+      RandomizeSampleVector(&random_generator, render[0]);
+      for (auto& render_k : render[0]) {
+        render_k *= 149.f / 32767.f;
+      }
+      std::copy(render[0].begin(), render[0].end(), capture.begin());
+      std::array<float, kBlockSize> downsampled_capture_data;
+      rtc::ArrayView<float> downsampled_capture(
+          downsampled_capture_data.data(), sub_block_size);
+      capture_decimator.Decimate(capture, downsampled_capture);
+      filter.Update(render_delay_buffer->GetDownsampledRenderBuffer(),
+                    downsampled_capture);
+    }
+
+    // Obtain the lag estimates.
+    auto lag_estimates = filter.GetLagEstimates();
+    EXPECT_EQ(kNumMatchedFilters, lag_estimates.size());
+
+    // Verify that no lag estimates are updated and that no lag estimates are
+    // reliable.
+    for (auto& le : lag_estimates) {
+      EXPECT_FALSE(le.updated);
+      EXPECT_FALSE(le.reliable);
+    }
+  }
+}
+
+// Verifies that the correct number of lag estimates are produced for a certain
+// number of alignment shifts.
+TEST(MatchedFilter, NumberOfLagEstimates) {
+  ApmDataDumper data_dumper(0);
+  for (auto down_sampling_factor : kDownSamplingFactors) {
+    const size_t sub_block_size = kBlockSize / down_sampling_factor;
+    for (size_t num_matched_filters = 0; num_matched_filters < 10;
+         ++num_matched_filters) {
+      MatchedFilter filter(&data_dumper, DetectOptimization(), sub_block_size,
+                           32, num_matched_filters, 1, 150);
+      EXPECT_EQ(num_matched_filters, filter.GetLagEstimates().size());
+    }
+  }
+}
+
+#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
+
+// Verifies the check for a non-zero window size.
+TEST(MatchedFilter, ZeroWindowSize) {
+  ApmDataDumper data_dumper(0);
+  EXPECT_DEATH(
+      MatchedFilter(&data_dumper, DetectOptimization(), 16, 0, 1, 1, 150), "");
+}
+
+// Verifies the check for non-null data dumper.
+TEST(MatchedFilter, NullDataDumper) {
+  EXPECT_DEATH(MatchedFilter(nullptr, DetectOptimization(), 16, 1, 1, 1, 150),
+               "");
+}
+
+// Verifies the check that the sub block size is a multiple of 4.
+// TODO(peah): Activate the unittest once the required code has been landed.
+TEST(MatchedFilter, DISABLED_BlockSizeMultipleOf4) {
+  ApmDataDumper data_dumper(0);
+  EXPECT_DEATH(
+      MatchedFilter(&data_dumper, DetectOptimization(), 15, 1, 1, 1, 150), "");
+}
+
+// Verifies the check that an integer number of sub blocks adds up to a block
+// size.
+// TODO(peah): Activate the unittest once the required code has been landed.
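+// (That is, the sub block size must divide kBlockSize evenly; the value 12
+// below is chosen precisely because it does not.)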
+TEST(MatchedFilter, DISABLED_SubBlockSizeAddsUpToBlockSize) { + ApmDataDumper data_dumper(0); + EXPECT_DEATH( + MatchedFilter(&data_dumper, DetectOptimization(), 12, 1, 1, 1, 150), ""); +} + +#endif + +} // namespace aec3 +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/mock/mock_block_processor.h b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/mock/mock_block_processor.h new file mode 100644 index 0000000000..5fff456185 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/mock/mock_block_processor.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_MOCK_MOCK_BLOCK_PROCESSOR_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_MOCK_MOCK_BLOCK_PROCESSOR_H_ + +#include <vector> + +#include "modules/audio_processing/aec3/block_processor.h" +#include "test/gmock.h" + +namespace webrtc { +namespace test { + +class MockBlockProcessor : public BlockProcessor { + public: + virtual ~MockBlockProcessor() {} + + MOCK_METHOD3(ProcessCapture, + void(bool level_change, + bool saturated_microphone_signal, + std::vector<std::vector<float>>* capture_block)); + MOCK_METHOD1(BufferRender, + void(const std::vector<std::vector<float>>& block)); + MOCK_METHOD1(UpdateEchoLeakageStatus, void(bool leakage_detected)); + MOCK_CONST_METHOD1(GetMetrics, void(EchoControl::Metrics* metrics)); +}; + +} // namespace test +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_MOCK_MOCK_BLOCK_PROCESSOR_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/mock/mock_echo_remover.h b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/mock/mock_echo_remover.h new file mode 100644 index 0000000000..44d3778e6d --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/mock/mock_echo_remover.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_MOCK_MOCK_ECHO_REMOVER_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_MOCK_MOCK_ECHO_REMOVER_H_ + +#include <vector> + +#include "api/optional.h" +#include "modules/audio_processing/aec3/echo_path_variability.h" +#include "modules/audio_processing/aec3/echo_remover.h" +#include "modules/audio_processing/aec3/render_buffer.h" +#include "test/gmock.h" + +namespace webrtc { +namespace test { + +class MockEchoRemover : public EchoRemover { + public: + virtual ~MockEchoRemover() = default; + + MOCK_METHOD5(ProcessCapture, + void(const rtc::Optional<size_t>& echo_path_delay_samples, + const EchoPathVariability& echo_path_variability, + bool capture_signal_saturation, + const RenderBuffer& render_buffer, + std::vector<std::vector<float>>* capture)); + + MOCK_METHOD1(UpdateEchoLeakageStatus, void(bool leakage_detected)); + MOCK_CONST_METHOD1(GetMetrics, void(EchoControl::Metrics* metrics)); +}; + +} // namespace test +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_MOCK_MOCK_ECHO_REMOVER_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/mock/mock_render_delay_buffer.h b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/mock/mock_render_delay_buffer.h new file mode 100644 index 0000000000..6b5870901d --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/mock/mock_render_delay_buffer.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_MOCK_MOCK_RENDER_DELAY_BUFFER_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_MOCK_MOCK_RENDER_DELAY_BUFFER_H_ + +#include <vector> + +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/downsampled_render_buffer.h" +#include "modules/audio_processing/aec3/render_buffer.h" +#include "modules/audio_processing/aec3/render_delay_buffer.h" +#include "test/gmock.h" + +namespace webrtc { +namespace test { + +class MockRenderDelayBuffer : public RenderDelayBuffer { + public: + explicit MockRenderDelayBuffer(int sample_rate_hz) + : render_buffer_(Aec3Optimization::kNone, + NumBandsForRate(sample_rate_hz), + GetRenderDelayBufferSize(4, 4), + std::vector<size_t>(1, kAdaptiveFilterLength)), + downsampled_render_buffer_(GetDownSampledBufferSize(4, 4)) { + ON_CALL(*this, GetRenderBuffer()) + .WillByDefault( + testing::Invoke(this, &MockRenderDelayBuffer::FakeGetRenderBuffer)); + ON_CALL(*this, GetDownsampledRenderBuffer()) + .WillByDefault(testing::Invoke( + this, &MockRenderDelayBuffer::FakeGetDownsampledRenderBuffer)); + } + virtual ~MockRenderDelayBuffer() = default; + + MOCK_METHOD0(Reset, void()); + MOCK_METHOD1(Insert, bool(const std::vector<std::vector<float>>& block)); + MOCK_METHOD0(UpdateBuffers, bool()); + MOCK_METHOD1(SetDelay, void(size_t delay)); + MOCK_CONST_METHOD0(Delay, size_t()); + MOCK_CONST_METHOD0(MaxDelay, size_t()); + MOCK_CONST_METHOD0(IsBlockAvailable, bool()); + MOCK_CONST_METHOD0(GetRenderBuffer, const RenderBuffer&()); + MOCK_CONST_METHOD0(GetDownsampledRenderBuffer, + const DownsampledRenderBuffer&()); + + private: + const RenderBuffer& FakeGetRenderBuffer() const { return render_buffer_; } + const DownsampledRenderBuffer& FakeGetDownsampledRenderBuffer() const { + return downsampled_render_buffer_; + } + RenderBuffer render_buffer_; + DownsampledRenderBuffer downsampled_render_buffer_; +}; + +} // namespace test +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_MOCK_MOCK_RENDER_DELAY_BUFFER_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/mock/mock_render_delay_controller.h b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/mock/mock_render_delay_controller.h new file mode 100644 index 0000000000..b1f1cbe6cf --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/mock/mock_render_delay_controller.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_MOCK_MOCK_RENDER_DELAY_CONTROLLER_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_MOCK_MOCK_RENDER_DELAY_CONTROLLER_H_ + +#include "api/array_view.h" +#include "api/optional.h" +#include "modules/audio_processing/aec3/downsampled_render_buffer.h" +#include "modules/audio_processing/aec3/render_delay_controller.h" +#include "test/gmock.h" + +namespace webrtc { +namespace test { + +class MockRenderDelayController : public RenderDelayController { + public: + virtual ~MockRenderDelayController() = default; + + MOCK_METHOD0(Reset, void()); + MOCK_METHOD1(SetDelay, void(size_t render_delay)); + MOCK_METHOD2(GetDelay, + size_t(const DownsampledRenderBuffer& render_buffer, + rtc::ArrayView<const float> capture)); + MOCK_CONST_METHOD0(AlignmentHeadroomSamples, rtc::Optional<size_t>()); +}; + +} // namespace test +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_MOCK_MOCK_RENDER_DELAY_CONTROLLER_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/output_selector.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/output_selector.cc new file mode 100644 index 0000000000..4f547d98d9 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/output_selector.cc @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/output_selector.h" + +#include <algorithm> +#include <numeric> + +#include "rtc_base/checks.h" + +namespace webrtc { +namespace { + +// Performs the transition between the signals in a smooth manner. +void SmoothFrameTransition(bool from_y_to_e, + rtc::ArrayView<const float> e, + rtc::ArrayView<float> y) { + RTC_DCHECK_LT(0u, e.size()); + RTC_DCHECK_EQ(y.size(), e.size()); + + const float change_factor = (from_y_to_e ? 1.f : -1.f) / e.size(); + float averaging = from_y_to_e ? 0.f : 1.f; + for (size_t k = 0; k < e.size(); ++k) { + y[k] += averaging * (e[k] - y[k]); + averaging += change_factor; + } + RTC_DCHECK_EQ(from_y_to_e ? 1.f : 0.f, averaging); +} + +} // namespace + +OutputSelector::OutputSelector() = default; + +OutputSelector::~OutputSelector() = default; + +void OutputSelector::FormLinearOutput( + bool use_subtractor_output, + rtc::ArrayView<const float> subtractor_output, + rtc::ArrayView<float> capture) { + RTC_DCHECK_EQ(subtractor_output.size(), capture.size()); + rtc::ArrayView<const float>& e_main = subtractor_output; + rtc::ArrayView<float> y = capture; + + if (use_subtractor_output != use_subtractor_output_) { + use_subtractor_output_ = use_subtractor_output; + SmoothFrameTransition(use_subtractor_output_, e_main, y); + } else if (use_subtractor_output_) { + std::copy(e_main.begin(), e_main.end(), y.begin()); + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/output_selector.h b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/output_selector.h new file mode 100644 index 0000000000..a406c61745 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/output_selector.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_OUTPUT_SELECTOR_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_OUTPUT_SELECTOR_H_ + +#include "api/array_view.h" +#include "rtc_base/constructormagic.h" + +namespace webrtc { + +// Performs the selection between which of the linear aec output and the +// microphone signal should be used as the echo suppressor output. +class OutputSelector { + public: + OutputSelector(); + ~OutputSelector(); + + // Forms the most appropriate output signal. + void FormLinearOutput(bool use_subtractor_output, + rtc::ArrayView<const float> subtractor_output, + rtc::ArrayView<float> capture); + + // Returns true if the linear aec output is the one used. + bool UseSubtractorOutput() const { return use_subtractor_output_; } + + private: + bool use_subtractor_output_ = false; + RTC_DISALLOW_COPY_AND_ASSIGN(OutputSelector); +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_OUTPUT_SELECTOR_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/output_selector_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/output_selector_unittest.cc new file mode 100644 index 0000000000..c7add1c838 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/output_selector_unittest.cc @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/output_selector.h" + +#include <algorithm> +#include <array> + +#include "modules/audio_processing/aec3/aec3_common.h" +#include "test/gtest.h" + +namespace webrtc { + +// Verifies that the switching between the signals in the output works as +// intended. 
+TEST(OutputSelector, ProperSwitching) {
+  OutputSelector selector;
+
+  std::array<float, kBlockSize> y;
+  std::array<float, kBlockSize> e;
+  std::array<float, kBlockSize> e_ref;
+  std::array<float, kBlockSize> y_ref;
+  auto init_blocks = [](std::array<float, kBlockSize>* e,
+                        std::array<float, kBlockSize>* y) {
+    e->fill(10.f);
+    y->fill(20.f);
+  };
+
+  init_blocks(&e_ref, &y_ref);
+
+  init_blocks(&e, &y);
+  selector.FormLinearOutput(false, e, y);
+  EXPECT_EQ(y_ref, y);
+
+  init_blocks(&e, &y);
+  selector.FormLinearOutput(true, e, y);
+  EXPECT_NE(e_ref, y);
+  EXPECT_NE(y_ref, y);
+
+  init_blocks(&e, &y);
+  selector.FormLinearOutput(true, e, y);
+  EXPECT_EQ(e_ref, y);
+
+  init_blocks(&e, &y);
+  selector.FormLinearOutput(true, e, y);
+  EXPECT_EQ(e_ref, y);
+
+  init_blocks(&e, &y);
+  selector.FormLinearOutput(false, e, y);
+  EXPECT_NE(e_ref, y);
+  EXPECT_NE(y_ref, y);
+
+  init_blocks(&e, &y);
+  selector.FormLinearOutput(false, e, y);
+  EXPECT_EQ(y_ref, y);
+
+  init_blocks(&e, &y);
+  selector.FormLinearOutput(false, e, y);
+  EXPECT_EQ(y_ref, y);
+}
+
+} // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/render_buffer.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/render_buffer.cc
new file mode 100644
index 0000000000..fa86ea6b36
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/render_buffer.cc
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/render_buffer.h"
+
+#include <algorithm>
+
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+RenderBuffer::RenderBuffer(Aec3Optimization optimization,
+                           size_t num_bands,
+                           size_t num_partitions,
+                           const std::vector<size_t> num_ffts_for_spectral_sums)
+    : optimization_(optimization),
+      fft_buffer_(num_partitions),
+      spectrum_buffer_(num_partitions, std::array<float, kFftLengthBy2Plus1>()),
+      spectral_sums_(num_ffts_for_spectral_sums.size(),
+                     std::array<float, kFftLengthBy2Plus1>()),
+      last_block_(num_bands, std::vector<float>(kBlockSize, 0.f)),
+      fft_() {
+  // The current implementation only allows a single spectral sum length.
+  RTC_DCHECK_EQ(1, num_ffts_for_spectral_sums.size());
+  spectral_sums_length_ = num_ffts_for_spectral_sums[0];
+  RTC_DCHECK_GE(fft_buffer_.size(), spectral_sums_length_);
+
+  Clear();
+}
+
+RenderBuffer::~RenderBuffer() = default;
+
+void RenderBuffer::Clear() {
+  position_ = 0;
+  for (auto& sum : spectral_sums_) {
+    sum.fill(0.f);
+  }
+
+  for (auto& spectrum : spectrum_buffer_) {
+    spectrum.fill(0.f);
+  }
+
+  for (auto& fft : fft_buffer_) {
+    fft.Clear();
+  }
+
+  for (auto& b : last_block_) {
+    std::fill(b.begin(), b.end(), 0.f);
+  }
+}
+
+void RenderBuffer::Insert(const std::vector<std::vector<float>>& block) {
+  // Compute the FFT of the data in the lowest band.
+  FftData X;
+  fft_.PaddedFft(block[0], last_block_[0], &X);
+
+  // Copy the last render frame.
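+  // (Keeping this copy is what allows the next call to Insert() to form a
+  // padded FFT that spans two consecutive blocks: PaddedFft() above consumes
+  // both the new block and the previous one.)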
+  RTC_DCHECK_EQ(last_block_.size(), block.size());
+  for (size_t k = 0; k < block.size(); ++k) {
+    RTC_DCHECK_EQ(last_block_[k].size(), block[k].size());
+    std::copy(block[k].begin(), block[k].end(), last_block_[k].begin());
+  }
+
+  // Insert X into the buffer.
+  position_ = position_ > 0 ? position_ - 1 : fft_buffer_.size() - 1;
+  fft_buffer_[position_].Assign(X);
+
+  // Compute and insert the spectrum for the FFT into the spectrum buffer.
+  X.Spectrum(optimization_, &spectrum_buffer_[position_]);
+
+  // Pre-compute and cache the spectral sums.
+  std::copy(spectrum_buffer_[position_].begin(),
+            spectrum_buffer_[position_].end(), spectral_sums_[0].begin());
+  size_t position = (position_ + 1) % fft_buffer_.size();
+  for (size_t j = 1; j < spectral_sums_length_; ++j) {
+    const std::array<float, kFftLengthBy2Plus1>& spectrum =
+        spectrum_buffer_[position];
+
+    for (size_t k = 0; k < spectral_sums_[0].size(); ++k) {
+      spectral_sums_[0][k] += spectrum[k];
+    }
+
+    position = position < (fft_buffer_.size() - 1) ? position + 1 : 0;
+  }
+}
+
+} // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/render_buffer.h b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/render_buffer.h
new file mode 100644
index 0000000000..3288ff36ad
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/render_buffer.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AEC3_RENDER_BUFFER_H_
+#define MODULES_AUDIO_PROCESSING_AEC3_RENDER_BUFFER_H_
+
+#include <memory>
+#include <vector>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/aec3/aec3_fft.h"
+#include "modules/audio_processing/aec3/fft_data.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/constructormagic.h"
+
+namespace webrtc {
+
+// Provides a buffer of the render data for the echo remover.
+class RenderBuffer {
+ public:
+  // The constructor takes, besides the other parameters, a vector containing
+  // the number of FFTs that will be included in the spectral sums in the call
+  // to SpectralSum.
+  RenderBuffer(Aec3Optimization optimization,
+               size_t num_bands,
+               size_t num_partitions,
+               const std::vector<size_t> num_ffts_for_spectral_sums);
+  ~RenderBuffer();
+
+  // Clears the buffer.
+  void Clear();
+
+  // Insert a block into the buffer.
+  void Insert(const std::vector<std::vector<float>>& block);
+
+  // Gets the last inserted block.
+  const std::vector<std::vector<float>>& MostRecentBlock() const {
+    return last_block_;
+  }
+
+  // Get the spectrum from one of the FFTs in the buffer.
+  const std::array<float, kFftLengthBy2Plus1>& Spectrum(
+      size_t buffer_offset_ffts) const {
+    return spectrum_buffer_[(position_ + buffer_offset_ffts) %
+                            fft_buffer_.size()];
+  }
+
+  // Returns the sum of the spectrums for a certain number of FFTs.
+  const std::array<float, kFftLengthBy2Plus1>& SpectralSum(
+      size_t num_ffts) const {
+    RTC_DCHECK_EQ(spectral_sums_length_, num_ffts);
+    return spectral_sums_[0];
+  }
+
+  // Returns the circular buffer.
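+  // Note that Position() points at the most recently inserted FFT and that
+  // Spectrum(j) addresses the spectrum of the j-th most recent FFT, with
+  // wrap-around, as implied by the backwards-moving write position in
+  // Insert().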
+  rtc::ArrayView<const FftData> Buffer() const { return fft_buffer_; }
+
+  // Returns the current position in the circular buffer.
+  size_t Position() const { return position_; }
+
+ private:
+  const Aec3Optimization optimization_;
+  std::vector<FftData> fft_buffer_;
+  std::vector<std::array<float, kFftLengthBy2Plus1>> spectrum_buffer_;
+  size_t spectral_sums_length_;
+  std::vector<std::array<float, kFftLengthBy2Plus1>> spectral_sums_;
+  size_t position_ = 0;
+  std::vector<std::vector<float>> last_block_;
+  const Aec3Fft fft_;
+  RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(RenderBuffer);
+};
+
+} // namespace webrtc
+
+#endif // MODULES_AUDIO_PROCESSING_AEC3_RENDER_BUFFER_H_
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/render_buffer_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/render_buffer_unittest.cc
new file mode 100644
index 0000000000..1498f4ea25
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/render_buffer_unittest.cc
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/render_buffer.h"
+
+#include <algorithm>
+#include <functional>
+#include <vector>
+
+#include "test/gtest.h"
+
+namespace webrtc {
+
+#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
+
+// Verifies the check for the provided number of FFTs to include in the
+// spectral sum.
+TEST(RenderBuffer, TooLargeNumberOfSpectralSums) {
+  EXPECT_DEATH(
+      RenderBuffer(Aec3Optimization::kNone, 3, 1, std::vector<size_t>(2, 1)),
+      "");
+}
+
+TEST(RenderBuffer, TooSmallNumberOfSpectralSums) {
+  EXPECT_DEATH(
+      RenderBuffer(Aec3Optimization::kNone, 3, 1, std::vector<size_t>()), "");
+}
+
+// Verifies the feasibility check for the provided number of FFTs to include
+// in the spectral sum.
+TEST(RenderBuffer, FeasibleNumberOfFftsInSum) {
+  EXPECT_DEATH(
+      RenderBuffer(Aec3Optimization::kNone, 3, 1, std::vector<size_t>(1, 2)),
+      "");
+}
+
+#endif
+
+} // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/render_delay_buffer.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/render_delay_buffer.cc
new file mode 100644
index 0000000000..d2ead63b02
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/render_delay_buffer.cc
@@ -0,0 +1,253 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */ + +#include "modules/audio_processing/aec3/render_delay_buffer.h" + +#include <string.h> +#include <algorithm> + +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/block_processor.h" +#include "modules/audio_processing/aec3/decimator.h" +#include "modules/audio_processing/aec3/fft_data.h" +#include "rtc_base/atomicops.h" +#include "rtc_base/checks.h" +#include "rtc_base/constructormagic.h" +#include "rtc_base/logging.h" + +namespace webrtc { +namespace { + +class ApiCallJitterBuffer { + public: + explicit ApiCallJitterBuffer(size_t num_bands) { + buffer_.fill(std::vector<std::vector<float>>( + num_bands, std::vector<float>(kBlockSize, 0.f))); + } + + ~ApiCallJitterBuffer() = default; + + void Reset() { + size_ = 0; + last_insert_index_ = 0; + } + + void Insert(const std::vector<std::vector<float>>& block) { + RTC_DCHECK_LT(size_, buffer_.size()); + last_insert_index_ = (last_insert_index_ + 1) % buffer_.size(); + RTC_DCHECK_EQ(buffer_[last_insert_index_].size(), block.size()); + RTC_DCHECK_EQ(buffer_[last_insert_index_][0].size(), block[0].size()); + for (size_t k = 0; k < block.size(); ++k) { + std::copy(block[k].begin(), block[k].end(), + buffer_[last_insert_index_][k].begin()); + } + ++size_; + } + + void Remove(std::vector<std::vector<float>>* block) { + RTC_DCHECK_LT(0, size_); + --size_; + const size_t extract_index = + (last_insert_index_ - size_ + buffer_.size()) % buffer_.size(); + for (size_t k = 0; k < block->size(); ++k) { + std::copy(buffer_[extract_index][k].begin(), + buffer_[extract_index][k].end(), (*block)[k].begin()); + } + } + + size_t Size() const { return size_; } + bool Full() const { return size_ >= (buffer_.size()); } + bool Empty() const { return size_ == 0; } + + private: + std::array<std::vector<std::vector<float>>, kMaxApiCallsJitterBlocks> buffer_; + size_t size_ = 0; + int last_insert_index_ = 0; +}; + +class RenderDelayBufferImpl final : public RenderDelayBuffer { + public: + RenderDelayBufferImpl(size_t num_bands, + size_t down_sampling_factor, + size_t downsampled_render_buffer_size, + size_t render_delay_buffer_size); + ~RenderDelayBufferImpl() override; + + void Reset() override; + bool Insert(const std::vector<std::vector<float>>& block) override; + bool UpdateBuffers() override; + void SetDelay(size_t delay) override; + size_t Delay() const override { return delay_; } + + const RenderBuffer& GetRenderBuffer() const override { return fft_buffer_; } + + const DownsampledRenderBuffer& GetDownsampledRenderBuffer() const override { + return downsampled_render_buffer_; + } + + private: + static int instance_count_; + std::unique_ptr<ApmDataDumper> data_dumper_; + const Aec3Optimization optimization_; + const size_t down_sampling_factor_; + const size_t sub_block_size_; + std::vector<std::vector<std::vector<float>>> buffer_; + size_t delay_ = 0; + size_t last_insert_index_ = 0; + RenderBuffer fft_buffer_; + DownsampledRenderBuffer downsampled_render_buffer_; + Decimator render_decimator_; + ApiCallJitterBuffer api_call_jitter_buffer_; + const std::vector<std::vector<float>> zero_block_; + RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(RenderDelayBufferImpl); +}; + +int RenderDelayBufferImpl::instance_count_ = 0; + +RenderDelayBufferImpl::RenderDelayBufferImpl( + size_t num_bands, + size_t down_sampling_factor, + size_t downsampled_render_buffer_size, + size_t render_delay_buffer_size) + : data_dumper_( + new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))), + optimization_(DetectOptimization()), + 
down_sampling_factor_(down_sampling_factor), + sub_block_size_(down_sampling_factor_ > 0 + ? kBlockSize / down_sampling_factor + : kBlockSize), + buffer_( + render_delay_buffer_size, + std::vector<std::vector<float>>(num_bands, + std::vector<float>(kBlockSize, 0.f))), + fft_buffer_( + optimization_, + num_bands, + std::max(kUnknownDelayRenderWindowSize, kAdaptiveFilterLength), + std::vector<size_t>(1, kAdaptiveFilterLength)), + downsampled_render_buffer_(downsampled_render_buffer_size), + render_decimator_(down_sampling_factor_), + api_call_jitter_buffer_(num_bands), + zero_block_(num_bands, std::vector<float>(kBlockSize, 0.f)) { + RTC_DCHECK_LT(buffer_.size(), downsampled_render_buffer_.buffer.size()); +} + +RenderDelayBufferImpl::~RenderDelayBufferImpl() = default; + +void RenderDelayBufferImpl::Reset() { + // Empty all data in the buffers. + delay_ = 0; + last_insert_index_ = 0; + downsampled_render_buffer_.position = 0; + std::fill(downsampled_render_buffer_.buffer.begin(), + downsampled_render_buffer_.buffer.end(), 0.f); + fft_buffer_.Clear(); + api_call_jitter_buffer_.Reset(); + for (auto& c : buffer_) { + for (auto& b : c) { + std::fill(b.begin(), b.end(), 0.f); + } + } +} + +bool RenderDelayBufferImpl::Insert( + const std::vector<std::vector<float>>& block) { + RTC_DCHECK_EQ(block.size(), buffer_[0].size()); + RTC_DCHECK_EQ(block[0].size(), buffer_[0][0].size()); + + if (api_call_jitter_buffer_.Full()) { + // Report buffer overrun and let the caller handle the overrun. + return false; + } + api_call_jitter_buffer_.Insert(block); + + return true; +} + +bool RenderDelayBufferImpl::UpdateBuffers() { + bool underrun = true; + // Update the buffers with a new block if such is available, otherwise insert + // a block of silence. + if (api_call_jitter_buffer_.Size() > 0) { + last_insert_index_ = (last_insert_index_ + 1) % buffer_.size(); + api_call_jitter_buffer_.Remove(&buffer_[last_insert_index_]); + underrun = false; + } + + downsampled_render_buffer_.position = + (downsampled_render_buffer_.position - sub_block_size_ + + downsampled_render_buffer_.buffer.size()) % + downsampled_render_buffer_.buffer.size(); + + rtc::ArrayView<const float> input( + underrun ? zero_block_[0].data() : buffer_[last_insert_index_][0].data(), + kBlockSize); + rtc::ArrayView<float> output(downsampled_render_buffer_.buffer.data() + + downsampled_render_buffer_.position, + sub_block_size_); + data_dumper_->DumpWav("aec3_render_decimator_input", input.size(), + input.data(), 16000, 1); + render_decimator_.Decimate(input, output); + data_dumper_->DumpWav("aec3_render_decimator_output", output.size(), + output.data(), 16000 / down_sampling_factor_, 1); + for (size_t k = 0; k < output.size() / 2; ++k) { + float tmp = output[k]; + output[k] = output[output.size() - 1 - k]; + output[output.size() - 1 - k] = tmp; + } + + if (underrun) { + fft_buffer_.Insert(zero_block_); + } else { + fft_buffer_.Insert(buffer_[(last_insert_index_ - delay_ + buffer_.size()) % + buffer_.size()]); + } + return !underrun; +} + +void RenderDelayBufferImpl::SetDelay(size_t delay) { + if (delay_ == delay) { + return; + } + + // If there is a new delay set, clear the fft buffer. + fft_buffer_.Clear(); + + if ((buffer_.size() - 1) < delay) { + // If the desired delay is larger than the delay buffer, shorten the delay + // buffer size to achieve the desired alignment with the available buffer + // size. 
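+    // (Concretely, the part of the delay that exceeds buffer_.size() - 1
+    // blocks is absorbed by advancing the downsampled buffer position by
+    // sub_block_size_ samples per excess block and rewinding
+    // last_insert_index_ by the same number of blocks, after which delay_ is
+    // capped to buffer_.size() - 1.)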
+ downsampled_render_buffer_.position = + (downsampled_render_buffer_.position + + sub_block_size_ * (delay - (buffer_.size() - 1))) % + downsampled_render_buffer_.buffer.size(); + + last_insert_index_ = + (last_insert_index_ - (delay - (buffer_.size() - 1)) + buffer_.size()) % + buffer_.size(); + delay_ = buffer_.size() - 1; + } else { + delay_ = delay; + } +} + +} // namespace + +RenderDelayBuffer* RenderDelayBuffer::Create( + size_t num_bands, + size_t down_sampling_factor, + size_t downsampled_render_buffer_size, + size_t render_delay_buffer_size) { + return new RenderDelayBufferImpl(num_bands, down_sampling_factor, + downsampled_render_buffer_size, + render_delay_buffer_size); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/render_delay_buffer.h b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/render_delay_buffer.h new file mode 100644 index 0000000000..8f5de40752 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/render_delay_buffer.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_RENDER_DELAY_BUFFER_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_RENDER_DELAY_BUFFER_H_ + +#include <stddef.h> +#include <array> +#include <vector> + +#include "api/array_view.h" +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/downsampled_render_buffer.h" +#include "modules/audio_processing/aec3/fft_data.h" +#include "modules/audio_processing/aec3/render_buffer.h" + +namespace webrtc { + +// Class for buffering the incoming render blocks such that these may be +// extracted with a specified delay. +class RenderDelayBuffer { + public: + static RenderDelayBuffer* Create(size_t num_bands, + size_t down_sampling_factor, + size_t downsampled_render_buffer_size, + size_t render_delay_buffer_size); + virtual ~RenderDelayBuffer() = default; + + // Resets the buffer data. + virtual void Reset() = 0; + + // Inserts a block into the buffer and returns true if the insert is + // successful. + virtual bool Insert(const std::vector<std::vector<float>>& block) = 0; + + // Updates the buffers one step based on the specified buffer delay. Returns + // true if there was no overrun, otherwise returns false. + virtual bool UpdateBuffers() = 0; + + // Sets the buffer delay. + virtual void SetDelay(size_t delay) = 0; + + // Gets the buffer delay. + virtual size_t Delay() const = 0; + + // Returns the render buffer for the echo remover. + virtual const RenderBuffer& GetRenderBuffer() const = 0; + + // Returns the downsampled render buffer. 
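+  // (This reduced-rate view of the render signal is what the matched-filter
+  // delay estimation consumes; the echo remover instead uses the full-rate
+  // blocks exposed via GetRenderBuffer().)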
+ virtual const DownsampledRenderBuffer& GetDownsampledRenderBuffer() const = 0; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_RENDER_DELAY_BUFFER_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/render_delay_buffer_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/render_delay_buffer_unittest.cc new file mode 100644 index 0000000000..3e0abea753 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/render_delay_buffer_unittest.cc @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/render_delay_buffer.h" + +#include <memory> +#include <sstream> +#include <string> +#include <vector> + +#include "api/array_view.h" +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/random.h" +#include "test/gtest.h" + +namespace webrtc { +namespace { + +std::string ProduceDebugText(int sample_rate_hz) { + std::ostringstream ss; + ss << "Sample rate: " << sample_rate_hz; + return ss.str(); +} + +constexpr size_t kDownSamplingFactor = 4; +constexpr size_t kNumMatchedFilters = 4; + +} // namespace + +// Verifies that the buffer overflow is correctly reported. +TEST(RenderDelayBuffer, BufferOverflow) { + for (auto rate : {8000, 16000, 32000, 48000}) { + SCOPED_TRACE(ProduceDebugText(rate)); + std::unique_ptr<RenderDelayBuffer> delay_buffer(RenderDelayBuffer::Create( + NumBandsForRate(rate), kDownSamplingFactor, + GetDownSampledBufferSize(kDownSamplingFactor, kNumMatchedFilters), + GetRenderDelayBufferSize(kDownSamplingFactor, kNumMatchedFilters))); + std::vector<std::vector<float>> block_to_insert( + NumBandsForRate(rate), std::vector<float>(kBlockSize, 0.f)); + for (size_t k = 0; k < kMaxApiCallsJitterBlocks; ++k) { + EXPECT_TRUE(delay_buffer->Insert(block_to_insert)); + } + EXPECT_FALSE(delay_buffer->Insert(block_to_insert)); + } +} + +// Verifies that the check for available block works. +TEST(RenderDelayBuffer, AvailableBlock) { + constexpr size_t kNumBands = 1; + std::unique_ptr<RenderDelayBuffer> delay_buffer(RenderDelayBuffer::Create( + kNumBands, kDownSamplingFactor, + GetDownSampledBufferSize(kDownSamplingFactor, kNumMatchedFilters), + GetRenderDelayBufferSize(kDownSamplingFactor, kNumMatchedFilters))); + std::vector<std::vector<float>> input_block( + kNumBands, std::vector<float>(kBlockSize, 1.f)); + EXPECT_TRUE(delay_buffer->Insert(input_block)); + delay_buffer->UpdateBuffers(); +} + +// Verifies the SetDelay method. +TEST(RenderDelayBuffer, SetDelay) { + std::unique_ptr<RenderDelayBuffer> delay_buffer(RenderDelayBuffer::Create( + 1, kDownSamplingFactor, + GetDownSampledBufferSize(kDownSamplingFactor, kNumMatchedFilters), + GetRenderDelayBufferSize(kDownSamplingFactor, kNumMatchedFilters))); + EXPECT_EQ(0u, delay_buffer->Delay()); + for (size_t delay = 0; delay < 20; ++delay) { + delay_buffer->SetDelay(delay); + EXPECT_EQ(delay, delay_buffer->Delay()); + } +} + +#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) + +// Verifies the check for feasible delay. 
+// TODO(peah): Re-enable the test once the issue with memory leaks during DEATH +// tests on test bots has been fixed. +TEST(RenderDelayBuffer, DISABLED_WrongDelay) { + std::unique_ptr<RenderDelayBuffer> delay_buffer(RenderDelayBuffer::Create( + 3, kDownSamplingFactor, + GetDownSampledBufferSize(kDownSamplingFactor, kNumMatchedFilters), + GetRenderDelayBufferSize(kDownSamplingFactor, kNumMatchedFilters))); + EXPECT_DEATH(delay_buffer->SetDelay(21), ""); +} + +// Verifies the check for the number of bands in the inserted blocks. +TEST(RenderDelayBuffer, WrongNumberOfBands) { + for (auto rate : {16000, 32000, 48000}) { + SCOPED_TRACE(ProduceDebugText(rate)); + std::unique_ptr<RenderDelayBuffer> delay_buffer(RenderDelayBuffer::Create( + NumBandsForRate(rate), kDownSamplingFactor, + GetDownSampledBufferSize(kDownSamplingFactor, kNumMatchedFilters), + GetRenderDelayBufferSize(kDownSamplingFactor, kNumMatchedFilters))); + std::vector<std::vector<float>> block_to_insert( + NumBandsForRate(rate < 48000 ? rate + 16000 : 16000), + std::vector<float>(kBlockSize, 0.f)); + EXPECT_DEATH(delay_buffer->Insert(block_to_insert), ""); + } +} + +// Verifies the check of the length of the inserted blocks. +TEST(RenderDelayBuffer, WrongBlockLength) { + for (auto rate : {8000, 16000, 32000, 48000}) { + SCOPED_TRACE(ProduceDebugText(rate)); + std::unique_ptr<RenderDelayBuffer> delay_buffer(RenderDelayBuffer::Create( + 3, kDownSamplingFactor, + GetDownSampledBufferSize(kDownSamplingFactor, kNumMatchedFilters), + GetRenderDelayBufferSize(kDownSamplingFactor, kNumMatchedFilters))); + std::vector<std::vector<float>> block_to_insert( + NumBandsForRate(rate), std::vector<float>(kBlockSize - 1, 0.f)); + EXPECT_DEATH(delay_buffer->Insert(block_to_insert), ""); + } +} + +#endif + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/render_delay_controller.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/render_delay_controller.cc new file mode 100644 index 0000000000..2c1f263ee7 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/render_delay_controller.cc @@ -0,0 +1,179 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+#include "modules/audio_processing/aec3/render_delay_controller.h"
+
+#include <algorithm>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "modules/audio_processing/aec3/echo_path_delay_estimator.h"
+#include "modules/audio_processing/aec3/render_delay_controller_metrics.h"
+#include "modules/audio_processing/include/audio_processing.h"
+#include "rtc_base/atomicops.h"
+#include "rtc_base/constructormagic.h"
+
+namespace webrtc {
+
+namespace {
+
+class RenderDelayControllerImpl final : public RenderDelayController {
+ public:
+  RenderDelayControllerImpl(const EchoCanceller3Config& config,
+                            int sample_rate_hz);
+  ~RenderDelayControllerImpl() override;
+  void Reset() override;
+  void SetDelay(size_t render_delay) override;
+  size_t GetDelay(const DownsampledRenderBuffer& render_buffer,
+                  rtc::ArrayView<const float> capture) override;
+  rtc::Optional<size_t> AlignmentHeadroomSamples() const override {
+    return headroom_samples_;
+  }
+
+ private:
+  static int instance_count_;
+  std::unique_ptr<ApmDataDumper> data_dumper_;
+  const size_t default_delay_;
+  size_t delay_;
+  size_t blocks_since_last_delay_estimate_ = 300000;
+  int echo_path_delay_samples_;
+  size_t align_call_counter_ = 0;
+  rtc::Optional<size_t> headroom_samples_;
+  std::vector<float> capture_delay_buffer_;
+  int capture_delay_buffer_index_ = 0;
+  RenderDelayControllerMetrics metrics_;
+  EchoPathDelayEstimator delay_estimator_;
+  RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(RenderDelayControllerImpl);
+};
+
+size_t ComputeNewBufferDelay(size_t current_delay,
+                             size_t echo_path_delay_samples) {
+  // The below division is not exact and the truncation is intended.
+  const int echo_path_delay_blocks = echo_path_delay_samples / kBlockSize;
+  constexpr int kDelayHeadroomBlocks = 1;
+
+  // Compute the render buffer delay required to achieve the desired latency.
+  size_t new_delay = std::max(echo_path_delay_blocks - kDelayHeadroomBlocks, 0);
+
+  // Add hysteresis.
+  if (new_delay == current_delay + 1) {
+    new_delay = current_delay;
+  }
+
+  return new_delay;
+}
+
+int RenderDelayControllerImpl::instance_count_ = 0;
+
+RenderDelayControllerImpl::RenderDelayControllerImpl(
+    const EchoCanceller3Config& config,
+    int sample_rate_hz)
+    : data_dumper_(
+          new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),
+      default_delay_(
+          std::max(config.delay.default_delay, kMinEchoPathDelayBlocks)),
+      delay_(default_delay_),
+      echo_path_delay_samples_(default_delay_ * kBlockSize),
+      capture_delay_buffer_(kBlockSize * (kMaxApiCallsJitterBlocks + 2), 0.f),
+      delay_estimator_(data_dumper_.get(), config) {
+  RTC_DCHECK(ValidFullBandRate(sample_rate_hz));
+  delay_estimator_.LogDelayEstimationProperties(sample_rate_hz,
+                                                capture_delay_buffer_.size());
+}
+
+RenderDelayControllerImpl::~RenderDelayControllerImpl() = default;
+
+void RenderDelayControllerImpl::Reset() {
+  delay_ = default_delay_;
+  blocks_since_last_delay_estimate_ = 300000;
+  echo_path_delay_samples_ = delay_ * kBlockSize;
+  align_call_counter_ = 0;
+  headroom_samples_ = rtc::nullopt;
+  std::fill(capture_delay_buffer_.begin(), capture_delay_buffer_.end(), 0.f);
+  delay_estimator_.Reset();
+}
+
+void RenderDelayControllerImpl::SetDelay(size_t render_delay) {
+  if (delay_ != render_delay) {
+    // If the externally set delay does not match the current delay, reset the
+    // delay controller.
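+    // Resetting clears the delay estimator state, the delayed capture buffer
+    // and the headroom estimate, all of which were derived under the previous
+    // alignment.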
+ Reset(); + delay_ = render_delay; + } +} + +size_t RenderDelayControllerImpl::GetDelay( + const DownsampledRenderBuffer& render_buffer, + rtc::ArrayView<const float> capture) { + RTC_DCHECK_EQ(kBlockSize, capture.size()); + + ++align_call_counter_; + + // Estimate the delay with a delayed capture signal in order to catch + // noncausal delays. + RTC_DCHECK_LT(capture_delay_buffer_index_ + kBlockSize - 1, + capture_delay_buffer_.size()); + const rtc::Optional<size_t> echo_path_delay_samples_shifted = + delay_estimator_.EstimateDelay( + render_buffer, + rtc::ArrayView<const float>( + &capture_delay_buffer_[capture_delay_buffer_index_], kBlockSize)); + std::copy(capture.begin(), capture.end(), + capture_delay_buffer_.begin() + capture_delay_buffer_index_); + capture_delay_buffer_index_ = + (capture_delay_buffer_index_ + kBlockSize) % capture_delay_buffer_.size(); + + if (echo_path_delay_samples_shifted) { + blocks_since_last_delay_estimate_ = 0; + + // Correct for the capture signal delay. + const int echo_path_delay_samples_corrected = + static_cast<int>(*echo_path_delay_samples_shifted) - + static_cast<int>(capture_delay_buffer_.size()); + echo_path_delay_samples_ = std::max(0, echo_path_delay_samples_corrected); + + // Compute and set new render delay buffer delay. + const size_t new_delay = + ComputeNewBufferDelay(delay_, echo_path_delay_samples_); + if (align_call_counter_ > kNumBlocksPerSecond) { + delay_ = new_delay; + + // Update render delay buffer headroom. + if (echo_path_delay_samples_corrected >= 0) { + const int headroom = echo_path_delay_samples_ - delay_ * kBlockSize; + RTC_DCHECK_LE(0, headroom); + headroom_samples_ = headroom; + } else { + headroom_samples_ = rtc::nullopt; + } + } + + metrics_.Update(echo_path_delay_samples_, delay_); + } else { + metrics_.Update(rtc::nullopt, delay_); + } + + data_dumper_->DumpRaw("aec3_render_delay_controller_delay", 1, + &echo_path_delay_samples_); + data_dumper_->DumpRaw("aec3_render_delay_controller_buffer_delay", delay_); + + return delay_; +} + +} // namespace + +RenderDelayController* RenderDelayController::Create( + const EchoCanceller3Config& config, + int sample_rate_hz) { + return new RenderDelayControllerImpl(config, sample_rate_hz); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/render_delay_controller.h b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/render_delay_controller.h new file mode 100644 index 0000000000..e971b5656a --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/render_delay_controller.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_RENDER_DELAY_CONTROLLER_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_RENDER_DELAY_CONTROLLER_H_ + +#include "api/array_view.h" +#include "api/optional.h" +#include "modules/audio_processing/aec3/downsampled_render_buffer.h" +#include "modules/audio_processing/aec3/render_delay_buffer.h" +#include "modules/audio_processing/include/audio_processing.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" + +namespace webrtc { + +// Class for aligning the render and capture signal using a RenderDelayBuffer. +class RenderDelayController { + public: + static RenderDelayController* Create(const EchoCanceller3Config& config, + int sample_rate_hz); + virtual ~RenderDelayController() = default; + + // Resets the delay controller. + virtual void Reset() = 0; + + // Receives the externally used delay. + virtual void SetDelay(size_t render_delay) = 0; + + // Aligns the render buffer content with the capture signal. + virtual size_t GetDelay(const DownsampledRenderBuffer& render_buffer, + rtc::ArrayView<const float> capture) = 0; + + // Returns an approximate value for the headroom in the buffer alignment. + virtual rtc::Optional<size_t> AlignmentHeadroomSamples() const = 0; +}; +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_RENDER_DELAY_CONTROLLER_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/render_delay_controller_metrics.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/render_delay_controller_metrics.cc new file mode 100644 index 0000000000..696ac29a73 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/render_delay_controller_metrics.cc @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/aec3/render_delay_controller_metrics.h" + +#include <algorithm> + +#include "modules/audio_processing/aec3/aec3_common.h" +#include "system_wrappers/include/metrics.h" + +namespace webrtc { + +namespace { + +enum class DelayReliabilityCategory { + kNone, + kPoor, + kMedium, + kGood, + kExcellent, + kNumCategories +}; +enum class DelayChangesCategory { + kNone, + kFew, + kSeveral, + kMany, + kConstant, + kNumCategories +}; + +} // namespace + +void RenderDelayControllerMetrics::Update(rtc::Optional<size_t> delay_samples, + size_t buffer_delay_blocks) { + ++call_counter_; + + if (!initial_update) { + if (delay_samples) { + ++reliable_delay_estimate_counter_; + size_t delay_blocks = (*delay_samples) / kBlockSize; + + if (delay_blocks != delay_blocks_) { + ++delay_change_counter_; + delay_blocks_ = delay_blocks; + } + } + } else if (++initial_call_counter_ == 5 * kNumBlocksPerSecond) { + initial_update = false; + } + + if (call_counter_ == kMetricsReportingIntervalBlocks) { + int value_to_report = static_cast<int>(delay_blocks_); + value_to_report = std::min(124, value_to_report); + RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.EchoCanceller.EchoPathDelay", + value_to_report, 0, 124, 125); + + value_to_report = static_cast<int>(buffer_delay_blocks); + value_to_report = std::min(124, value_to_report); + RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.EchoCanceller.BufferDelay", + value_to_report, 0, 124, 125); + + DelayReliabilityCategory delay_reliability; + if (reliable_delay_estimate_counter_ == 0) { + delay_reliability = DelayReliabilityCategory::kNone; + } else if (reliable_delay_estimate_counter_ > (call_counter_ >> 1)) { + delay_reliability = DelayReliabilityCategory::kExcellent; + } else if (reliable_delay_estimate_counter_ > 100) { + delay_reliability = DelayReliabilityCategory::kGood; + } else if (reliable_delay_estimate_counter_ > 10) { + delay_reliability = DelayReliabilityCategory::kMedium; + } else { + delay_reliability = DelayReliabilityCategory::kPoor; + } + RTC_HISTOGRAM_ENUMERATION( + "WebRTC.Audio.EchoCanceller.ReliableDelayEstimates", + static_cast<int>(delay_reliability), + static_cast<int>(DelayReliabilityCategory::kNumCategories)); + + DelayChangesCategory delay_changes; + if (delay_change_counter_ == 0) { + delay_changes = DelayChangesCategory::kNone; + } else if (delay_change_counter_ > 10) { + delay_changes = DelayChangesCategory::kConstant; + } else if (delay_change_counter_ > 5) { + delay_changes = DelayChangesCategory::kMany; + } else if (delay_change_counter_ > 2) { + delay_changes = DelayChangesCategory::kSeveral; + } else { + delay_changes = DelayChangesCategory::kFew; + } + RTC_HISTOGRAM_ENUMERATION( + "WebRTC.Audio.EchoCanceller.DelayChanges", + static_cast<int>(delay_changes), + static_cast<int>(DelayChangesCategory::kNumCategories)); + + metrics_reported_ = true; + call_counter_ = 0; + ResetMetrics(); + } else { + metrics_reported_ = false; + } +} + +void RenderDelayControllerMetrics::ResetMetrics() { + delay_change_counter_ = 0; + reliable_delay_estimate_counter_ = 0; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/render_delay_controller_metrics.h b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/render_delay_controller_metrics.h new file mode 100644 index 0000000000..a93f0764ed --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/render_delay_controller_metrics.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. 
All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_RENDER_DELAY_CONTROLLER_METRICS_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_RENDER_DELAY_CONTROLLER_METRICS_H_ + +#include "api/optional.h" +#include "rtc_base/constructormagic.h" + +namespace webrtc { + +// Handles the reporting of metrics for the render delay controller. +class RenderDelayControllerMetrics { + public: + RenderDelayControllerMetrics() = default; + + // Updates the metric with new data. + void Update(rtc::Optional<size_t> delay_samples, size_t buffer_delay_blocks); + + // Returns true if the metrics have just been reported, otherwise false. + bool MetricsReported() { return metrics_reported_; } + + private: + // Resets the metrics. + void ResetMetrics(); + + size_t delay_blocks_ = 0; + int reliable_delay_estimate_counter_ = 0; + int delay_change_counter_ = 0; + int call_counter_ = 0; + int initial_call_counter_ = 0; + bool metrics_reported_ = false; + bool initial_update = true; + + RTC_DISALLOW_COPY_AND_ASSIGN(RenderDelayControllerMetrics); +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_RENDER_DELAY_CONTROLLER_METRICS_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/render_delay_controller_metrics_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/render_delay_controller_metrics_unittest.cc new file mode 100644 index 0000000000..433c249554 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/render_delay_controller_metrics_unittest.cc @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/render_delay_controller_metrics.h" +#include "api/optional.h" +#include "modules/audio_processing/aec3/aec3_common.h" + +#include "test/gtest.h" + +namespace webrtc { + +// Verify the general functionality of RenderDelayControllerMetrics. +TEST(RenderDelayControllerMetrics, NormalUsage) { + RenderDelayControllerMetrics metrics; + + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < kMetricsReportingIntervalBlocks - 1; ++k) { + metrics.Update(rtc::nullopt, 0); + EXPECT_FALSE(metrics.MetricsReported()); + } + metrics.Update(rtc::nullopt, 0); + EXPECT_TRUE(metrics.MetricsReported()); + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/render_delay_controller_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/render_delay_controller_unittest.cc new file mode 100644 index 0000000000..2e36d22484 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/render_delay_controller_unittest.cc @@ -0,0 +1,334 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. 
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/render_delay_controller.h"
+
+#include <algorithm>
+#include <memory>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "modules/audio_processing/aec3/block_processor.h"
+#include "modules/audio_processing/aec3/decimator.h"
+#include "modules/audio_processing/aec3/render_delay_buffer.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "modules/audio_processing/test/echo_canceller_test_tools.h"
+#include "rtc_base/random.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace {
+
+std::string ProduceDebugText(int sample_rate_hz) {
+  std::ostringstream ss;
+  ss << "Sample rate: " << sample_rate_hz;
+  return ss.str();
+}
+
+std::string ProduceDebugText(int sample_rate_hz, size_t delay) {
+  std::ostringstream ss;
+  ss << ProduceDebugText(sample_rate_hz) << ", Delay: " << delay;
+  return ss.str();
+}
+
+constexpr size_t kDownSamplingFactors[] = {2, 4, 8};
+
+}  // namespace
+
+// Verifies the output of GetDelay when there are no AnalyzeRender calls.
+TEST(RenderDelayController, NoRenderSignal) {
+  std::vector<float> block(kBlockSize, 0.f);
+  for (size_t num_matched_filters = 4; num_matched_filters <= 10;
+       num_matched_filters++) {
+    for (auto down_sampling_factor : kDownSamplingFactors) {
+      for (auto rate : {8000, 16000, 32000, 48000}) {
+        SCOPED_TRACE(ProduceDebugText(rate));
+        std::unique_ptr<RenderDelayBuffer> delay_buffer(
+            RenderDelayBuffer::Create(
+                NumBandsForRate(rate), down_sampling_factor,
+                GetDownSampledBufferSize(down_sampling_factor,
+                                         num_matched_filters),
+                GetRenderDelayBufferSize(down_sampling_factor,
+                                         num_matched_filters)));
+        std::unique_ptr<RenderDelayController> delay_controller(
+            RenderDelayController::Create(EchoCanceller3Config(), rate));
+        for (size_t k = 0; k < 100; ++k) {
+          EXPECT_EQ(kMinEchoPathDelayBlocks,
+                    delay_controller->GetDelay(
+                        delay_buffer->GetDownsampledRenderBuffer(), block));
+        }
+      }
+    }
+  }
+}
+
+// Verifies the basic API call sequence.
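+// For every block, the expected call order is: Insert() the render block into
+// the render delay buffer, UpdateBuffers(), and then GetDelay() with the
+// corresponding capture block. With all-zero signals, the reported delay
+// should remain at kMinEchoPathDelayBlocks and no headroom should be reported.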
+TEST(RenderDelayController, BasicApiCalls) {
+  std::vector<float> capture_block(kBlockSize, 0.f);
+  size_t delay_blocks = 0;
+  for (size_t num_matched_filters = 4; num_matched_filters <= 10;
+       num_matched_filters++) {
+    for (auto down_sampling_factor : kDownSamplingFactors) {
+      for (auto rate : {8000, 16000, 32000, 48000}) {
+        std::vector<std::vector<float>> render_block(
+            NumBandsForRate(rate), std::vector<float>(kBlockSize, 0.f));
+        std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
+            RenderDelayBuffer::Create(
+                NumBandsForRate(rate), down_sampling_factor,
+                GetDownSampledBufferSize(down_sampling_factor,
+                                         num_matched_filters),
+                GetRenderDelayBufferSize(down_sampling_factor,
+                                         num_matched_filters)));
+        std::unique_ptr<RenderDelayController> delay_controller(
+            RenderDelayController::Create(EchoCanceller3Config(), rate));
+        for (size_t k = 0; k < 10; ++k) {
+          render_delay_buffer->Insert(render_block);
+          render_delay_buffer->UpdateBuffers();
+          delay_blocks = delay_controller->GetDelay(
+              render_delay_buffer->GetDownsampledRenderBuffer(), capture_block);
+        }
+        EXPECT_FALSE(delay_controller->AlignmentHeadroomSamples());
+        EXPECT_EQ(kMinEchoPathDelayBlocks, delay_blocks);
+      }
+    }
+  }
+}
+
+// Verifies that the RenderDelayController is able to align the signals for
+// simple timeshifts between the signals.
+TEST(RenderDelayController, Alignment) {
+  Random random_generator(42U);
+  std::vector<float> capture_block(kBlockSize, 0.f);
+  size_t delay_blocks = 0;
+  for (size_t num_matched_filters = 4; num_matched_filters <= 10;
+       num_matched_filters++) {
+    for (auto down_sampling_factor : kDownSamplingFactors) {
+      for (auto rate : {8000, 16000, 32000, 48000}) {
+        std::vector<std::vector<float>> render_block(
+            NumBandsForRate(rate), std::vector<float>(kBlockSize, 0.f));
+
+        for (size_t delay_samples : {15, 50, 150, 200, 800, 4000}) {
+          SCOPED_TRACE(ProduceDebugText(rate, delay_samples));
+          std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
+              RenderDelayBuffer::Create(
+                  NumBandsForRate(rate), down_sampling_factor,
+                  GetDownSampledBufferSize(down_sampling_factor,
+                                           num_matched_filters),
+                  GetRenderDelayBufferSize(down_sampling_factor,
+                                           num_matched_filters)));
+          std::unique_ptr<RenderDelayController> delay_controller(
+              RenderDelayController::Create(EchoCanceller3Config(), rate));
+          DelayBuffer<float> signal_delay_buffer(delay_samples);
+          for (size_t k = 0; k < (400 + delay_samples / kBlockSize); ++k) {
+            RandomizeSampleVector(&random_generator, render_block[0]);
+            signal_delay_buffer.Delay(render_block[0], capture_block);
+            render_delay_buffer->Insert(render_block);
+            render_delay_buffer->UpdateBuffers();
+            delay_blocks = delay_controller->GetDelay(
+                render_delay_buffer->GetDownsampledRenderBuffer(),
+                capture_block);
+          }
+
+          constexpr int kDelayHeadroomBlocks = 1;
+          size_t expected_delay_blocks =
+              std::max(0, static_cast<int>(delay_samples / kBlockSize) -
+                              kDelayHeadroomBlocks);
+
+          EXPECT_EQ(expected_delay_blocks, delay_blocks);
+
+          const rtc::Optional<size_t> headroom_samples =
+              delay_controller->AlignmentHeadroomSamples();
+          ASSERT_TRUE(headroom_samples);
+          EXPECT_NEAR(delay_samples - delay_blocks * kBlockSize,
+                      *headroom_samples, 4);
+        }
+      }
+    }
+  }
+}
+
+// Verifies that the RenderDelayController is able to properly handle noncausal
+// delays.
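+// A noncausal delay means that the capture signal leads the render signal;
+// this cannot be compensated for by delaying the render buffer further, so the
+// controller is expected to report a delay of zero blocks and no alignment
+// headroom.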
+TEST(RenderDelayController, NonCausalAlignment) {
+  Random random_generator(42U);
+  size_t delay_blocks = 0;
+  for (size_t num_matched_filters = 4; num_matched_filters <= 10;
+       num_matched_filters++) {
+    for (auto down_sampling_factor : kDownSamplingFactors) {
+      for (auto rate : {8000, 16000, 32000, 48000}) {
+        std::vector<std::vector<float>> render_block(
+            NumBandsForRate(rate), std::vector<float>(kBlockSize, 0.f));
+        std::vector<std::vector<float>> capture_block(
+            NumBandsForRate(rate), std::vector<float>(kBlockSize, 0.f));
+
+        for (int delay_samples : {-15, -50, -150, -200}) {
+          SCOPED_TRACE(ProduceDebugText(rate, -delay_samples));
+          std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
+              RenderDelayBuffer::Create(
+                  NumBandsForRate(rate), down_sampling_factor,
+                  GetDownSampledBufferSize(down_sampling_factor,
+                                           num_matched_filters),
+                  GetRenderDelayBufferSize(down_sampling_factor,
+                                           num_matched_filters)));
+          std::unique_ptr<RenderDelayController> delay_controller(
+              RenderDelayController::Create(EchoCanceller3Config(), rate));
+          DelayBuffer<float> signal_delay_buffer(-delay_samples);
+          for (int k = 0;
+               k < (400 - delay_samples / static_cast<int>(kBlockSize)); ++k) {
+            RandomizeSampleVector(&random_generator, capture_block[0]);
+            signal_delay_buffer.Delay(capture_block[0], render_block[0]);
+            render_delay_buffer->Insert(render_block);
+            render_delay_buffer->UpdateBuffers();
+            delay_blocks = delay_controller->GetDelay(
+                render_delay_buffer->GetDownsampledRenderBuffer(),
+                capture_block[0]);
+          }
+
+          EXPECT_EQ(0u, delay_blocks);
+
+          const rtc::Optional<size_t> headroom_samples =
+              delay_controller->AlignmentHeadroomSamples();
+          ASSERT_FALSE(headroom_samples);
+        }
+      }
+    }
+  }
+}
+
+// Verifies that the RenderDelayController is able to align the signals for
+// simple timeshifts between the signals when there is jitter in the API calls.
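+// The jitter is emulated by inserting bursts of up to
+// kMaxApiCallsJitterBlocks - 1 render blocks before the corresponding capture
+// blocks are processed, mirroring the maximum API call jitter that the
+// buffering is designed to absorb.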
+TEST(RenderDelayController, AlignmentWithJitter) {
+  Random random_generator(42U);
+  std::vector<float> capture_block(kBlockSize, 0.f);
+  for (size_t num_matched_filters = 4; num_matched_filters <= 10;
+       num_matched_filters++) {
+    for (auto down_sampling_factor : kDownSamplingFactors) {
+      for (auto rate : {8000, 16000, 32000, 48000}) {
+        std::vector<std::vector<float>> render_block(
+            NumBandsForRate(rate), std::vector<float>(kBlockSize, 0.f));
+        for (size_t delay_samples : {15, 50, 300, 800}) {
+          size_t delay_blocks = 0;
+          SCOPED_TRACE(ProduceDebugText(rate, delay_samples));
+          std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
+              RenderDelayBuffer::Create(
+                  NumBandsForRate(rate), down_sampling_factor,
+                  GetDownSampledBufferSize(down_sampling_factor,
+                                           num_matched_filters),
+                  GetRenderDelayBufferSize(down_sampling_factor,
+                                           num_matched_filters)));
+          std::unique_ptr<RenderDelayController> delay_controller(
+              RenderDelayController::Create(EchoCanceller3Config(), rate));
+          DelayBuffer<float> signal_delay_buffer(delay_samples);
+          for (size_t j = 0; j < (1000 + delay_samples / kBlockSize) /
+                                         kMaxApiCallsJitterBlocks +
+                                     1;
+               ++j) {
+            std::vector<std::vector<float>> capture_block_buffer;
+            for (size_t k = 0; k < (kMaxApiCallsJitterBlocks - 1); ++k) {
+              RandomizeSampleVector(&random_generator, render_block[0]);
+              signal_delay_buffer.Delay(render_block[0], capture_block);
+              capture_block_buffer.push_back(capture_block);
+              render_delay_buffer->Insert(render_block);
+            }
+            for (size_t k = 0; k < (kMaxApiCallsJitterBlocks - 1); ++k) {
+              render_delay_buffer->UpdateBuffers();
+              delay_blocks = delay_controller->GetDelay(
+                  render_delay_buffer->GetDownsampledRenderBuffer(),
+                  capture_block_buffer[k]);
+            }
+          }
+
+          constexpr int kDelayHeadroomBlocks = 1;
+          size_t expected_delay_blocks =
+              std::max(0, static_cast<int>(delay_samples / kBlockSize) -
+                              kDelayHeadroomBlocks);
+          if (expected_delay_blocks < 2) {
+            expected_delay_blocks = 0;
+          }
+
+          EXPECT_EQ(expected_delay_blocks, delay_blocks);
+
+          const rtc::Optional<size_t> headroom_samples =
+              delay_controller->AlignmentHeadroomSamples();
+          ASSERT_TRUE(headroom_samples);
+          EXPECT_NEAR(delay_samples - delay_blocks * kBlockSize,
+                      *headroom_samples, 4);
+        }
+      }
+    }
+  }
+}
+
+// Verifies the initial value for the AlignmentHeadroomSamples.
+TEST(RenderDelayController, InitialHeadroom) {
+  std::vector<float> render_block(kBlockSize, 0.f);
+  std::vector<float> capture_block(kBlockSize, 0.f);
+  for (size_t num_matched_filters = 4; num_matched_filters <= 10;
+       num_matched_filters++) {
+    for (auto down_sampling_factor : kDownSamplingFactors) {
+      for (auto rate : {8000, 16000, 32000, 48000}) {
+        SCOPED_TRACE(ProduceDebugText(rate));
+        std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
+            RenderDelayBuffer::Create(
+                NumBandsForRate(rate), down_sampling_factor,
+                GetDownSampledBufferSize(down_sampling_factor,
+                                         num_matched_filters),
+                GetRenderDelayBufferSize(down_sampling_factor,
+                                         num_matched_filters)));
+        std::unique_ptr<RenderDelayController> delay_controller(
+            RenderDelayController::Create(EchoCanceller3Config(), rate));
+        EXPECT_FALSE(delay_controller->AlignmentHeadroomSamples());
+      }
+    }
+  }
+}
+
+#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
+
+// Verifies the check for the capture signal block size.
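+// GetDelay() requires capture blocks of exactly kBlockSize samples, so the
+// shorter block used below is expected to trigger a DCHECK.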
+TEST(RenderDelayController, WrongCaptureSize) { + std::vector<float> block(kBlockSize - 1, 0.f); + for (auto rate : {8000, 16000, 32000, 48000}) { + SCOPED_TRACE(ProduceDebugText(rate)); + std::unique_ptr<RenderDelayBuffer> render_delay_buffer( + RenderDelayBuffer::Create(NumBandsForRate(rate), 4, + GetDownSampledBufferSize(4, 4), + GetRenderDelayBufferSize(4, 4))); + EXPECT_DEATH( + std::unique_ptr<RenderDelayController>( + RenderDelayController::Create(EchoCanceller3Config(), rate)) + ->GetDelay(render_delay_buffer->GetDownsampledRenderBuffer(), + block), + ""); + } +} + +// Verifies the check for correct sample rate. +// TODO(peah): Re-enable the test once the issue with memory leaks during DEATH +// tests on test bots has been fixed. +TEST(RenderDelayController, DISABLED_WrongSampleRate) { + for (auto rate : {-1, 0, 8001, 16001}) { + SCOPED_TRACE(ProduceDebugText(rate)); + std::unique_ptr<RenderDelayBuffer> render_delay_buffer( + RenderDelayBuffer::Create(NumBandsForRate(rate), 4, + GetDownSampledBufferSize(4, 4), + GetRenderDelayBufferSize(4, 4))); + EXPECT_DEATH( + std::unique_ptr<RenderDelayController>( + RenderDelayController::Create(EchoCanceller3Config(), rate)), + ""); + } +} + +#endif + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/render_signal_analyzer.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/render_signal_analyzer.cc new file mode 100644 index 0000000000..22aa352320 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/render_signal_analyzer.cc @@ -0,0 +1,126 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/render_signal_analyzer.h" + +#include <math.h> +#include <algorithm> + +#include "rtc_base/checks.h" + +namespace webrtc { + +namespace { +constexpr size_t kCounterThreshold = 5; + +// Identifies local bands with narrow characteristics. +void IdentifySmallNarrowBandRegions( + const RenderBuffer& render_buffer, + const rtc::Optional<size_t>& delay_partitions, + std::array<size_t, kFftLengthBy2 - 1>* narrow_band_counters) { + if (!delay_partitions) { + narrow_band_counters->fill(0); + return; + } + + const std::array<float, kFftLengthBy2Plus1>& X2 = + render_buffer.Spectrum(*delay_partitions); + + for (size_t k = 1; k < (X2.size() - 1); ++k) { + (*narrow_band_counters)[k - 1] = X2[k] > 3 * std::max(X2[k - 1], X2[k + 1]) + ? (*narrow_band_counters)[k - 1] + 1 + : 0; + } +} + +// Identifies whether the signal has a single strong narrow-band component. +void IdentifyStrongNarrowBandComponent(const RenderBuffer& render_buffer, + rtc::Optional<int>* narrow_peak_band, + size_t* narrow_peak_counter) { + const auto X2_latest = render_buffer.Spectrum(0); + + // Identify the spectral peak. + const int peak_bin = static_cast<int>( + std::max_element(X2_latest.begin(), X2_latest.end()) - X2_latest.begin()); + + // Compute the level around the peak. 
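+  // The level is taken as the strongest bin in the regions 5 to 14 bins below
+  // and above the peak; the peak only qualifies as narrowband if it exceeds
+  // this level by a factor of 100 (see the check below).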
+  float non_peak_power = 0.f;
+  for (int k = std::max(5, peak_bin - 14); k < peak_bin - 4; ++k) {
+    non_peak_power = std::max(X2_latest[k], non_peak_power);
+  }
+  for (int k = peak_bin + 5;
+       k < std::min(peak_bin + 15, static_cast<int>(kFftLengthBy2Plus1)); ++k) {
+    non_peak_power = std::max(X2_latest[k], non_peak_power);
+  }
+
+  // Assess the render signal strength.
+  const std::vector<std::vector<float>>& x_latest =
+      render_buffer.MostRecentBlock();
+  auto result0 = std::minmax_element(x_latest[0].begin(), x_latest[0].end());
+  float max_abs = std::max(fabs(*result0.first), fabs(*result0.second));
+
+  if (x_latest.size() > 1) {
+    const auto result1 =
+        std::minmax_element(x_latest[1].begin(), x_latest[1].end());
+    max_abs =
+        std::max(max_abs, static_cast<float>(std::max(fabs(*result1.first),
+                                                      fabs(*result1.second))));
+  }
+
+  // Detect whether the spectral peak has a strong narrowband nature.
+  if (peak_bin > 6 && max_abs > 100 &&
+      X2_latest[peak_bin] > 100 * non_peak_power) {
+    *narrow_peak_band = peak_bin;
+    *narrow_peak_counter = 0;
+  } else {
+    if (*narrow_peak_band && ++(*narrow_peak_counter) > 7) {
+      *narrow_peak_band = rtc::nullopt;
+    }
+  }
+}
+
+}  // namespace
+
+RenderSignalAnalyzer::RenderSignalAnalyzer() {
+  narrow_band_counters_.fill(0);
+}
+RenderSignalAnalyzer::~RenderSignalAnalyzer() = default;
+
+void RenderSignalAnalyzer::Update(
+    const RenderBuffer& render_buffer,
+    const rtc::Optional<size_t>& delay_partitions) {
+  // Identify bands of narrow nature.
+  IdentifySmallNarrowBandRegions(render_buffer, delay_partitions,
+                                 &narrow_band_counters_);
+
+  // Identify the presence of a strong narrow band.
+  IdentifyStrongNarrowBandComponent(render_buffer, &narrow_peak_band_,
+                                    &narrow_peak_counter_);
+}
+
+void RenderSignalAnalyzer::MaskRegionsAroundNarrowBands(
+    std::array<float, kFftLengthBy2Plus1>* v) const {
+  RTC_DCHECK(v);
+
+  // Set v to zero around narrow band signal regions.
+  if (narrow_band_counters_[0] > kCounterThreshold) {
+    (*v)[1] = (*v)[0] = 0.f;
+  }
+  for (size_t k = 2; k < kFftLengthBy2 - 1; ++k) {
+    if (narrow_band_counters_[k - 1] > kCounterThreshold) {
+      (*v)[k - 2] = (*v)[k - 1] = (*v)[k] = (*v)[k + 1] = (*v)[k + 2] = 0.f;
+    }
+  }
+  if (narrow_band_counters_[kFftLengthBy2 - 2] > kCounterThreshold) {
+    (*v)[kFftLengthBy2] = (*v)[kFftLengthBy2 - 1] = 0.f;
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/render_signal_analyzer.h b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/render_signal_analyzer.h
new file mode 100644
index 0000000000..64d74f4e3f
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/render_signal_analyzer.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AEC3_RENDER_SIGNAL_ANALYZER_H_
+#define MODULES_AUDIO_PROCESSING_AEC3_RENDER_SIGNAL_ANALYZER_H_
+
+#include <algorithm>
+#include <array>
+#include <memory>
+
+#include "api/optional.h"
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "modules/audio_processing/aec3/render_buffer.h"
+#include "rtc_base/constructormagic.h"
+
+namespace webrtc {
+
+// Provides functionality for analyzing the properties of the render signal.
+class RenderSignalAnalyzer {
+ public:
+  RenderSignalAnalyzer();
+  ~RenderSignalAnalyzer();
+
+  // Updates the render signal analysis with the most recent render signal.
+  void Update(const RenderBuffer& render_buffer,
+              const rtc::Optional<size_t>& delay_partitions);
+
+  // Returns true if the render signal is poorly exciting.
+  bool PoorSignalExcitation() const {
+    RTC_DCHECK_LT(2, narrow_band_counters_.size());
+    return std::any_of(narrow_band_counters_.begin(),
+                       narrow_band_counters_.end(),
+                       [](size_t a) { return a > 10; });
+  }
+
+  // Zeros the array around regions with narrowband signal characteristics.
+  void MaskRegionsAroundNarrowBands(
+      std::array<float, kFftLengthBy2Plus1>* v) const;
+
+  rtc::Optional<int> NarrowPeakBand() const { return narrow_peak_band_; }
+
+ private:
+  std::array<size_t, kFftLengthBy2 - 1> narrow_band_counters_;
+  rtc::Optional<int> narrow_peak_band_;
+  size_t narrow_peak_counter_;
+
+  RTC_DISALLOW_COPY_AND_ASSIGN(RenderSignalAnalyzer);
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AEC3_RENDER_SIGNAL_ANALYZER_H_
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/render_signal_analyzer_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/render_signal_analyzer_unittest.cc
new file mode 100644
index 0000000000..7e01f3fc44
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/render_signal_analyzer_unittest.cc
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/render_signal_analyzer.h"
+
+#include <math.h>
+#include <array>
+#include <vector>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "modules/audio_processing/aec3/aec3_fft.h"
+#include "modules/audio_processing/aec3/fft_data.h"
+#include "modules/audio_processing/aec3/render_buffer.h"
+#include "modules/audio_processing/test/echo_canceller_test_tools.h"
+#include "rtc_base/random.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace {
+
+constexpr float kPi = 3.141592f;
+
+void ProduceSinusoid(int sample_rate_hz,
+                     float sinusoidal_frequency_hz,
+                     size_t* sample_counter,
+                     rtc::ArrayView<float> x) {
+  // Produce a sinusoid of the specified frequency.
+  for (size_t k = *sample_counter, j = 0; k < (*sample_counter + kBlockSize);
+       ++k, ++j) {
+    x[j] =
+        32767.f * sin(2.f * kPi * sinusoidal_frequency_hz * k / sample_rate_hz);
+  }
+  *sample_counter = *sample_counter + kBlockSize;
+}
+
+}  // namespace
+
+#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
+// Verifies that the check for non-null output parameter works.
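+// MaskRegionsAroundNarrowBands() DCHECKs that its output pointer is non-null,
+// so passing nullptr is expected to terminate in builds with DCHECKs enabled.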
+TEST(RenderSignalAnalyzer, NullMaskOutput) {
+  RenderSignalAnalyzer analyzer;
+  EXPECT_DEATH(analyzer.MaskRegionsAroundNarrowBands(nullptr), "");
+}
+
+#endif
+
+// Verify that no narrow bands are detected in a Gaussian noise signal.
+TEST(RenderSignalAnalyzer, NoFalseDetectionOfNarrowBands) {
+  RenderSignalAnalyzer analyzer;
+  Random random_generator(42U);
+  std::vector<std::vector<float>> x(3, std::vector<float>(kBlockSize, 0.f));
+  std::array<float, kBlockSize> x_old;
+  FftData X;
+  Aec3Fft fft;
+  RenderBuffer render_buffer(Aec3Optimization::kNone, 3, 1,
+                             std::vector<size_t>(1, 1));
+  std::array<float, kFftLengthBy2Plus1> mask;
+  x_old.fill(0.f);
+
+  for (size_t k = 0; k < 100; ++k) {
+    RandomizeSampleVector(&random_generator, x[0]);
+    fft.PaddedFft(x[0], x_old, &X);
+    render_buffer.Insert(x);
+    analyzer.Update(render_buffer, 0);
+  }
+
+  mask.fill(1.f);
+  analyzer.MaskRegionsAroundNarrowBands(&mask);
+  EXPECT_TRUE(
+      std::all_of(mask.begin(), mask.end(), [](float a) { return a == 1.f; }));
+  EXPECT_FALSE(analyzer.PoorSignalExcitation());
+}
+
+// Verify that a sinusoid signal is detected as narrow bands.
+TEST(RenderSignalAnalyzer, NarrowBandDetection) {
+  RenderSignalAnalyzer analyzer;
+  Random random_generator(42U);
+  std::vector<std::vector<float>> x(3, std::vector<float>(kBlockSize, 0.f));
+  std::array<float, kBlockSize> x_old;
+  Aec3Fft fft;
+  RenderBuffer render_buffer(Aec3Optimization::kNone, 3, 1,
+                             std::vector<size_t>(1, 1));
+  std::array<float, kFftLengthBy2Plus1> mask;
+  x_old.fill(0.f);
+  constexpr int kSinusFrequencyBin = 32;
+
+  auto generate_sinusoid_test = [&](bool known_delay) {
+    size_t sample_counter = 0;
+    for (size_t k = 0; k < 100; ++k) {
+      ProduceSinusoid(16000, 16000 / 2 * kSinusFrequencyBin / kFftLengthBy2,
+                      &sample_counter, x[0]);
+      render_buffer.Insert(x);
+      analyzer.Update(render_buffer, known_delay ? rtc::Optional<size_t>(0)
+                                                 : rtc::nullopt);
+    }
+  };
+
+  generate_sinusoid_test(true);
+  mask.fill(1.f);
+  analyzer.MaskRegionsAroundNarrowBands(&mask);
+  for (int k = 0; k < static_cast<int>(mask.size()); ++k) {
+    EXPECT_EQ(abs(k - kSinusFrequencyBin) <= 2 ? 0.f : 1.f, mask[k]);
+  }
+  EXPECT_TRUE(analyzer.PoorSignalExcitation());
+
+  // Verify that no bands are detected as narrow when the delay is unknown.
+  generate_sinusoid_test(false);
+  mask.fill(1.f);
+  analyzer.MaskRegionsAroundNarrowBands(&mask);
+  std::for_each(mask.begin(), mask.end(), [](float a) { EXPECT_EQ(1.f, a); });
+  EXPECT_FALSE(analyzer.PoorSignalExcitation());
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/residual_echo_estimator.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/residual_echo_estimator.cc
new file mode 100644
index 0000000000..ba65684fad
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/residual_echo_estimator.cc
@@ -0,0 +1,292 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */ + +#include "modules/audio_processing/aec3/residual_echo_estimator.h" + +#include <numeric> +#include <vector> + +#include "rtc_base/checks.h" + +namespace webrtc { +namespace { + +// Estimates the echo generating signal power as gated maximal power over a time +// window. +void EchoGeneratingPower(const RenderBuffer& render_buffer, + size_t min_delay, + size_t max_delay, + std::array<float, kFftLengthBy2Plus1>* X2) { + X2->fill(0.f); + for (size_t k = min_delay; k <= max_delay; ++k) { + std::transform(X2->begin(), X2->end(), render_buffer.Spectrum(k).begin(), + X2->begin(), + [](float a, float b) { return std::max(a, b); }); + } + + // Apply soft noise gate of -78 dBFS. + static constexpr float kNoiseGatePower = 27509.42f; + std::for_each(X2->begin(), X2->end(), [](float& a) { + if (kNoiseGatePower > a) { + a = std::max(0.f, a - 0.3f * (kNoiseGatePower - a)); + } + }); +} + +constexpr int kNoiseFloorCounterMax = 50; +constexpr float kNoiseFloorMin = 10.f * 10.f * 128.f * 128.f; + +// Updates estimate for the power of the stationary noise component in the +// render signal. +void RenderNoisePower( + const RenderBuffer& render_buffer, + std::array<float, kFftLengthBy2Plus1>* X2_noise_floor, + std::array<int, kFftLengthBy2Plus1>* X2_noise_floor_counter) { + RTC_DCHECK(X2_noise_floor); + RTC_DCHECK(X2_noise_floor_counter); + + const auto render_power = render_buffer.Spectrum(0); + RTC_DCHECK_EQ(X2_noise_floor->size(), render_power.size()); + RTC_DCHECK_EQ(X2_noise_floor_counter->size(), render_power.size()); + + // Estimate the stationary noise power in a minimum statistics manner. + for (size_t k = 0; k < render_power.size(); ++k) { + // Decrease rapidly. + if (render_power[k] < (*X2_noise_floor)[k]) { + (*X2_noise_floor)[k] = render_power[k]; + (*X2_noise_floor_counter)[k] = 0; + } else { + // Increase in a delayed, leaky manner. + if ((*X2_noise_floor_counter)[k] >= kNoiseFloorCounterMax) { + (*X2_noise_floor)[k] = + std::max((*X2_noise_floor)[k] * 1.1f, kNoiseFloorMin); + } else { + ++(*X2_noise_floor_counter)[k]; + } + } + } +} + +} // namespace + +ResidualEchoEstimator::ResidualEchoEstimator(const EchoCanceller3Config& config) + : config_(config) { + Reset(); +} + +ResidualEchoEstimator::~ResidualEchoEstimator() = default; + +void ResidualEchoEstimator::Estimate( + const AecState& aec_state, + const RenderBuffer& render_buffer, + const std::array<float, kFftLengthBy2Plus1>& S2_linear, + const std::array<float, kFftLengthBy2Plus1>& Y2, + std::array<float, kFftLengthBy2Plus1>* R2) { + RTC_DCHECK(R2); + + // Estimate the power of the stationary noise in the render signal. + RenderNoisePower(render_buffer, &X2_noise_floor_, &X2_noise_floor_counter_); + + // Estimate the residual echo power. + if (aec_state.LinearEchoEstimate()) { + RTC_DCHECK(aec_state.FilterDelay()); + const int filter_delay = *aec_state.FilterDelay(); + LinearEstimate(S2_linear, aec_state.Erle(), filter_delay, R2); + AddEchoReverb(S2_linear, aec_state.SaturatedEcho(), filter_delay, + aec_state.ReverbDecay(), R2); + + // If the echo is saturated, estimate the echo power as the maximum echo + // power with a leakage factor. + if (aec_state.SaturatedEcho()) { + R2->fill((*std::max_element(R2->begin(), R2->end())) * 100.f); + } + } else { + const rtc::Optional<size_t> delay = + aec_state.ExternalDelay() + ? (aec_state.FilterDelay() ? aec_state.FilterDelay() + : aec_state.ExternalDelay()) + : rtc::Optional<size_t>(); + + // Estimate the echo generating signal power. 
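+    // When both an external delay and a filter delay are available, the power
+    // is computed over a narrow window of blocks surrounding that delay;
+    // otherwise it is computed over the full window of the most recent render
+    // blocks.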
+    std::array<float, kFftLengthBy2Plus1> X2;
+    if (aec_state.ExternalDelay() && aec_state.FilterDelay()) {
+      RTC_DCHECK(delay);
+      const int delay_use = static_cast<int>(*delay);
+
+      // Computes the spectral power over the blocks surrounding the delay.
+      constexpr int kKnownDelayRenderWindowSize = 5;
+      // TODO(peah): Add lookahead since that was what was there initially.
+      static_assert(
+          kUnknownDelayRenderWindowSize >= kKnownDelayRenderWindowSize,
+          "Requirement to ensure that the render buffer is not overrun");
+      EchoGeneratingPower(
+          render_buffer, std::max(0, delay_use - 1),
+          std::min(kKnownDelayRenderWindowSize - 1, delay_use + 1), &X2);
+    } else {
+      // Computes the spectral power over the latest blocks.
+      // TODO(peah): Add lookahead since that was what was there initially.
+      EchoGeneratingPower(render_buffer, 0, kUnknownDelayRenderWindowSize - 1,
+                          &X2);
+    }
+
+    // Subtract the stationary noise power to avoid stationary noise causing
+    // excessive echo suppression.
+    std::transform(
+        X2.begin(), X2.end(), X2_noise_floor_.begin(), X2.begin(),
+        [](float a, float b) { return std::max(0.f, a - 10.f * b); });
+
+    NonLinearEstimate(
+        aec_state.SufficientFilterUpdates(), aec_state.SaturatedEcho(),
+        config_.ep_strength.bounded_erl, aec_state.TransparentMode(),
+        aec_state.InitialState(), X2, Y2, R2);
+
+    if (aec_state.ExternalDelay() && aec_state.FilterDelay() &&
+        aec_state.SaturatedEcho()) {
+      AddEchoReverb(*R2, aec_state.SaturatedEcho(),
+                    std::min(static_cast<size_t>(kAdaptiveFilterLength),
+                             delay.value_or(kAdaptiveFilterLength)),
+                    aec_state.ReverbDecay(), R2);
+    }
+  }
+
+  // If the echo is deemed inaudible, set the residual echo to zero.
+  if (aec_state.InaudibleEcho()) {
+    R2->fill(0.f);
+    R2_old_.fill(0.f);
+    R2_hold_counter_.fill(0);
+  }
+
+  std::copy(R2->begin(), R2->end(), R2_old_.begin());
+}
+
+void ResidualEchoEstimator::Reset() {
+  X2_noise_floor_counter_.fill(kNoiseFloorCounterMax);
+  X2_noise_floor_.fill(kNoiseFloorMin);
+  R2_reverb_.fill(0.f);
+  R2_old_.fill(0.f);
+  R2_hold_counter_.fill(0);
+  for (auto& S2_k : S2_old_) {
+    S2_k.fill(0.f);
+  }
+}
+
+void ResidualEchoEstimator::LinearEstimate(
+    const std::array<float, kFftLengthBy2Plus1>& S2_linear,
+    const std::array<float, kFftLengthBy2Plus1>& erle,
+    size_t delay,
+    std::array<float, kFftLengthBy2Plus1>* R2) {
+  std::fill(R2_hold_counter_.begin(), R2_hold_counter_.end(), 10);
+  std::transform(erle.begin(), erle.end(), S2_linear.begin(), R2->begin(),
+                 [](float a, float b) {
+                   RTC_DCHECK_LT(0.f, a);
+                   return b / a;
+                 });
+}
+
+void ResidualEchoEstimator::NonLinearEstimate(
+    bool sufficient_filter_updates,
+    bool saturated_echo,
+    bool bounded_erl,
+    bool transparent_mode,
+    bool initial_state,
+    const std::array<float, kFftLengthBy2Plus1>& X2,
+    const std::array<float, kFftLengthBy2Plus1>& Y2,
+    std::array<float, kFftLengthBy2Plus1>* R2) {
+  float echo_path_gain_lf;
+  float echo_path_gain_mf;
+  float echo_path_gain_hf;
+
+  // Set echo path gains.
+  if (saturated_echo) {
+    // If the echo could be saturated, use a very conservative gain.
+    echo_path_gain_lf = echo_path_gain_mf = echo_path_gain_hf = 10000.f;
+  } else if (sufficient_filter_updates && !bounded_erl) {
+    // If the filter should have been able to converge, and no assumption is
+    // possible on the ERL, use a low gain.
+    echo_path_gain_lf = echo_path_gain_mf = echo_path_gain_hf = 0.01f;
+  } else if ((sufficient_filter_updates && bounded_erl) || transparent_mode) {
+    // If the filter should have been able to converge, and it is known that
+    // the ERL is bounded, use a very low gain.
+    echo_path_gain_lf = echo_path_gain_mf = echo_path_gain_hf = 0.001f;
+  } else if (!initial_state) {
+    // If the AEC is no longer in an initial state, assume a weak echo path.
+    echo_path_gain_lf = echo_path_gain_mf = echo_path_gain_hf = 0.01f;
+  } else {
+    // In the initial state, use conservative gains.
+    echo_path_gain_lf = config_.ep_strength.lf;
+    echo_path_gain_mf = config_.ep_strength.mf;
+    echo_path_gain_hf = config_.ep_strength.hf;
+  }
+
+  // Compute preliminary residual echo.
+  std::transform(
+      X2.begin(), X2.begin() + 12, R2->begin(),
+      [echo_path_gain_lf](float a) { return a * echo_path_gain_lf; });
+  std::transform(
+      X2.begin() + 12, X2.begin() + 25, R2->begin() + 12,
+      [echo_path_gain_mf](float a) { return a * echo_path_gain_mf; });
+  std::transform(
+      X2.begin() + 25, X2.end(), R2->begin() + 25,
+      [echo_path_gain_hf](float a) { return a * echo_path_gain_hf; });
+
+  for (size_t k = 0; k < R2->size(); ++k) {
+    // Update hold counter.
+    R2_hold_counter_[k] = R2_old_[k] < (*R2)[k] ? 0 : R2_hold_counter_[k] + 1;
+
+    // Compute the residual echo by holding the maximum of the echo powers and
+    // applying an echo fading corresponding to a room with an RT60 value of
+    // about 50 ms.
+    (*R2)[k] = R2_hold_counter_[k] < 2
+                   ? std::max((*R2)[k], R2_old_[k])
+                   : std::min((*R2)[k] + R2_old_[k] * 0.1f, Y2[k]);
+  }
+}
+
+void ResidualEchoEstimator::AddEchoReverb(
+    const std::array<float, kFftLengthBy2Plus1>& S2,
+    bool saturated_echo,
+    size_t delay,
+    float reverb_decay_factor,
+    std::array<float, kFftLengthBy2Plus1>* R2) {
+  // Compute the decay factor for how much the echo has decayed before leaving
+  // the region covered by the linear model.
+  auto integer_power = [](float base, int exp) {
+    float result = 1.f;
+    for (int k = 0; k < exp; ++k) {
+      result *= base;
+    }
+    return result;
+  };
+  RTC_DCHECK_LE(delay, S2_old_.size());
+  const float reverb_decay_for_delay =
+      integer_power(reverb_decay_factor, S2_old_.size() - delay);
+
+  // Update the estimate of the reverberant residual echo power.
+  S2_old_index_ = S2_old_index_ > 0 ? S2_old_index_ - 1 : S2_old_.size() - 1;
+  const auto& S2_end = S2_old_[S2_old_index_];
+  std::transform(
+      S2_end.begin(), S2_end.end(), R2_reverb_.begin(), R2_reverb_.begin(),
+      [reverb_decay_for_delay, reverb_decay_factor](float a, float b) {
+        return (b + a * reverb_decay_for_delay) * reverb_decay_factor;
+      });
+
+  // Update the buffer of old echo powers.
+  if (saturated_echo) {
+    S2_old_[S2_old_index_].fill((*std::max_element(S2.begin(), S2.end())) *
+                                100.f);
+  } else {
+    std::copy(S2.begin(), S2.end(), S2_old_[S2_old_index_].begin());
+  }
+
+  // Add the power of the echo reverb to the residual echo power.
+  std::transform(R2->begin(), R2->end(), R2_reverb_.begin(), R2->begin(),
+                 std::plus<float>());
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/residual_echo_estimator.h b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/residual_echo_estimator.h
new file mode 100644
index 0000000000..d4c4307eb5
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/residual_echo_estimator.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_RESIDUAL_ECHO_ESTIMATOR_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_RESIDUAL_ECHO_ESTIMATOR_H_ + +#include <algorithm> +#include <array> +#include <vector> + +#include "api/array_view.h" +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/aec_state.h" +#include "modules/audio_processing/aec3/render_buffer.h" +#include "modules/audio_processing/include/audio_processing.h" +#include "rtc_base/constructormagic.h" + +namespace webrtc { + +class ResidualEchoEstimator { + public: + explicit ResidualEchoEstimator(const EchoCanceller3Config& config); + ~ResidualEchoEstimator(); + + void Estimate(const AecState& aec_state, + const RenderBuffer& render_buffer, + const std::array<float, kFftLengthBy2Plus1>& S2_linear, + const std::array<float, kFftLengthBy2Plus1>& Y2, + std::array<float, kFftLengthBy2Plus1>* R2); + + private: + // Resets the state. + void Reset(); + + // Estimates the residual echo power based on the echo return loss enhancement + // (ERLE) and the linear power estimate. + void LinearEstimate(const std::array<float, kFftLengthBy2Plus1>& S2_linear, + const std::array<float, kFftLengthBy2Plus1>& erle, + size_t delay, + std::array<float, kFftLengthBy2Plus1>* R2); + + // Estimates the residual echo power based on the estimate of the echo path + // gain. + void NonLinearEstimate(bool sufficient_filter_updates, + bool saturated_echo, + bool bounded_erl, + bool transparent_mode, + bool initial_state, + const std::array<float, kFftLengthBy2Plus1>& X2, + const std::array<float, kFftLengthBy2Plus1>& Y2, + std::array<float, kFftLengthBy2Plus1>* R2); + + // Adds the estimated unmodelled echo power to the residual echo power + // estimate. + void AddEchoReverb(const std::array<float, kFftLengthBy2Plus1>& S2, + bool saturated_echo, + size_t delay, + float reverb_decay_factor, + std::array<float, kFftLengthBy2Plus1>* R2); + + std::array<float, kFftLengthBy2Plus1> R2_old_; + std::array<int, kFftLengthBy2Plus1> R2_hold_counter_; + std::array<float, kFftLengthBy2Plus1> R2_reverb_; + int S2_old_index_ = 0; + std::array<std::array<float, kFftLengthBy2Plus1>, kAdaptiveFilterLength> + S2_old_; + std::array<float, kFftLengthBy2Plus1> X2_noise_floor_; + std::array<int, kFftLengthBy2Plus1> X2_noise_floor_counter_; + const EchoCanceller3Config config_; + + RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(ResidualEchoEstimator); +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_RESIDUAL_ECHO_ESTIMATOR_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc new file mode 100644 index 0000000000..b85bc1d936 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/residual_echo_estimator.h" + +#include "modules/audio_processing/aec3/aec3_fft.h" +#include "modules/audio_processing/aec3/aec_state.h" +#include "modules/audio_processing/include/audio_processing.h" +#include "modules/audio_processing/test/echo_canceller_test_tools.h" +#include "rtc_base/random.h" +#include "test/gtest.h" + +namespace webrtc { + +#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) + +// Verifies that the check for non-null output residual echo power works. +TEST(ResidualEchoEstimator, NullResidualEchoPowerOutput) { + AecState aec_state(EchoCanceller3Config{}); + RenderBuffer render_buffer(Aec3Optimization::kNone, 3, 10, + std::vector<size_t>(1, 10)); + std::vector<std::array<float, kFftLengthBy2Plus1>> H2; + std::array<float, kFftLengthBy2Plus1> S2_linear; + std::array<float, kFftLengthBy2Plus1> Y2; + EXPECT_DEATH(ResidualEchoEstimator(EchoCanceller3Config{}) + .Estimate(aec_state, render_buffer, S2_linear, Y2, nullptr), + ""); +} + +#endif + +TEST(ResidualEchoEstimator, BasicTest) { + ResidualEchoEstimator estimator(EchoCanceller3Config{}); + EchoCanceller3Config config; + config.ep_strength.default_len = 0.f; + AecState aec_state(config); + RenderBuffer render_buffer(Aec3Optimization::kNone, 3, 10, + std::vector<size_t>(1, 10)); + std::array<float, kFftLengthBy2Plus1> E2_main; + std::array<float, kFftLengthBy2Plus1> E2_shadow; + std::array<float, kFftLengthBy2Plus1> S2_linear; + std::array<float, kFftLengthBy2Plus1> S2_fallback; + std::array<float, kFftLengthBy2Plus1> Y2; + std::array<float, kFftLengthBy2Plus1> R2; + EchoPathVariability echo_path_variability(false, false); + std::vector<std::vector<float>> x(3, std::vector<float>(kBlockSize, 0.f)); + std::vector<std::array<float, kFftLengthBy2Plus1>> H2(10); + Random random_generator(42U); + FftData X; + std::array<float, kBlockSize> x_old; + std::array<float, kBlockSize> s; + Aec3Fft fft; + + for (auto& H2_k : H2) { + H2_k.fill(0.01f); + } + H2[2].fill(10.f); + H2[2][0] = 0.1f; + + std::array<float, kAdaptiveFilterTimeDomainLength> h; + h.fill(0.f); + + s.fill(100.f); + + constexpr float kLevel = 10.f; + E2_shadow.fill(kLevel); + E2_main.fill(kLevel); + S2_linear.fill(kLevel); + S2_fallback.fill(kLevel); + Y2.fill(kLevel); + + for (int k = 0; k < 2000; ++k) { + RandomizeSampleVector(&random_generator, x[0]); + std::for_each(x[0].begin(), x[0].end(), [](float& a) { a /= 30.f; }); + fft.PaddedFft(x[0], x_old, &X); + render_buffer.Insert(x); + + aec_state.HandleEchoPathChange(echo_path_variability); + aec_state.Update(H2, h, true, 2, render_buffer, E2_main, Y2, x[0], s, + false); + + estimator.Estimate(aec_state, render_buffer, S2_linear, Y2, &R2); + } + std::for_each(R2.begin(), R2.end(), + [&](float a) { EXPECT_NEAR(kLevel, a, 0.1f); }); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/shadow_filter_update_gain.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/shadow_filter_update_gain.cc new file mode 100644 index 0000000000..db393a78b2 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/shadow_filter_update_gain.cc @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/shadow_filter_update_gain.h"
+
+#include <algorithm>
+#include <functional>
+
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+void ShadowFilterUpdateGain::HandleEchoPathChange() {
+  // TODO(peah): Check whether this counter should instead be initialized to a
+  // large value.
+  poor_signal_excitation_counter_ = 0;
+  call_counter_ = 0;
+}
+
+void ShadowFilterUpdateGain::Compute(
+    const RenderBuffer& render_buffer,
+    const RenderSignalAnalyzer& render_signal_analyzer,
+    const FftData& E_shadow,
+    size_t size_partitions,
+    bool saturated_capture_signal,
+    FftData* G) {
+  RTC_DCHECK(G);
+  ++call_counter_;
+
+  if (render_signal_analyzer.PoorSignalExcitation()) {
+    poor_signal_excitation_counter_ = 0;
+  }
+
+  // Do not update the filter if the render is not sufficiently excited.
+  if (++poor_signal_excitation_counter_ < size_partitions ||
+      saturated_capture_signal || call_counter_ <= size_partitions) {
+    G->re.fill(0.f);
+    G->im.fill(0.f);
+    return;
+  }
+
+  // Compute mu.
+  // Corresponds to WGN of power -39 dBFS.
+  constexpr float kNoiseGatePower = 220075344.f;
+  constexpr float kMuFixed = .5f;
+  std::array<float, kFftLengthBy2Plus1> mu;
+  const auto& X2 = render_buffer.SpectralSum(size_partitions);
+  std::transform(X2.begin(), X2.end(), mu.begin(), [&](float a) {
+    return a > kNoiseGatePower ? kMuFixed / a : 0.f;
+  });
+
+  // Avoid updating the filter close to narrow bands in the render signals.
+  render_signal_analyzer.MaskRegionsAroundNarrowBands(&mu);
+
+  // G = mu * E (the 1/X2 normalization is already folded into mu above).
+  std::transform(mu.begin(), mu.end(), E_shadow.re.begin(), G->re.begin(),
+                 std::multiplies<float>());
+  std::transform(mu.begin(), mu.end(), E_shadow.im.begin(), G->im.begin(),
+                 std::multiplies<float>());
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/shadow_filter_update_gain.h b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/shadow_filter_update_gain.h
new file mode 100644
index 0000000000..8c72cf487b
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/shadow_filter_update_gain.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AEC3_SHADOW_FILTER_UPDATE_GAIN_H_
+#define MODULES_AUDIO_PROCESSING_AEC3_SHADOW_FILTER_UPDATE_GAIN_H_
+
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "modules/audio_processing/aec3/render_buffer.h"
+#include "modules/audio_processing/aec3/render_signal_analyzer.h"
+#include "rtc_base/constructormagic.h"
+
+namespace webrtc {
+
+// Provides functionality for computing the fixed gain for the shadow filter.
+class ShadowFilterUpdateGain {
+ public:
+  // Takes action in the case of a known echo path change.
+  void HandleEchoPathChange();
+
+  // Computes the gain.
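+  // The gain is zeroed while the render signal is poorly exciting, while the
+  // capture signal is saturated, and for the first size_partitions calls;
+  // otherwise it is a per-bin normalized step size applied to E_shadow, gated
+  // by a render noise floor and masked around narrow render bands.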
+  void Compute(const RenderBuffer& render_buffer,
+               const RenderSignalAnalyzer& render_signal_analyzer,
+               const FftData& E_shadow,
+               size_t size_partitions,
+               bool saturated_capture_signal,
+               FftData* G);
+
+ private:
+  // TODO(peah): Check whether this counter should instead be initialized to a
+  // large value.
+  size_t poor_signal_excitation_counter_ = 0;
+  size_t call_counter_ = 0;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AEC3_SHADOW_FILTER_UPDATE_GAIN_H_
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/shadow_filter_update_gain_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/shadow_filter_update_gain_unittest.cc
new file mode 100644
index 0000000000..b89fc718ac
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/shadow_filter_update_gain_unittest.cc
@@ -0,0 +1,186 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/shadow_filter_update_gain.h"
+
+#include <algorithm>
+#include <numeric>
+#include <string>
+#include <vector>
+
+#include "modules/audio_processing/aec3/adaptive_fir_filter.h"
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "modules/audio_processing/aec3/aec_state.h"
+#include "modules/audio_processing/test/echo_canceller_test_tools.h"
+#include "rtc_base/numerics/safe_minmax.h"
+#include "rtc_base/random.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace {
+
+// Method for performing the simulations needed to test the shadow filter
+// update gain functionality.
+void RunFilterUpdateTest(int num_blocks_to_process,
+                         size_t delay_samples,
+                         const std::vector<int>& blocks_with_saturation,
+                         std::array<float, kBlockSize>* e_last_block,
+                         std::array<float, kBlockSize>* y_last_block,
+                         FftData* G_last_block) {
+  ApmDataDumper data_dumper(42);
+  AdaptiveFirFilter main_filter(9, DetectOptimization(), &data_dumper);
+  AdaptiveFirFilter shadow_filter(9, DetectOptimization(), &data_dumper);
+  Aec3Fft fft;
+  RenderBuffer render_buffer(
+      Aec3Optimization::kNone, 3, main_filter.SizePartitions(),
+      std::vector<size_t>(1, main_filter.SizePartitions()));
+  std::array<float, kBlockSize> x_old;
+  x_old.fill(0.f);
+  ShadowFilterUpdateGain shadow_gain;
+  Random random_generator(42U);
+  std::vector<std::vector<float>> x(3, std::vector<float>(kBlockSize, 0.f));
+  std::vector<float> y(kBlockSize, 0.f);
+  AecState aec_state(EchoCanceller3Config{});
+  RenderSignalAnalyzer render_signal_analyzer;
+  std::array<float, kFftLength> s;
+  FftData S;
+  FftData G;
+  FftData E_shadow;
+  std::array<float, kBlockSize> e_shadow;
+
+  constexpr float kScale = 1.0f / kFftLengthBy2;
+
+  DelayBuffer<float> delay_buffer(delay_samples);
+  for (int k = 0; k < num_blocks_to_process; ++k) {
+    // Handle saturation.
+    bool saturation =
+        std::find(blocks_with_saturation.begin(), blocks_with_saturation.end(),
+                  k) != blocks_with_saturation.end();
+
+    // Create the render signal.
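+    // The render is white noise, and the capture y is that noise delayed by
+    // delay_samples, so the shadow filter should converge towards a pure
+    // delay.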
+ RandomizeSampleVector(&random_generator, x[0]); + delay_buffer.Delay(x[0], y); + render_buffer.Insert(x); + render_signal_analyzer.Update(render_buffer, delay_samples / kBlockSize); + + shadow_filter.Filter(render_buffer, &S); + fft.Ifft(S, &s); + std::transform(y.begin(), y.end(), s.begin() + kFftLengthBy2, + e_shadow.begin(), + [&](float a, float b) { return a - b * kScale; }); + std::for_each(e_shadow.begin(), e_shadow.end(), + [](float& a) { a = rtc::SafeClamp(a, -32768.f, 32767.f); }); + fft.ZeroPaddedFft(e_shadow, &E_shadow); + + shadow_gain.Compute(render_buffer, render_signal_analyzer, E_shadow, + shadow_filter.SizePartitions(), saturation, &G); + shadow_filter.Adapt(render_buffer, G); + } + + std::copy(e_shadow.begin(), e_shadow.end(), e_last_block->begin()); + std::copy(y.begin(), y.end(), y_last_block->begin()); + std::copy(G.re.begin(), G.re.end(), G_last_block->re.begin()); + std::copy(G.im.begin(), G.im.end(), G_last_block->im.begin()); +} + +std::string ProduceDebugText(size_t delay) { + std::ostringstream ss; + ss << ", Delay: " << delay; + return ss.str(); +} + +} // namespace + +#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) + +// Verifies that the check for non-null output gain parameter works. +TEST(ShadowFilterUpdateGain, NullDataOutputGain) { + ApmDataDumper data_dumper(42); + RenderBuffer render_buffer(Aec3Optimization::kNone, 3, 1, + std::vector<size_t>(1, 1)); + RenderSignalAnalyzer analyzer; + FftData E; + ShadowFilterUpdateGain gain; + EXPECT_DEATH(gain.Compute(render_buffer, analyzer, E, 1, false, nullptr), ""); +} + +#endif + +// Verifies that the gain formed causes the filter using it to converge. +TEST(ShadowFilterUpdateGain, GainCausesFilterToConverge) { + std::vector<int> blocks_with_echo_path_changes; + std::vector<int> blocks_with_saturation; + for (size_t delay_samples : {0, 64, 150, 200, 301}) { + SCOPED_TRACE(ProduceDebugText(delay_samples)); + + std::array<float, kBlockSize> e; + std::array<float, kBlockSize> y; + FftData G; + + RunFilterUpdateTest(500, delay_samples, blocks_with_saturation, &e, &y, &G); + + // Verify that the main filter is able to perform well. + EXPECT_LT(1000 * std::inner_product(e.begin(), e.end(), e.begin(), 0.f), + std::inner_product(y.begin(), y.end(), y.begin(), 0.f)); + } +} + +// Verifies that the magnitude of the gain on average decreases for a +// persistently exciting signal. +TEST(ShadowFilterUpdateGain, DecreasingGain) { + std::vector<int> blocks_with_echo_path_changes; + std::vector<int> blocks_with_saturation; + + std::array<float, kBlockSize> e; + std::array<float, kBlockSize> y; + FftData G_a; + FftData G_b; + FftData G_c; + std::array<float, kFftLengthBy2Plus1> G_a_power; + std::array<float, kFftLengthBy2Plus1> G_b_power; + std::array<float, kFftLengthBy2Plus1> G_c_power; + + RunFilterUpdateTest(100, 65, blocks_with_saturation, &e, &y, &G_a); + RunFilterUpdateTest(200, 65, blocks_with_saturation, &e, &y, &G_b); + RunFilterUpdateTest(300, 65, blocks_with_saturation, &e, &y, &G_c); + + G_a.Spectrum(Aec3Optimization::kNone, &G_a_power); + G_b.Spectrum(Aec3Optimization::kNone, &G_b_power); + G_c.Spectrum(Aec3Optimization::kNone, &G_c_power); + + EXPECT_GT(std::accumulate(G_a_power.begin(), G_a_power.end(), 0.), + std::accumulate(G_b_power.begin(), G_b_power.end(), 0.)); + + EXPECT_GT(std::accumulate(G_b_power.begin(), G_b_power.end(), 0.), + std::accumulate(G_c_power.begin(), G_c_power.end(), 0.)); +} + +// Verifies that the gain is zero when there is saturation. 
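+// Saturation is flagged for blocks 99..199, which covers the last processed
+// block, so the computed gain must equal the all-zero reference.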
+TEST(ShadowFilterUpdateGain, SaturationBehavior) { + std::vector<int> blocks_with_echo_path_changes; + std::vector<int> blocks_with_saturation; + for (int k = 99; k < 200; ++k) { + blocks_with_saturation.push_back(k); + } + + std::array<float, kBlockSize> e; + std::array<float, kBlockSize> y; + FftData G_a; + FftData G_a_ref; + G_a_ref.re.fill(0.f); + G_a_ref.im.fill(0.f); + + RunFilterUpdateTest(100, 65, blocks_with_saturation, &e, &y, &G_a); + + EXPECT_EQ(G_a_ref.re, G_a.re); + EXPECT_EQ(G_a_ref.im, G_a.im); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/subtractor.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/subtractor.cc new file mode 100644 index 0000000000..90e14f4f27 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/subtractor.cc @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/subtractor.h" + +#include <algorithm> +#include <numeric> + +#include "api/array_view.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_minmax.h" + +namespace webrtc { + +namespace { + +void PredictionError(const Aec3Fft& fft, + const FftData& S, + rtc::ArrayView<const float> y, + std::array<float, kBlockSize>* e, + FftData* E, + std::array<float, kBlockSize>* s) { + std::array<float, kFftLength> s_scratch; + fft.Ifft(S, &s_scratch); + constexpr float kScale = 1.0f / kFftLengthBy2; + std::transform(y.begin(), y.end(), s_scratch.begin() + kFftLengthBy2, + e->begin(), [&](float a, float b) { return a - b * kScale; }); + std::for_each(e->begin(), e->end(), + [](float& a) { a = rtc::SafeClamp(a, -32768.f, 32767.f); }); + fft.ZeroPaddedFft(*e, E); + + if (s) { + for (size_t k = 0; k < s->size(); ++k) { + (*s)[k] = kScale * s_scratch[k + kFftLengthBy2]; + } + } +} +} // namespace + +Subtractor::Subtractor(ApmDataDumper* data_dumper, + Aec3Optimization optimization) + : fft_(), + data_dumper_(data_dumper), + optimization_(optimization), + main_filter_(kAdaptiveFilterLength, optimization, data_dumper_), + shadow_filter_(kAdaptiveFilterLength, optimization, data_dumper_) { + RTC_DCHECK(data_dumper_); +} + +Subtractor::~Subtractor() = default; + +void Subtractor::HandleEchoPathChange( + const EchoPathVariability& echo_path_variability) { + if (echo_path_variability.delay_change) { + main_filter_.HandleEchoPathChange(); + shadow_filter_.HandleEchoPathChange(); + G_main_.HandleEchoPathChange(); + G_shadow_.HandleEchoPathChange(); + converged_filter_ = false; + } +} + +void Subtractor::Process(const RenderBuffer& render_buffer, + const rtc::ArrayView<const float> capture, + const RenderSignalAnalyzer& render_signal_analyzer, + const AecState& aec_state, + SubtractorOutput* output) { + RTC_DCHECK_EQ(kBlockSize, capture.size()); + rtc::ArrayView<const float> y = capture; + FftData& E_main = output->E_main; + FftData E_shadow; + std::array<float, kBlockSize>& e_main = output->e_main; + std::array<float, kBlockSize>& e_shadow = output->e_shadow; + + FftData S; + FftData& G = S; + + // Form the output of the main filter. 
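+  // S holds the frequency-domain filter output; PredictionError() then forms
+  // the time-domain error e = y - s (clamped to the 16-bit sample range) and
+  // its zero-padded transform E.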
+  main_filter_.Filter(render_buffer, &S);
+  PredictionError(fft_, S, y, &e_main, &E_main, &output->s_main);
+
+  // Form the output of the shadow filter.
+  shadow_filter_.Filter(render_buffer, &S);
+  PredictionError(fft_, S, y, &e_shadow, &E_shadow, nullptr);
+
+  if (!converged_filter_) {
+    const auto sum_of_squares = [](float a, float b) { return a + b * b; };
+    const float e2_main =
+        std::accumulate(e_main.begin(), e_main.end(), 0.f, sum_of_squares);
+    const float e2_shadow =
+        std::accumulate(e_shadow.begin(), e_shadow.end(), 0.f, sum_of_squares);
+    const float y2 = std::accumulate(y.begin(), y.end(), 0.f, sum_of_squares);
+
+    if (y2 > kBlockSize * 50.f * 50.f) {
+      converged_filter_ = (e2_main > 0.3 * y2 || e2_shadow > 0.1 * y2);
+    }
+  }
+
+  // Compute spectra for future use.
+  E_main.Spectrum(optimization_, &output->E2_main);
+  E_shadow.Spectrum(optimization_, &output->E2_shadow);
+
+  // Update the main filter.
+  G_main_.Compute(render_buffer, render_signal_analyzer, *output, main_filter_,
+                  aec_state.SaturatedCapture(), &G);
+  main_filter_.Adapt(render_buffer, G);
+  data_dumper_->DumpRaw("aec3_subtractor_G_main", G.re);
+  data_dumper_->DumpRaw("aec3_subtractor_G_main", G.im);
+
+  // Update the shadow filter.
+  G_shadow_.Compute(render_buffer, render_signal_analyzer, E_shadow,
+                    shadow_filter_.SizePartitions(),
+                    aec_state.SaturatedCapture(), &G);
+  shadow_filter_.Adapt(render_buffer, G);
+
+  data_dumper_->DumpRaw("aec3_subtractor_G_shadow", G.re);
+  data_dumper_->DumpRaw("aec3_subtractor_G_shadow", G.im);
+
+  main_filter_.DumpFilter("aec3_subtractor_H_main");
+  shadow_filter_.DumpFilter("aec3_subtractor_H_shadow");
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/subtractor.h b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/subtractor.h
new file mode 100644
index 0000000000..fe7928ea8a
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/subtractor.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AEC3_SUBTRACTOR_H_
+#define MODULES_AUDIO_PROCESSING_AEC3_SUBTRACTOR_H_
+
+#include <algorithm>
+#include <array>
+#include <vector>
+
+#include "modules/audio_processing/aec3/adaptive_fir_filter.h"
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "modules/audio_processing/aec3/aec3_fft.h"
+#include "modules/audio_processing/aec3/aec_state.h"
+#include "modules/audio_processing/aec3/echo_path_variability.h"
+#include "modules/audio_processing/aec3/main_filter_update_gain.h"
+#include "modules/audio_processing/aec3/render_buffer.h"
+#include "modules/audio_processing/aec3/shadow_filter_update_gain.h"
+#include "modules/audio_processing/aec3/subtractor_output.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "modules/audio_processing/utility/ooura_fft.h"
+#include "rtc_base/constructormagic.h"
+
+namespace webrtc {
+
+// Provides linear echo cancellation functionality.
+class Subtractor {
+ public:
+  Subtractor(ApmDataDumper* data_dumper, Aec3Optimization optimization);
+  ~Subtractor();
+
+  // Performs the echo subtraction.
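+  // Filters the render signal through the main and shadow adaptive filters,
+  // forms the respective prediction errors against the capture signal, and
+  // adapts both filters using their update gains.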
+ void Process(const RenderBuffer& render_buffer, + const rtc::ArrayView<const float> capture, + const RenderSignalAnalyzer& render_signal_analyzer, + const AecState& aec_state, + SubtractorOutput* output); + + void HandleEchoPathChange(const EchoPathVariability& echo_path_variability); + + // Returns the block-wise frequency response for the main adaptive filter. + const std::vector<std::array<float, kFftLengthBy2Plus1>>& + FilterFrequencyResponse() const { + return main_filter_.FilterFrequencyResponse(); + } + + // Returns the estimate of the impulse response for the main adaptive filter. + const std::array<float, kAdaptiveFilterTimeDomainLength>& + FilterImpulseResponse() const { + return main_filter_.FilterImpulseResponse(); + } + + bool ConvergedFilter() const { return converged_filter_; } + + private: + const Aec3Fft fft_; + ApmDataDumper* data_dumper_; + const Aec3Optimization optimization_; + AdaptiveFirFilter main_filter_; + AdaptiveFirFilter shadow_filter_; + MainFilterUpdateGain G_main_; + ShadowFilterUpdateGain G_shadow_; + bool converged_filter_ = false; + RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(Subtractor); +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_SUBTRACTOR_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/subtractor_output.h b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/subtractor_output.h new file mode 100644 index 0000000000..8655665b35 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/subtractor_output.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_SUBTRACTOR_OUTPUT_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_SUBTRACTOR_OUTPUT_H_ + +#include <array> + +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/fft_data.h" + +namespace webrtc { + +// Stores the values being returned from the echo subtractor. +struct SubtractorOutput { + std::array<float, kBlockSize> s_main; + std::array<float, kBlockSize> e_main; + std::array<float, kBlockSize> e_shadow; + FftData E_main; + std::array<float, kFftLengthBy2Plus1> E2_main; + std::array<float, kFftLengthBy2Plus1> E2_shadow; + + void Reset() { + s_main.fill(0.f); + e_main.fill(0.f); + e_shadow.fill(0.f); + E_main.re.fill(0.f); + E_main.im.fill(0.f); + E2_main.fill(0.f); + E2_shadow.fill(0.f); + } +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_SUBTRACTOR_OUTPUT_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/subtractor_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/subtractor_unittest.cc new file mode 100644 index 0000000000..b10421b428 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/subtractor_unittest.cc @@ -0,0 +1,172 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/subtractor.h"
+
+#include <algorithm>
+#include <numeric>
+#include <string>
+
+#include "modules/audio_processing/aec3/aec_state.h"
+#include "modules/audio_processing/test/echo_canceller_test_tools.h"
+#include "rtc_base/random.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace {
+
+float RunSubtractorTest(int num_blocks_to_process,
+                        int delay_samples,
+                        bool uncorrelated_inputs,
+                        const std::vector<int>& blocks_with_echo_path_changes) {
+  ApmDataDumper data_dumper(42);
+  Subtractor subtractor(&data_dumper, DetectOptimization());
+  std::vector<std::vector<float>> x(3, std::vector<float>(kBlockSize, 0.f));
+  std::vector<float> y(kBlockSize, 0.f);
+  std::array<float, kBlockSize> x_old;
+  SubtractorOutput output;
+  RenderBuffer render_buffer(Aec3Optimization::kNone, 3, kAdaptiveFilterLength,
+                             std::vector<size_t>(1, kAdaptiveFilterLength));
+  RenderSignalAnalyzer render_signal_analyzer;
+  Random random_generator(42U);
+  Aec3Fft fft;
+  std::array<float, kFftLengthBy2Plus1> Y2;
+  std::array<float, kFftLengthBy2Plus1> E2_main;
+  std::array<float, kFftLengthBy2Plus1> E2_shadow;
+  AecState aec_state(EchoCanceller3Config{});
+  x_old.fill(0.f);
+  Y2.fill(0.f);
+  E2_main.fill(0.f);
+  E2_shadow.fill(0.f);
+
+  DelayBuffer<float> delay_buffer(delay_samples);
+  for (int k = 0; k < num_blocks_to_process; ++k) {
+    RandomizeSampleVector(&random_generator, x[0]);
+    if (uncorrelated_inputs) {
+      RandomizeSampleVector(&random_generator, y);
+    } else {
+      delay_buffer.Delay(x[0], y);
+    }
+    render_buffer.Insert(x);
+    render_signal_analyzer.Update(render_buffer, aec_state.FilterDelay());
+
+    // Handle echo path changes.
+    if (std::find(blocks_with_echo_path_changes.begin(),
+                  blocks_with_echo_path_changes.end(),
+                  k) != blocks_with_echo_path_changes.end()) {
+      subtractor.HandleEchoPathChange(EchoPathVariability(true, true));
+    }
+    subtractor.Process(render_buffer, y, render_signal_analyzer, aec_state,
+                       &output);
+
+    aec_state.HandleEchoPathChange(EchoPathVariability(false, false));
+    aec_state.Update(subtractor.FilterFrequencyResponse(),
+                     subtractor.FilterImpulseResponse(),
+                     subtractor.ConvergedFilter(), delay_samples / kBlockSize,
+                     render_buffer, E2_main, Y2, x[0], output.s_main, false);
+  }
+
+  const float output_power = std::inner_product(
+      output.e_main.begin(), output.e_main.end(), output.e_main.begin(), 0.f);
+  const float y_power = std::inner_product(y.begin(), y.end(), y.begin(), 0.f);
+  if (y_power == 0.f) {
+    ADD_FAILURE();
+    return -1.0;
+  }
+  return output_power / y_power;
+}
+
+std::string ProduceDebugText(size_t delay) {
+  std::ostringstream ss;
+  ss << "Delay: " << delay;
+  return ss.str();
+}
+
+}  // namespace
+
+#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
+
+// Verifies that the check for a non-null data dumper works.
+TEST(Subtractor, NullDataDumper) {
+  EXPECT_DEATH(Subtractor(nullptr, DetectOptimization()), "");
+}
+
+// Verifies the check for null subtractor output.
+// TODO(peah): Re-enable the test once the issue with memory leaks during DEATH
+// tests on test bots has been fixed.
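+// While disabled, the test can still be run manually by passing
+// --gtest_also_run_disabled_tests to the test binary.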
+TEST(Subtractor, DISABLED_NullOutput) { + ApmDataDumper data_dumper(42); + Subtractor subtractor(&data_dumper, DetectOptimization()); + RenderBuffer render_buffer(Aec3Optimization::kNone, 3, kAdaptiveFilterLength, + std::vector<size_t>(1, kAdaptiveFilterLength)); + RenderSignalAnalyzer render_signal_analyzer; + std::vector<float> y(kBlockSize, 0.f); + + EXPECT_DEATH(subtractor.Process(render_buffer, y, render_signal_analyzer, + AecState(EchoCanceller3Config{}), nullptr), + ""); +} + +// Verifies the check for the capture signal size. +TEST(Subtractor, WrongCaptureSize) { + ApmDataDumper data_dumper(42); + Subtractor subtractor(&data_dumper, DetectOptimization()); + RenderBuffer render_buffer(Aec3Optimization::kNone, 3, kAdaptiveFilterLength, + std::vector<size_t>(1, kAdaptiveFilterLength)); + RenderSignalAnalyzer render_signal_analyzer; + std::vector<float> y(kBlockSize - 1, 0.f); + SubtractorOutput output; + + EXPECT_DEATH(subtractor.Process(render_buffer, y, render_signal_analyzer, + AecState(EchoCanceller3Config{}), &output), + ""); +} + +#endif + +// Verifies that the subtractor is able to converge on correlated data. +TEST(Subtractor, Convergence) { + std::vector<int> blocks_with_echo_path_changes; + for (size_t delay_samples : {0, 64, 150, 200, 301}) { + SCOPED_TRACE(ProduceDebugText(delay_samples)); + + float echo_to_nearend_power = RunSubtractorTest( + 100, delay_samples, false, blocks_with_echo_path_changes); + EXPECT_GT(0.1f, echo_to_nearend_power); + } +} + +// Verifies that the subtractor does not converge on uncorrelated signals. +TEST(Subtractor, NonConvergenceOnUncorrelatedSignals) { + std::vector<int> blocks_with_echo_path_changes; + for (size_t delay_samples : {0, 64, 150, 200, 301}) { + SCOPED_TRACE(ProduceDebugText(delay_samples)); + + float echo_to_nearend_power = RunSubtractorTest( + 100, delay_samples, true, blocks_with_echo_path_changes); + EXPECT_NEAR(1.f, echo_to_nearend_power, 0.05); + } +} + +// Verifies that the subtractor is properly reset when there is an echo path +// change. +TEST(Subtractor, EchoPathChangeReset) { + std::vector<int> blocks_with_echo_path_changes; + blocks_with_echo_path_changes.push_back(99); + for (size_t delay_samples : {0, 64, 150, 200, 301}) { + SCOPED_TRACE(ProduceDebugText(delay_samples)); + + float echo_to_nearend_power = RunSubtractorTest( + 100, delay_samples, false, blocks_with_echo_path_changes); + EXPECT_NEAR(1.f, echo_to_nearend_power, 0.0000001f); + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/suppression_filter.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/suppression_filter.cc new file mode 100644 index 0000000000..8c92bf5762 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/suppression_filter.cc @@ -0,0 +1,172 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/aec3/suppression_filter.h" + +#include <math.h> +#include <algorithm> +#include <cstring> +#include <functional> +#include <numeric> + +#include "modules/audio_processing/utility/ooura_fft.h" +#include "rtc_base/numerics/safe_minmax.h" + +namespace webrtc { +namespace { + +// Hanning window from Matlab command win = sqrt(hanning(128)). +const float kSqrtHanning[kFftLength] = { + 0.00000000000000f, 0.02454122852291f, 0.04906767432742f, 0.07356456359967f, + 0.09801714032956f, 0.12241067519922f, 0.14673047445536f, 0.17096188876030f, + 0.19509032201613f, 0.21910124015687f, 0.24298017990326f, 0.26671275747490f, + 0.29028467725446f, 0.31368174039889f, 0.33688985339222f, 0.35989503653499f, + 0.38268343236509f, 0.40524131400499f, 0.42755509343028f, 0.44961132965461f, + 0.47139673682600f, 0.49289819222978f, 0.51410274419322f, 0.53499761988710f, + 0.55557023301960f, 0.57580819141785f, 0.59569930449243f, 0.61523159058063f, + 0.63439328416365f, 0.65317284295378f, 0.67155895484702f, 0.68954054473707f, + 0.70710678118655f, 0.72424708295147f, 0.74095112535496f, 0.75720884650648f, + 0.77301045336274f, 0.78834642762661f, 0.80320753148064f, 0.81758481315158f, + 0.83146961230255f, 0.84485356524971f, 0.85772861000027f, 0.87008699110871f, + 0.88192126434835f, 0.89322430119552f, 0.90398929312344f, 0.91420975570353f, + 0.92387953251129f, 0.93299279883474f, 0.94154406518302f, 0.94952818059304f, + 0.95694033573221f, 0.96377606579544f, 0.97003125319454f, 0.97570213003853f, + 0.98078528040323f, 0.98527764238894f, 0.98917650996478f, 0.99247953459871f, + 0.99518472667220f, 0.99729045667869f, 0.99879545620517f, 0.99969881869620f, + 1.00000000000000f, 0.99969881869620f, 0.99879545620517f, 0.99729045667869f, + 0.99518472667220f, 0.99247953459871f, 0.98917650996478f, 0.98527764238894f, + 0.98078528040323f, 0.97570213003853f, 0.97003125319454f, 0.96377606579544f, + 0.95694033573221f, 0.94952818059304f, 0.94154406518302f, 0.93299279883474f, + 0.92387953251129f, 0.91420975570353f, 0.90398929312344f, 0.89322430119552f, + 0.88192126434835f, 0.87008699110871f, 0.85772861000027f, 0.84485356524971f, + 0.83146961230255f, 0.81758481315158f, 0.80320753148064f, 0.78834642762661f, + 0.77301045336274f, 0.75720884650648f, 0.74095112535496f, 0.72424708295147f, + 0.70710678118655f, 0.68954054473707f, 0.67155895484702f, 0.65317284295378f, + 0.63439328416365f, 0.61523159058063f, 0.59569930449243f, 0.57580819141785f, + 0.55557023301960f, 0.53499761988710f, 0.51410274419322f, 0.49289819222978f, + 0.47139673682600f, 0.44961132965461f, 0.42755509343028f, 0.40524131400499f, + 0.38268343236509f, 0.35989503653499f, 0.33688985339222f, 0.31368174039889f, + 0.29028467725446f, 0.26671275747490f, 0.24298017990326f, 0.21910124015687f, + 0.19509032201613f, 0.17096188876030f, 0.14673047445536f, 0.12241067519922f, + 0.09801714032956f, 0.07356456359967f, 0.04906767432742f, 0.02454122852291f}; + +} // namespace + +SuppressionFilter::SuppressionFilter(int sample_rate_hz) + : sample_rate_hz_(sample_rate_hz), + fft_(), + e_output_old_(NumBandsForRate(sample_rate_hz_)) { + RTC_DCHECK(ValidFullBandRate(sample_rate_hz_)); + e_input_old_.fill(0.f); + std::for_each(e_output_old_.begin(), e_output_old_.end(), + [](std::array<float, kFftLengthBy2>& a) { a.fill(0.f); }); +} + +SuppressionFilter::~SuppressionFilter() = default; + +void SuppressionFilter::ApplyGain( + const FftData& comfort_noise, + const FftData& comfort_noise_high_band, + const std::array<float, kFftLengthBy2Plus1>& suppression_gain, + float high_bands_gain, + 
std::vector<std::vector<float>>* e) { + RTC_DCHECK(e); + RTC_DCHECK_EQ(e->size(), NumBandsForRate(sample_rate_hz_)); + FftData E; + std::array<float, kFftLength> e_extended; + constexpr float kIfftNormalization = 2.f / kFftLength; + + // Analysis filterbank. + std::transform(e_input_old_.begin(), e_input_old_.end(), + std::begin(kSqrtHanning), e_extended.begin(), + std::multiplies<float>()); + std::transform((*e)[0].begin(), (*e)[0].end(), + std::begin(kSqrtHanning) + kFftLengthBy2, + e_extended.begin() + kFftLengthBy2, std::multiplies<float>()); + std::copy((*e)[0].begin(), (*e)[0].end(), e_input_old_.begin()); + fft_.Fft(&e_extended, &E); + + // Apply gain. + std::transform(suppression_gain.begin(), suppression_gain.end(), E.re.begin(), + E.re.begin(), std::multiplies<float>()); + std::transform(suppression_gain.begin(), suppression_gain.end(), E.im.begin(), + E.im.begin(), std::multiplies<float>()); + + // Compute and add the comfort noise. + std::array<float, kFftLengthBy2Plus1> scaled_comfort_noise; + std::transform(suppression_gain.begin(), suppression_gain.end(), + comfort_noise.re.begin(), scaled_comfort_noise.begin(), + [](float a, float b) { return std::max(1.f - a, 0.f) * b; }); + std::transform(scaled_comfort_noise.begin(), scaled_comfort_noise.end(), + E.re.begin(), E.re.begin(), std::plus<float>()); + std::transform(suppression_gain.begin(), suppression_gain.end(), + comfort_noise.im.begin(), scaled_comfort_noise.begin(), + [](float a, float b) { return std::max(1.f - a, 0.f) * b; }); + std::transform(scaled_comfort_noise.begin(), scaled_comfort_noise.end(), + E.im.begin(), E.im.begin(), std::plus<float>()); + + // Synthesis filterbank. + fft_.Ifft(E, &e_extended); + std::transform(e_output_old_[0].begin(), e_output_old_[0].end(), + std::begin(kSqrtHanning) + kFftLengthBy2, (*e)[0].begin(), + [&](float a, float b) { return kIfftNormalization * a * b; }); + std::transform(e_extended.begin(), e_extended.begin() + kFftLengthBy2, + std::begin(kSqrtHanning), e_extended.begin(), + [&](float a, float b) { return kIfftNormalization * a * b; }); + std::transform((*e)[0].begin(), (*e)[0].end(), e_extended.begin(), + (*e)[0].begin(), std::plus<float>()); + std::for_each((*e)[0].begin(), (*e)[0].end(), [](float& x_k) { + x_k = rtc::SafeClamp(x_k, -32768.f, 32767.f); + }); + std::copy(e_extended.begin() + kFftLengthBy2, e_extended.begin() + kFftLength, + std::begin(e_output_old_[0])); + + if (e->size() > 1) { + // Form time-domain high-band noise. + std::array<float, kFftLength> time_domain_high_band_noise; + std::transform(comfort_noise_high_band.re.begin(), + comfort_noise_high_band.re.end(), E.re.begin(), + [&](float a) { return kIfftNormalization * a; }); + std::transform(comfort_noise_high_band.im.begin(), + comfort_noise_high_band.im.end(), E.im.begin(), + [&](float a) { return kIfftNormalization * a; }); + fft_.Ifft(E, &time_domain_high_band_noise); + + // Scale and apply the noise to the signals. 
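+    // The noise is weighted by 0.4 * (1 - high_bands_gain), so more comfort
+    // noise is injected the more the upper bands are attenuated, and the sum
+    // is clamped to the 16-bit sample range.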
+ const float high_bands_noise_scaling = + 0.4f * std::max(1.f - high_bands_gain, 0.f); + + std::transform( + (*e)[1].begin(), (*e)[1].end(), time_domain_high_band_noise.begin(), + (*e)[1].begin(), [&](float a, float b) { + return std::max( + std::min(b * high_bands_noise_scaling + high_bands_gain * a, + 32767.0f), + -32768.0f); + }); + + if (e->size() > 2) { + RTC_DCHECK_EQ(3, e->size()); + std::for_each((*e)[2].begin(), (*e)[2].end(), [&](float& a) { + a = rtc::SafeClamp(a * high_bands_gain, -32768.f, 32767.f); + }); + } + + std::array<float, kFftLengthBy2> tmp; + for (size_t k = 1; k < e->size(); ++k) { + std::copy((*e)[k].begin(), (*e)[k].end(), tmp.begin()); + std::copy(e_output_old_[k].begin(), e_output_old_[k].end(), + (*e)[k].begin()); + std::copy(tmp.begin(), tmp.end(), e_output_old_[k].begin()); + } + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/suppression_filter.h b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/suppression_filter.h new file mode 100644 index 0000000000..5f91dea28f --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/suppression_filter.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_SUPPRESSION_FILTER_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_SUPPRESSION_FILTER_H_ + +#include <array> +#include <vector> + +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/aec3_fft.h" +#include "rtc_base/constructormagic.h" + +namespace webrtc { + +class SuppressionFilter { + public: + explicit SuppressionFilter(int sample_rate_hz); + ~SuppressionFilter(); + void ApplyGain(const FftData& comfort_noise, + const FftData& comfort_noise_high_bands, + const std::array<float, kFftLengthBy2Plus1>& suppression_gain, + float high_bands_gain, + std::vector<std::vector<float>>* e); + + private: + const int sample_rate_hz_; + const OouraFft ooura_fft_; + const Aec3Fft fft_; + std::array<float, kFftLengthBy2> e_input_old_; + std::vector<std::array<float, kFftLengthBy2>> e_output_old_; + RTC_DISALLOW_COPY_AND_ASSIGN(SuppressionFilter); +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_SUPPRESSION_FILTER_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/suppression_filter_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/suppression_filter_unittest.cc new file mode 100644 index 0000000000..51b3f91f2a --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/suppression_filter_unittest.cc @@ -0,0 +1,181 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "modules/audio_processing/aec3/suppression_filter.h"
+
+#include <math.h>
+#include <algorithm>
+#include <numeric>
+
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace {
+
+constexpr float kPi = 3.141592f;
+
+void ProduceSinusoid(int sample_rate_hz,
+                     float sinusoidal_frequency_hz,
+                     size_t* sample_counter,
+                     rtc::ArrayView<float> x) {
+  // Produce a sinusoid of the specified frequency.
+  for (size_t k = *sample_counter, j = 0; k < (*sample_counter + kBlockSize);
+       ++k, ++j) {
+    x[j] =
+        32767.f * sin(2.f * kPi * sinusoidal_frequency_hz * k / sample_rate_hz);
+  }
+  *sample_counter = *sample_counter + kBlockSize;
+}
+
+}  // namespace
+
+#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
+
+// Verifies the check for null suppressor output.
+TEST(SuppressionFilter, NullOutput) {
+  FftData cn;
+  FftData cn_high_bands;
+  std::array<float, kFftLengthBy2Plus1> gain;
+
+  EXPECT_DEATH(SuppressionFilter(16000).ApplyGain(cn, cn_high_bands, gain, 1.0f,
+                                                  nullptr),
+               "");
+}
+
+// Verifies the check for allowed sample rate.
+TEST(SuppressionFilter, ProperSampleRate) {
+  EXPECT_DEATH(SuppressionFilter(16001), "");
+}
+
+#endif
+
+// Verifies that no comfort noise is added when the gain is 1.
+TEST(SuppressionFilter, ComfortNoiseInUnityGain) {
+  SuppressionFilter filter(48000);
+  FftData cn;
+  FftData cn_high_bands;
+  std::array<float, kFftLengthBy2Plus1> gain;
+
+  gain.fill(1.f);
+  cn.re.fill(1.f);
+  cn.im.fill(1.f);
+  cn_high_bands.re.fill(1.f);
+  cn_high_bands.im.fill(1.f);
+
+  std::vector<std::vector<float>> e(3, std::vector<float>(kBlockSize, 0.f));
+  std::vector<std::vector<float>> e_ref = e;
+  filter.ApplyGain(cn, cn_high_bands, gain, 1.f, &e);
+
+  for (size_t k = 0; k < e.size(); ++k) {
+    EXPECT_EQ(e_ref[k], e[k]);
+  }
+}
+
+// Verifies that the suppressor is able to suppress a signal.
+TEST(SuppressionFilter, SignalSuppression) {
+  SuppressionFilter filter(48000);
+  FftData cn;
+  FftData cn_high_bands;
+  std::array<float, kFftLengthBy2Plus1> gain;
+  std::vector<std::vector<float>> e(3, std::vector<float>(kBlockSize, 0.f));
+
+  gain.fill(1.f);
+  std::for_each(gain.begin() + 10, gain.end(), [](float& a) { a = 0.f; });
+
+  cn.re.fill(0.f);
+  cn.im.fill(0.f);
+  cn_high_bands.re.fill(0.f);
+  cn_high_bands.im.fill(0.f);
+
+  size_t sample_counter = 0;
+
+  float e0_input = 0.f;
+  float e0_output = 0.f;
+  for (size_t k = 0; k < 100; ++k) {
+    ProduceSinusoid(16000, 16000 * 40 / kFftLengthBy2 / 2, &sample_counter,
+                    e[0]);
+    e0_input =
+        std::inner_product(e[0].begin(), e[0].end(), e[0].begin(), e0_input);
+    filter.ApplyGain(cn, cn_high_bands, gain, 1.f, &e);
+    e0_output =
+        std::inner_product(e[0].begin(), e[0].end(), e[0].begin(), e0_output);
+  }
+
+  EXPECT_LT(e0_output, e0_input / 1000.f);
+}
+
+// Verifies that the suppressor is able to pass through a desired signal while
+// applying suppression for some frequencies.
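+// A low-frequency sinusoid is passed through a gain that is unity below bin 30
+// and zero above it; at least 90% of the input energy must come through.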
+TEST(SuppressionFilter, SignalTransparency) {
+  SuppressionFilter filter(48000);
+  FftData cn;
+  FftData cn_high_bands;
+  std::array<float, kFftLengthBy2Plus1> gain;
+  std::vector<std::vector<float>> e(3, std::vector<float>(kBlockSize, 0.f));
+
+  gain.fill(1.f);
+  std::for_each(gain.begin() + 30, gain.end(), [](float& a) { a = 0.f; });
+
+  cn.re.fill(0.f);
+  cn.im.fill(0.f);
+  cn_high_bands.re.fill(0.f);
+  cn_high_bands.im.fill(0.f);
+
+  size_t sample_counter = 0;
+
+  float e0_input = 0.f;
+  float e0_output = 0.f;
+  for (size_t k = 0; k < 100; ++k) {
+    ProduceSinusoid(16000, 16000 * 10 / kFftLengthBy2 / 2, &sample_counter,
+                    e[0]);
+    e0_input =
+        std::inner_product(e[0].begin(), e[0].end(), e[0].begin(), e0_input);
+    filter.ApplyGain(cn, cn_high_bands, gain, 1.f, &e);
+    e0_output =
+        std::inner_product(e[0].begin(), e[0].end(), e[0].begin(), e0_output);
+  }
+
+  EXPECT_LT(0.9f * e0_input, e0_output);
+}
+
+// Verifies the delay introduced by the suppressor.
+TEST(SuppressionFilter, Delay) {
+  SuppressionFilter filter(48000);
+  FftData cn;
+  FftData cn_high_bands;
+  std::array<float, kFftLengthBy2Plus1> gain;
+  std::vector<std::vector<float>> e(3, std::vector<float>(kBlockSize, 0.f));
+
+  gain.fill(1.f);
+
+  cn.re.fill(0.f);
+  cn.im.fill(0.f);
+  cn_high_bands.re.fill(0.f);
+  cn_high_bands.im.fill(0.f);
+
+  for (size_t k = 0; k < 100; ++k) {
+    for (size_t j = 0; j < 3; ++j) {
+      for (size_t i = 0; i < kBlockSize; ++i) {
+        e[j][i] = k * kBlockSize + i;
+      }
+    }
+
+    filter.ApplyGain(cn, cn_high_bands, gain, 1.f, &e);
+    if (k > 2) {
+      for (size_t j = 0; j < 2; ++j) {
+        for (size_t i = 0; i < kBlockSize; ++i) {
+          EXPECT_NEAR(k * kBlockSize + i - kBlockSize, e[j][i], 0.01);
+        }
+      }
+    }
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/suppression_gain.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/suppression_gain.cc
new file mode 100644
index 0000000000..ae4d4ace23
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/suppression_gain.cc
@@ -0,0 +1,426 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/suppression_gain.h"
+
+#include "typedefs.h"  // NOLINT(build/include)
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+#include <emmintrin.h>
+#endif
+#include <math.h>
+#include <algorithm>
+#include <functional>
+#include <numeric>
+
+#include "modules/audio_processing/aec3/vector_math.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+namespace {
+
+// Reduce gain to avoid narrow band echo leakage.
+void NarrowBandAttenuation(int narrow_bin,
+                           std::array<float, kFftLengthBy2Plus1>* gain) {
+  const int upper_bin =
+      std::min(narrow_bin + 6, static_cast<int>(kFftLengthBy2Plus1 - 1));
+  for (int k = std::max(0, narrow_bin - 6); k <= upper_bin; ++k) {
+    (*gain)[k] = std::min((*gain)[k], 0.001f);
+  }
+}
+
+// Adjust the gains according to the presence of known external filters.
+void AdjustForExternalFilters(std::array<float, kFftLengthBy2Plus1>* gain) {
+  // Limit the low frequency gains to avoid the impact of the high-pass filter
+  // on the lower-frequency gain influencing the overall achieved gain.
+ (*gain)[0] = (*gain)[1] = std::min((*gain)[1], (*gain)[2]); + + // Limit the high frequency gains to avoid the impact of the anti-aliasing + // filter on the upper-frequency gains influencing the overall achieved + // gain. TODO(peah): Update this when new anti-aliasing filters are + // implemented. + constexpr size_t kAntiAliasingImpactLimit = (64 * 2000) / 8000; + const float min_upper_gain = (*gain)[kAntiAliasingImpactLimit]; + std::for_each( + gain->begin() + kAntiAliasingImpactLimit, gain->end() - 1, + [min_upper_gain](float& a) { a = std::min(a, min_upper_gain); }); + (*gain)[kFftLengthBy2] = (*gain)[kFftLengthBy2Minus1]; +} + +// Computes the gain to apply for the bands beyond the first band. +float UpperBandsGain( + const rtc::Optional<int>& narrow_peak_band, + bool saturated_echo, + const std::vector<std::vector<float>>& render, + const std::array<float, kFftLengthBy2Plus1>& low_band_gain) { + RTC_DCHECK_LT(0, render.size()); + if (render.size() == 1) { + return 1.f; + } + + if (narrow_peak_band && + (*narrow_peak_band > static_cast<int>(kFftLengthBy2Plus1 - 10))) { + return 0.001f; + } + + constexpr size_t kLowBandGainLimit = kFftLengthBy2 / 2; + const float gain_below_8_khz = *std::min_element( + low_band_gain.begin() + kLowBandGainLimit, low_band_gain.end()); + + // Always attenuate the upper bands when there is saturated echo. + if (saturated_echo) { + return std::min(0.001f, gain_below_8_khz); + } + + // Compute the upper and lower band energies. + const auto sum_of_squares = [](float a, float b) { return a + b * b; }; + const float low_band_energy = + std::accumulate(render[0].begin(), render[0].end(), 0.f, sum_of_squares); + float high_band_energy = 0.f; + for (size_t k = 1; k < render.size(); ++k) { + const float energy = std::accumulate(render[k].begin(), render[k].end(), + 0.f, sum_of_squares); + high_band_energy = std::max(high_band_energy, energy); + } + + // If there is more power in the lower frequencies than the upper frequencies, + // or if the power in upper frequencies is low, do not bound the gain in the + // upper bands. + float anti_howling_gain; + constexpr float kThreshold = kBlockSize * 10.f * 10.f / 4.f; + if (high_band_energy < std::max(low_band_energy, kThreshold)) { + anti_howling_gain = 1.f; + } else { + // In all other cases, bound the gain for upper frequencies. + RTC_DCHECK_LE(low_band_energy, high_band_energy); + RTC_DCHECK_NE(0.f, high_band_energy); + anti_howling_gain = 0.01f * sqrtf(low_band_energy / high_band_energy); + } + + // Choose the gain as the minimum of the lower and upper gains. + return std::min(gain_below_8_khz, anti_howling_gain); +} + +// Limits the gain increase. 
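+// The per-bin increase factor is adapted using one of four parameter sets
+// (nonlinear, low-noise, normal, saturation) selected from the current
+// operating conditions.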
+void UpdateMaxGainIncrease( + const EchoCanceller3Config& config, + size_t no_saturation_counter, + bool low_noise_render, + bool linear_echo_estimate, + const std::array<float, kFftLengthBy2Plus1>& last_echo, + const std::array<float, kFftLengthBy2Plus1>& echo, + const std::array<float, kFftLengthBy2Plus1>& last_gain, + const std::array<float, kFftLengthBy2Plus1>& new_gain, + std::array<float, kFftLengthBy2Plus1>* gain_increase) { + float max_increasing; + float max_decreasing; + float rate_increasing; + float rate_decreasing; + float min_increasing; + float min_decreasing; + + auto& param = config.gain_updates; + if (linear_echo_estimate) { + max_increasing = param.nonlinear.max_inc; + max_decreasing = param.nonlinear.max_dec; + rate_increasing = param.nonlinear.rate_inc; + rate_decreasing = param.nonlinear.rate_dec; + min_increasing = param.nonlinear.min_inc; + min_decreasing = param.nonlinear.min_dec; + } else if (low_noise_render) { + max_increasing = param.low_noise.max_inc; + max_decreasing = param.low_noise.max_dec; + rate_increasing = param.low_noise.rate_inc; + rate_decreasing = param.low_noise.rate_dec; + min_increasing = param.low_noise.min_inc; + min_decreasing = param.low_noise.min_dec; + } else if (no_saturation_counter > 10) { + max_increasing = param.normal.max_inc; + max_decreasing = param.normal.max_dec; + rate_increasing = param.normal.rate_inc; + rate_decreasing = param.normal.rate_dec; + min_increasing = param.normal.min_inc; + min_decreasing = param.normal.min_dec; + } else { + max_increasing = param.saturation.max_inc; + max_decreasing = param.saturation.max_dec; + rate_increasing = param.saturation.rate_inc; + rate_decreasing = param.saturation.rate_dec; + min_increasing = param.saturation.min_inc; + min_decreasing = param.saturation.min_dec; + } + + for (size_t k = 0; k < new_gain.size(); ++k) { + if (echo[k] > last_echo[k]) { + (*gain_increase)[k] = + new_gain[k] > last_gain[k] + ? std::min(max_increasing, (*gain_increase)[k] * rate_increasing) + : min_increasing; + } else { + (*gain_increase)[k] = + new_gain[k] > last_gain[k] + ? std::min(max_decreasing, (*gain_increase)[k] * rate_decreasing) + : min_decreasing; + } + } +} + +// Computes the gain to reduce the echo to a non audible level. +void GainToNoAudibleEcho( + const EchoCanceller3Config& config, + bool low_noise_render, + bool saturated_echo, + bool saturating_echo_path, + bool linear_echo_estimate, + const std::array<float, kFftLengthBy2Plus1>& nearend, + const std::array<float, kFftLengthBy2Plus1>& echo, + const std::array<float, kFftLengthBy2Plus1>& masker, + const std::array<float, kFftLengthBy2Plus1>& min_gain, + const std::array<float, kFftLengthBy2Plus1>& max_gain, + const std::array<float, kFftLengthBy2Plus1>& one_by_echo, + std::array<float, kFftLengthBy2Plus1>* gain) { + float nearend_masking_margin = 0.f; + if (linear_echo_estimate) { + nearend_masking_margin = + low_noise_render + ? config.gain_mask.m9 + : (saturated_echo ? config.gain_mask.m2 : config.gain_mask.m3); + } else { + nearend_masking_margin = config.gain_mask.m7; + } + + RTC_DCHECK_LE(0.f, nearend_masking_margin); + RTC_DCHECK_GT(1.f, nearend_masking_margin); + const float one_by_one_minus_nearend_masking_margin = + 1.f / (1.0f - nearend_masking_margin); + + const float masker_margin = + linear_echo_estimate ? 
config.gain_mask.m1 : config.gain_mask.m8; + + for (size_t k = 0; k < gain->size(); ++k) { + const float unity_gain_masker = std::max(nearend[k], masker[k]); + RTC_DCHECK_LE(0.f, nearend_masking_margin * unity_gain_masker); + if (echo[k] <= nearend_masking_margin * unity_gain_masker || + unity_gain_masker <= 0.f) { + (*gain)[k] = 1.f; + } else { + RTC_DCHECK_LT(0.f, unity_gain_masker); + (*gain)[k] = std::max(0.f, (1.f - 5.f * echo[k] / unity_gain_masker) * + one_by_one_minus_nearend_masking_margin); + (*gain)[k] = + std::max(masker_margin * masker[k] * one_by_echo[k], (*gain)[k]); + } + + (*gain)[k] = std::min(std::max((*gain)[k], min_gain[k]), max_gain[k]); + } +} + +// TODO(peah): Make adaptive to take the actual filter error into account. +constexpr size_t kUpperAccurateBandPlus1 = 29; + +// Computes the signal output power that masks the echo signal. +void MaskingPower(const EchoCanceller3Config& config, + const std::array<float, kFftLengthBy2Plus1>& nearend, + const std::array<float, kFftLengthBy2Plus1>& comfort_noise, + const std::array<float, kFftLengthBy2Plus1>& last_masker, + const std::array<float, kFftLengthBy2Plus1>& gain, + std::array<float, kFftLengthBy2Plus1>* masker) { + std::array<float, kFftLengthBy2Plus1> side_band_masker; + float max_nearend_after_gain = 0.f; + for (size_t k = 0; k < gain.size(); ++k) { + const float nearend_after_gain = nearend[k] * gain[k]; + max_nearend_after_gain = + std::max(max_nearend_after_gain, nearend_after_gain); + side_band_masker[k] = nearend_after_gain + comfort_noise[k]; + (*masker)[k] = comfort_noise[k] + config.gain_mask.m4 * last_masker[k]; + } + + // Apply masking only between lower frequency bands. + RTC_DCHECK_LT(kUpperAccurateBandPlus1, gain.size()); + for (size_t k = 1; k < kUpperAccurateBandPlus1; ++k) { + (*masker)[k] += config.gain_mask.m5 * + (side_band_masker[k - 1] + side_band_masker[k + 1]); + } + + // Add full-band masking as a minimum value for the masker. + const float min_masker = max_nearend_after_gain * config.gain_mask.m6; + std::for_each(masker->begin(), masker->end(), + [min_masker](float& a) { a = std::max(a, min_masker); }); +} + +// Limits the gain in the frequencies for which the adaptive filter has not +// converged. Currently, these frequencies are not hardcoded to the frequencies +// which are typically not excited by speech. +// TODO(peah): Make adaptive to take the actual filter error into account. +void AdjustNonConvergedFrequencies( + std::array<float, kFftLengthBy2Plus1>* gain) { + constexpr float oneByBandsInSum = + 1 / static_cast<float>(kUpperAccurateBandPlus1 - 20); + const float hf_gain_bound = + std::accumulate(gain->begin() + 20, + gain->begin() + kUpperAccurateBandPlus1, 0.f) * + oneByBandsInSum; + + std::for_each(gain->begin() + kUpperAccurateBandPlus1, gain->end(), + [hf_gain_bound](float& a) { a = std::min(a, hf_gain_bound); }); +} + +} // namespace + +// TODO(peah): Add further optimizations, in particular for the divisions. +void SuppressionGain::LowerBandGain( + bool low_noise_render, + const rtc::Optional<int>& narrow_peak_band, + bool saturated_echo, + bool saturating_echo_path, + bool linear_echo_estimate, + const std::array<float, kFftLengthBy2Plus1>& nearend, + const std::array<float, kFftLengthBy2Plus1>& echo, + const std::array<float, kFftLengthBy2Plus1>& comfort_noise, + std::array<float, kFftLengthBy2Plus1>* gain) { + // Count the number of blocks since saturation. + no_saturation_counter_ = saturated_echo ? 
0 : no_saturation_counter_ + 1; + + // Precompute 1/echo (note that when the echo is zero, the precomputed value + // is never used). + std::array<float, kFftLengthBy2Plus1> one_by_echo; + std::transform(echo.begin(), echo.end(), one_by_echo.begin(), + [](float a) { return a > 0.f ? 1.f / a : 1.f; }); + + // Compute the minimum gain as the attenuating gain to put the signal just + // above the zero sample values. + std::array<float, kFftLengthBy2Plus1> min_gain; + const float min_echo_power = + low_noise_render ? config_.echo_audibility.low_render_limit + : config_.echo_audibility.normal_render_limit; + if (no_saturation_counter_ > 10) { + for (size_t k = 0; k < nearend.size(); ++k) { + const float denom = std::min(nearend[k], echo[k]); + min_gain[k] = denom > 0.f ? min_echo_power / denom : 1.f; + min_gain[k] = std::min(min_gain[k], 1.f); + } + } else { + min_gain.fill(0.f); + } + + // Compute the maximum gain by limiting the gain increase from the previous + // gain. + std::array<float, kFftLengthBy2Plus1> max_gain; + for (size_t k = 0; k < gain->size(); ++k) { + max_gain[k] = std::min(std::max(last_gain_[k] * gain_increase_[k], + config_.gain_updates.floor_first_increase), + 1.f); + } + + // Iteratively compute the gain required to attenuate the echo to a non + // noticeable level. + gain->fill(0.f); + for (int k = 0; k < 2; ++k) { + std::array<float, kFftLengthBy2Plus1> masker; + MaskingPower(config_, nearend, comfort_noise, last_masker_, *gain, &masker); + GainToNoAudibleEcho(config_, low_noise_render, saturated_echo, + saturating_echo_path, linear_echo_estimate, nearend, + echo, masker, min_gain, max_gain, one_by_echo, gain); + AdjustForExternalFilters(gain); + if (narrow_peak_band) { + NarrowBandAttenuation(*narrow_peak_band, gain); + } + } + + // Adjust the gain for frequencies which have not yet converged. + AdjustNonConvergedFrequencies(gain); + + // Update the allowed maximum gain increase. + UpdateMaxGainIncrease(config_, no_saturation_counter_, low_noise_render, + linear_echo_estimate, last_echo_, echo, last_gain_, + *gain, &gain_increase_); + + // Adjust gain dynamics. + const float gain_bound = + std::max(0.001f, *std::min_element(gain->begin(), gain->end()) * 10000.f); + std::for_each(gain->begin(), gain->end(), + [gain_bound](float& a) { a = std::min(a, gain_bound); }); + + // Store data required for the gain computation of the next block. 
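+  // last_echo_, last_gain_ and last_masker_ feed the gain-increase limiter and
+  // the masking-power estimate on the next call; the gain itself is converted
+  // from the power domain to the amplitude domain via Sqrt() before use.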
+ std::copy(echo.begin(), echo.end(), last_echo_.begin()); + std::copy(gain->begin(), gain->end(), last_gain_.begin()); + MaskingPower(config_, nearend, comfort_noise, last_masker_, *gain, + &last_masker_); + aec3::VectorMath(optimization_).Sqrt(*gain); +} + +SuppressionGain::SuppressionGain(const EchoCanceller3Config& config, + Aec3Optimization optimization) + : optimization_(optimization), config_(config) { + last_gain_.fill(1.f); + last_masker_.fill(0.f); + gain_increase_.fill(1.f); + last_echo_.fill(0.f); +} + +void SuppressionGain::GetGain( + const std::array<float, kFftLengthBy2Plus1>& nearend, + const std::array<float, kFftLengthBy2Plus1>& echo, + const std::array<float, kFftLengthBy2Plus1>& comfort_noise, + const RenderSignalAnalyzer& render_signal_analyzer, + const AecState& aec_state, + const std::vector<std::vector<float>>& render, + float* high_bands_gain, + std::array<float, kFftLengthBy2Plus1>* low_band_gain) { + RTC_DCHECK(high_bands_gain); + RTC_DCHECK(low_band_gain); + + const bool saturated_echo = aec_state.SaturatedEcho(); + const bool saturating_echo_path = aec_state.SaturatingEchoPath(); + const bool force_zero_gain = aec_state.ForcedZeroGain(); + const bool linear_echo_estimate = aec_state.LinearEchoEstimate(); + + if (force_zero_gain) { + last_gain_.fill(0.f); + std::copy(comfort_noise.begin(), comfort_noise.end(), last_masker_.begin()); + low_band_gain->fill(0.f); + gain_increase_.fill(1.f); + *high_bands_gain = 0.f; + return; + } + + bool low_noise_render = low_render_detector_.Detect(render); + + // Compute gain for the lower band. + const rtc::Optional<int> narrow_peak_band = + render_signal_analyzer.NarrowPeakBand(); + LowerBandGain(low_noise_render, narrow_peak_band, saturated_echo, + saturating_echo_path, linear_echo_estimate, nearend, echo, + comfort_noise, low_band_gain); + + // Compute the gain for the upper bands. + *high_bands_gain = + UpperBandsGain(narrow_peak_band, saturated_echo, render, *low_band_gain); +} + +// Detects when the render signal can be considered to have low power and +// consist of stationary noise. +bool SuppressionGain::LowNoiseRenderDetector::Detect( + const std::vector<std::vector<float>>& render) { + float x2_sum = 0.f; + float x2_max = 0.f; + for (auto x_k : render[0]) { + const float x2 = x_k * x_k; + x2_sum += x2; + x2_max = std::max(x2_max, x2); + } + + constexpr float kThreshold = 50.f * 50.f * 64.f; + const bool low_noise_render = + average_power_ < kThreshold && x2_max < 3 * average_power_; + average_power_ = average_power_ * 0.9f + x2_sum * 0.1f; + return low_noise_render; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/suppression_gain.h b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/suppression_gain.h new file mode 100644 index 0000000000..6f21f71961 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/suppression_gain.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_SUPPRESSION_GAIN_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_SUPPRESSION_GAIN_H_ + +#include <array> +#include <vector> + +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/aec_state.h" +#include "modules/audio_processing/aec3/render_signal_analyzer.h" +#include "modules/audio_processing/include/audio_processing.h" +#include "rtc_base/constructormagic.h" + +namespace webrtc { + +class SuppressionGain { + public: + SuppressionGain(const EchoCanceller3Config& config, + Aec3Optimization optimization); + void GetGain(const std::array<float, kFftLengthBy2Plus1>& nearend, + const std::array<float, kFftLengthBy2Plus1>& echo, + const std::array<float, kFftLengthBy2Plus1>& comfort_noise, + const RenderSignalAnalyzer& render_signal_analyzer, + const AecState& aec_state, + const std::vector<std::vector<float>>& render, + float* high_bands_gain, + std::array<float, kFftLengthBy2Plus1>* low_band_gain); + + private: + void LowerBandGain(bool stationary_with_low_power, + const rtc::Optional<int>& narrow_peak_band, + bool saturated_echo, + bool saturating_echo_path, + bool linear_echo_estimate, + const std::array<float, kFftLengthBy2Plus1>& nearend, + const std::array<float, kFftLengthBy2Plus1>& echo, + const std::array<float, kFftLengthBy2Plus1>& comfort_noise, + std::array<float, kFftLengthBy2Plus1>* gain); + + class LowNoiseRenderDetector { + public: + bool Detect(const std::vector<std::vector<float>>& render); + + private: + float average_power_ = 32768.f * 32768.f; + }; + + const Aec3Optimization optimization_; + std::array<float, kFftLengthBy2Plus1> last_gain_; + std::array<float, kFftLengthBy2Plus1> last_masker_; + std::array<float, kFftLengthBy2Plus1> gain_increase_; + std::array<float, kFftLengthBy2Plus1> last_echo_; + + LowNoiseRenderDetector low_render_detector_; + size_t no_saturation_counter_ = 0; + const EchoCanceller3Config config_; + RTC_DISALLOW_COPY_AND_ASSIGN(SuppressionGain); +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_SUPPRESSION_GAIN_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/suppression_gain_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/suppression_gain_unittest.cc new file mode 100644 index 0000000000..9fee6a24a9 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/suppression_gain_unittest.cc @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/suppression_gain.h" + +#include "modules/audio_processing/aec3/aec_state.h" +#include "modules/audio_processing/aec3/render_buffer.h" +#include "modules/audio_processing/aec3/subtractor.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/checks.h" +#include "system_wrappers/include/cpu_features_wrapper.h" +#include "test/gtest.h" +#include "typedefs.h" // NOLINT(build/include) + +namespace webrtc { +namespace aec3 { + +#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) + +// Verifies that the check for non-null output gains works. 
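+// GetGain() RTC_DCHECKs both of its output pointers, so passing a null
+// low-band gain is expected to trip a death test in dcheck-enabled builds.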
+TEST(SuppressionGain, NullOutputGains) { + std::array<float, kFftLengthBy2Plus1> E2; + std::array<float, kFftLengthBy2Plus1> R2; + std::array<float, kFftLengthBy2Plus1> N2; + E2.fill(0.f); + R2.fill(0.f); + N2.fill(0.f); + float high_bands_gain; + AecState aec_state(EchoCanceller3Config{}); + EXPECT_DEATH(SuppressionGain(EchoCanceller3Config{}, DetectOptimization()) + .GetGain(E2, R2, N2, RenderSignalAnalyzer(), aec_state, + std::vector<std::vector<float>>( + 3, std::vector<float>(kBlockSize, 0.f)), + &high_bands_gain, nullptr), + ""); +} + +#endif + +// Does a sanity check that the gains are correctly computed. +TEST(SuppressionGain, BasicGainComputation) { + SuppressionGain suppression_gain(EchoCanceller3Config(), + DetectOptimization()); + RenderSignalAnalyzer analyzer; + float high_bands_gain; + std::array<float, kFftLengthBy2Plus1> E2; + std::array<float, kFftLengthBy2Plus1> Y2; + std::array<float, kFftLengthBy2Plus1> R2; + std::array<float, kFftLengthBy2Plus1> N2; + std::array<float, kFftLengthBy2Plus1> g; + std::array<float, kBlockSize> s; + std::vector<std::vector<float>> x(1, std::vector<float>(kBlockSize, 0.f)); + AecState aec_state(EchoCanceller3Config{}); + ApmDataDumper data_dumper(42); + Subtractor subtractor(&data_dumper, DetectOptimization()); + RenderBuffer render_buffer( + DetectOptimization(), 1, + std::max(kUnknownDelayRenderWindowSize, kAdaptiveFilterLength), + std::vector<size_t>(1, kAdaptiveFilterLength)); + + // Verify the functionality for forcing a zero gain. + E2.fill(1000000000.f); + R2.fill(10000000000000.f); + N2.fill(0.f); + s.fill(10.f); + aec_state.Update( + subtractor.FilterFrequencyResponse(), subtractor.FilterImpulseResponse(), + subtractor.ConvergedFilter(), 10, render_buffer, E2, Y2, x[0], s, false); + suppression_gain.GetGain(E2, R2, N2, analyzer, aec_state, x, &high_bands_gain, + &g); + std::for_each(g.begin(), g.end(), [](float a) { EXPECT_FLOAT_EQ(0.f, a); }); + EXPECT_FLOAT_EQ(0.f, high_bands_gain); + + // Ensure that a strong noise is detected to mask any echoes. + E2.fill(10.f); + Y2.fill(10.f); + R2.fill(0.1f); + N2.fill(100.f); + // Ensure that the gain is no longer forced to zero. + for (int k = 0; k <= kNumBlocksPerSecond / 5 + 1; ++k) { + aec_state.Update(subtractor.FilterFrequencyResponse(), + subtractor.FilterImpulseResponse(), + subtractor.ConvergedFilter(), 10, render_buffer, E2, Y2, + x[0], s, false); + } + + for (int k = 0; k < 100; ++k) { + aec_state.Update(subtractor.FilterFrequencyResponse(), + subtractor.FilterImpulseResponse(), + subtractor.ConvergedFilter(), 10, render_buffer, E2, Y2, + x[0], s, false); + suppression_gain.GetGain(E2, R2, N2, analyzer, aec_state, x, + &high_bands_gain, &g); + } + std::for_each(g.begin(), g.end(), + [](float a) { EXPECT_NEAR(1.f, a, 0.001); }); + + // Ensure that a strong nearend is detected to mask any echoes. + E2.fill(100.f); + Y2.fill(100.f); + R2.fill(0.1f); + N2.fill(0.f); + for (int k = 0; k < 100; ++k) { + aec_state.Update(subtractor.FilterFrequencyResponse(), + subtractor.FilterImpulseResponse(), + subtractor.ConvergedFilter(), 10, render_buffer, E2, Y2, + x[0], s, false); + suppression_gain.GetGain(E2, R2, N2, analyzer, aec_state, x, + &high_bands_gain, &g); + } + std::for_each(g.begin(), g.end(), + [](float a) { EXPECT_NEAR(1.f, a, 0.001); }); + + // Ensure that a strong echo is suppressed. 
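+// With the echo power (R2) four orders of magnitude above the nearend power
+// (E2), the per-band gains should drop to zero within a few blocks.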
+  E2.fill(1000000000.f);
+  R2.fill(10000000000000.f);
+  N2.fill(0.f);
+  for (int k = 0; k < 10; ++k) {
+    suppression_gain.GetGain(E2, R2, N2, analyzer, aec_state, x,
+                             &high_bands_gain, &g);
+  }
+  std::for_each(g.begin(), g.end(),
+                [](float a) { EXPECT_NEAR(0.f, a, 0.001); });
+}
+
+}  // namespace aec3
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/vector_math.h b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/vector_math.h
new file mode 100644
index 0000000000..0672b513f3
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/vector_math.h
@@ -0,0 +1,212 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AEC3_VECTOR_MATH_H_
+#define MODULES_AUDIO_PROCESSING_AEC3_VECTOR_MATH_H_
+
+#include "typedefs.h"  // NOLINT(build/include)
+#if defined(WEBRTC_HAS_NEON)
+#include <arm_neon.h>
+#endif
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+#include <emmintrin.h>
+#endif
+#include <math.h>
+#include <algorithm>
+#include <array>
+#include <functional>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/aec3/aec3_common.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+namespace aec3 {
+
+// Provides optimizations for mathematical operations based on vectors.
+class VectorMath {
+ public:
+  explicit VectorMath(Aec3Optimization optimization)
+      : optimization_(optimization) {}
+
+  // Elementwise square root.
+  void Sqrt(rtc::ArrayView<float> x) {
+    switch (optimization_) {
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+      case Aec3Optimization::kSse2: {
+        const int x_size = static_cast<int>(x.size());
+        const int vector_limit = x_size >> 2;
+
+        int j = 0;
+        for (; j < vector_limit * 4; j += 4) {
+          __m128 g = _mm_loadu_ps(&x[j]);
+          g = _mm_sqrt_ps(g);
+          _mm_storeu_ps(&x[j], g);
+        }
+
+        for (; j < x_size; ++j) {
+          x[j] = sqrtf(x[j]);
+        }
+      } break;
+#endif
+#if defined(WEBRTC_HAS_NEON)
+      case Aec3Optimization::kNeon: {
+        const int x_size = static_cast<int>(x.size());
+        const int vector_limit = x_size >> 2;
+
+        int j = 0;
+        for (; j < vector_limit * 4; j += 4) {
+          float32x4_t g = vld1q_f32(&x[j]);
+#if !defined(WEBRTC_ARCH_ARM64)
+          float32x4_t y = vrsqrteq_f32(g);
+
+          // Code to handle sqrt(0).
+          // If the input to sqrtf() is zero, a zero will be returned.
+          // If the input to vrsqrteq_f32() is zero, positive infinity is
+          // returned.
+          const uint32x4_t vec_p_inf = vdupq_n_u32(0x7F800000);
+          // Check for divide by zero.
+          const uint32x4_t div_by_zero =
+              vceqq_u32(vec_p_inf, vreinterpretq_u32_f32(y));
+          // Zero out the positive infinity results.
+          y = vreinterpretq_f32_u32(
+              vandq_u32(vmvnq_u32(div_by_zero), vreinterpretq_u32_f32(y)));
+          // From the ARM documentation: the Newton-Raphson iteration
+          //   y[n+1] = y[n] * (3 - d * (y[n] * y[n])) / 2
+          // converges to 1/√d if y0 is the result of VRSQRTE applied to d.
+          //
+          // Note: The precision did not improve after 2 iterations.
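+          // In scalar form, one refinement step for d = g reads
+          //   y_next = y * (3.f - d * y * y) / 2.f;
+          // and the loop below applies two such steps across all four lanes.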
+ for (int i = 0; i < 2; i++) { + y = vmulq_f32(vrsqrtsq_f32(vmulq_f32(y, y), g), y); + } + // sqrt(g) = g * 1/sqrt(g) + g = vmulq_f32(g, y); +#else + g = vsqrtq_f32(g); +#endif + vst1q_f32(&x[j], g); + } + + for (; j < x_size; ++j) { + x[j] = sqrtf(x[j]); + } + } +#endif + break; + default: + std::for_each(x.begin(), x.end(), [](float& a) { a = sqrtf(a); }); + } + } + + // Elementwise vector multiplication z = x * y. + void Multiply(rtc::ArrayView<const float> x, + rtc::ArrayView<const float> y, + rtc::ArrayView<float> z) { + RTC_DCHECK_EQ(z.size(), x.size()); + RTC_DCHECK_EQ(z.size(), y.size()); + switch (optimization_) { +#if defined(WEBRTC_ARCH_X86_FAMILY) + case Aec3Optimization::kSse2: { + const int x_size = static_cast<int>(x.size()); + const int vector_limit = x_size >> 2; + + int j = 0; + for (; j < vector_limit * 4; j += 4) { + const __m128 x_j = _mm_loadu_ps(&x[j]); + const __m128 y_j = _mm_loadu_ps(&y[j]); + const __m128 z_j = _mm_mul_ps(x_j, y_j); + _mm_storeu_ps(&z[j], z_j); + } + + for (; j < x_size; ++j) { + z[j] = x[j] * y[j]; + } + } break; +#endif +#if defined(WEBRTC_HAS_NEON) + case Aec3Optimization::kNeon: { + const int x_size = static_cast<int>(x.size()); + const int vector_limit = x_size >> 2; + + int j = 0; + for (; j < vector_limit * 4; j += 4) { + const float32x4_t x_j = vld1q_f32(&x[j]); + const float32x4_t y_j = vld1q_f32(&y[j]); + const float32x4_t z_j = vmulq_f32(x_j, y_j); + vst1q_f32(&z[j], z_j); + } + + for (; j < x_size; ++j) { + z[j] = x[j] * y[j]; + } + } break; +#endif + default: + std::transform(x.begin(), x.end(), y.begin(), z.begin(), + std::multiplies<float>()); + } + } + + // Elementwise vector accumulation z += x. + void Accumulate(rtc::ArrayView<const float> x, rtc::ArrayView<float> z) { + RTC_DCHECK_EQ(z.size(), x.size()); + switch (optimization_) { +#if defined(WEBRTC_ARCH_X86_FAMILY) + case Aec3Optimization::kSse2: { + const int x_size = static_cast<int>(x.size()); + const int vector_limit = x_size >> 2; + + int j = 0; + for (; j < vector_limit * 4; j += 4) { + const __m128 x_j = _mm_loadu_ps(&x[j]); + __m128 z_j = _mm_loadu_ps(&z[j]); + z_j = _mm_add_ps(x_j, z_j); + _mm_storeu_ps(&z[j], z_j); + } + + for (; j < x_size; ++j) { + z[j] += x[j]; + } + } break; +#endif +#if defined(WEBRTC_HAS_NEON) + case Aec3Optimization::kNeon: { + const int x_size = static_cast<int>(x.size()); + const int vector_limit = x_size >> 2; + + int j = 0; + for (; j < vector_limit * 4; j += 4) { + const float32x4_t x_j = vld1q_f32(&x[j]); + float32x4_t z_j = vld1q_f32(&z[j]); + z_j = vaddq_f32(z_j, x_j); + vst1q_f32(&z[j], z_j); + } + + for (; j < x_size; ++j) { + z[j] += x[j]; + } + } break; +#endif + default: + std::transform(x.begin(), x.end(), z.begin(), z.begin(), + std::plus<float>()); + } + } + + private: + Aec3Optimization optimization_; +}; + +} // namespace aec3 + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_VECTOR_MATH_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/vector_math_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/vector_math_unittest.cc new file mode 100644 index 0000000000..6bf60ecb64 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/vector_math_unittest.cc @@ -0,0 +1,146 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/vector_math.h" + +#include <math.h> + +#include "system_wrappers/include/cpu_features_wrapper.h" +#include "test/gtest.h" +#include "typedefs.h" // NOLINT(build/include) + +namespace webrtc { + +#if defined(WEBRTC_HAS_NEON) + +TEST(VectorMath, Sqrt) { + std::array<float, kFftLengthBy2Plus1> x; + std::array<float, kFftLengthBy2Plus1> z; + std::array<float, kFftLengthBy2Plus1> z_neon; + + for (size_t k = 0; k < x.size(); ++k) { + x[k] = (2.f / 3.f) * k; + } + + std::copy(x.begin(), x.end(), z.begin()); + aec3::VectorMath(Aec3Optimization::kNone).Sqrt(z); + std::copy(x.begin(), x.end(), z_neon.begin()); + aec3::VectorMath(Aec3Optimization::kNeon).Sqrt(z_neon); + for (size_t k = 0; k < z.size(); ++k) { + EXPECT_NEAR(z[k], z_neon[k], 0.0001f); + EXPECT_NEAR(sqrtf(x[k]), z_neon[k], 0.0001f); + } +} + +TEST(VectorMath, Multiply) { + std::array<float, kFftLengthBy2Plus1> x; + std::array<float, kFftLengthBy2Plus1> y; + std::array<float, kFftLengthBy2Plus1> z; + std::array<float, kFftLengthBy2Plus1> z_neon; + + for (size_t k = 0; k < x.size(); ++k) { + x[k] = k; + y[k] = (2.f / 3.f) * k; + } + + aec3::VectorMath(Aec3Optimization::kNone).Multiply(x, y, z); + aec3::VectorMath(Aec3Optimization::kNeon).Multiply(x, y, z_neon); + for (size_t k = 0; k < z.size(); ++k) { + EXPECT_FLOAT_EQ(z[k], z_neon[k]); + EXPECT_FLOAT_EQ(x[k] * y[k], z_neon[k]); + } +} + +TEST(VectorMath, Accumulate) { + std::array<float, kFftLengthBy2Plus1> x; + std::array<float, kFftLengthBy2Plus1> z; + std::array<float, kFftLengthBy2Plus1> z_neon; + + for (size_t k = 0; k < x.size(); ++k) { + x[k] = k; + z[k] = z_neon[k] = 2.f * k; + } + + aec3::VectorMath(Aec3Optimization::kNone).Accumulate(x, z); + aec3::VectorMath(Aec3Optimization::kNeon).Accumulate(x, z_neon); + for (size_t k = 0; k < z.size(); ++k) { + EXPECT_FLOAT_EQ(z[k], z_neon[k]); + EXPECT_FLOAT_EQ(x[k] + 2.f * x[k], z_neon[k]); + } +} +#endif + +#if defined(WEBRTC_ARCH_X86_FAMILY) + +TEST(VectorMath, Sqrt) { + if (WebRtc_GetCPUInfo(kSSE2) != 0) { + std::array<float, kFftLengthBy2Plus1> x; + std::array<float, kFftLengthBy2Plus1> z; + std::array<float, kFftLengthBy2Plus1> z_sse2; + + for (size_t k = 0; k < x.size(); ++k) { + x[k] = (2.f / 3.f) * k; + } + + std::copy(x.begin(), x.end(), z.begin()); + aec3::VectorMath(Aec3Optimization::kNone).Sqrt(z); + std::copy(x.begin(), x.end(), z_sse2.begin()); + aec3::VectorMath(Aec3Optimization::kSse2).Sqrt(z_sse2); + EXPECT_EQ(z, z_sse2); + for (size_t k = 0; k < z.size(); ++k) { + EXPECT_FLOAT_EQ(z[k], z_sse2[k]); + EXPECT_FLOAT_EQ(sqrtf(x[k]), z_sse2[k]); + } + } +} + +TEST(VectorMath, Multiply) { + if (WebRtc_GetCPUInfo(kSSE2) != 0) { + std::array<float, kFftLengthBy2Plus1> x; + std::array<float, kFftLengthBy2Plus1> y; + std::array<float, kFftLengthBy2Plus1> z; + std::array<float, kFftLengthBy2Plus1> z_sse2; + + for (size_t k = 0; k < x.size(); ++k) { + x[k] = k; + y[k] = (2.f / 3.f) * k; + } + + aec3::VectorMath(Aec3Optimization::kNone).Multiply(x, y, z); + aec3::VectorMath(Aec3Optimization::kSse2).Multiply(x, y, z_sse2); + for (size_t k = 0; k < z.size(); ++k) { + EXPECT_FLOAT_EQ(z[k], z_sse2[k]); + EXPECT_FLOAT_EQ(x[k] * y[k], z_sse2[k]); + } + } +} + +TEST(VectorMath, Accumulate) { + if (WebRtc_GetCPUInfo(kSSE2) != 0) { + std::array<float, kFftLengthBy2Plus1> x; + std::array<float, 
kFftLengthBy2Plus1> z; + std::array<float, kFftLengthBy2Plus1> z_sse2; + + for (size_t k = 0; k < x.size(); ++k) { + x[k] = k; + z[k] = z_sse2[k] = 2.f * k; + } + + aec3::VectorMath(Aec3Optimization::kNone).Accumulate(x, z); + aec3::VectorMath(Aec3Optimization::kSse2).Accumulate(x, z_sse2); + for (size_t k = 0; k < z.size(); ++k) { + EXPECT_FLOAT_EQ(z[k], z_sse2[k]); + EXPECT_FLOAT_EQ(x[k] + 2.f * x[k], z_sse2[k]); + } + } +} +#endif + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec_dump/BUILD.gn b/third_party/libwebrtc/webrtc/modules/audio_processing/aec_dump/BUILD.gn new file mode 100644 index 0000000000..818a9bf27d --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec_dump/BUILD.gn @@ -0,0 +1,114 @@ +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +import("../../../webrtc.gni") # This contains def of 'rtc_enable_protobuf' + +rtc_source_set("aec_dump") { + sources = [ + "aec_dump_factory.h", + ] + + public_deps = [ + "..:aec_dump_interface", + ] + + deps = [ + "../../../rtc_base:rtc_base_approved", + ] +} + +rtc_source_set("mock_aec_dump") { + testonly = true + sources = [ + "mock_aec_dump.cc", + "mock_aec_dump.h", + ] + + deps = [ + "..:aec_dump_interface", + ] + public_deps = [ + "../..:module_api", + "../../../test:test_support", + "//testing/gmock", + ] +} + +rtc_source_set("mock_aec_dump_unittests") { + testonly = true + + sources = [ + "aec_dump_integration_test.cc", + ] + + deps = [ + ":mock_aec_dump", + "..:audio_processing", + "../../../rtc_base:rtc_base_approved", + "//testing/gtest", + ] +} + +if (rtc_enable_protobuf) { + rtc_source_set("aec_dump_impl") { + sources = [ + "aec_dump_impl.cc", + "aec_dump_impl.h", + "capture_stream_info.cc", + "capture_stream_info.h", + "write_to_file_task.cc", + "write_to_file_task.h", + ] + + public = [] + + public_deps = [ + ":aec_dump", + "..:aec_dump_interface", + ] + + deps = [ + "../../../modules:module_api", + "../../../rtc_base:protobuf_utils", + "../../../rtc_base:rtc_base_approved", + "../../../rtc_base:rtc_task_queue", + "../../../system_wrappers", + ] + + deps += [ "../:audioproc_debug_proto" ] + } + + rtc_source_set("aec_dump_unittests") { + testonly = true + defines = [] + deps = [ + ":aec_dump_impl", + "..:aec_dump_interface", + "..:audioproc_debug_proto", + "../../../modules:module_api", + "../../../rtc_base:rtc_task_queue", + "../../../test:test_support", + "//testing/gtest", + ] + sources = [ + "aec_dump_unittest.cc", + ] + } +} + +rtc_source_set("null_aec_dump_factory") { + assert_no_deps = [ ":aec_dump_impl" ] + sources = [ + "null_aec_dump_factory.cc", + ] + + public_deps = [ + ":aec_dump", + "..:aec_dump_interface", + ] +} diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec_dump/aec_dump_factory.h b/third_party/libwebrtc/webrtc/modules/audio_processing/aec_dump/aec_dump_factory.h new file mode 100644 index 0000000000..e3f00f67a2 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec_dump/aec_dump_factory.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. 
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AEC_DUMP_AEC_DUMP_FACTORY_H_
+#define MODULES_AUDIO_PROCESSING_AEC_DUMP_AEC_DUMP_FACTORY_H_
+
+#include <memory>
+#include <string>
+
+#include "modules/audio_processing/include/aec_dump.h"
+#include "rtc_base/platform_file.h"
+
+namespace rtc {
+class TaskQueue;
+}  // namespace rtc
+
+namespace webrtc {
+
+class AecDumpFactory {
+ public:
+  // The |worker_queue| may not be null and must outlive the created
+  // AecDump instance. |max_log_size_bytes == -1| means the log size
+  // will be unlimited. |handle| may not be null. The AecDump takes
+  // responsibility for |handle| and closes it in the destructor. A
+  // non-null return value indicates that the file has been
+  // successfully opened.
+  static std::unique_ptr<AecDump> Create(rtc::PlatformFile file,
+                                         int64_t max_log_size_bytes,
+                                         rtc::TaskQueue* worker_queue);
+  static std::unique_ptr<AecDump> Create(std::string file_name,
+                                         int64_t max_log_size_bytes,
+                                         rtc::TaskQueue* worker_queue);
+  static std::unique_ptr<AecDump> Create(FILE* handle,
+                                         int64_t max_log_size_bytes,
+                                         rtc::TaskQueue* worker_queue);
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AEC_DUMP_AEC_DUMP_FACTORY_H_
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec_dump/aec_dump_gn/moz.build b/third_party/libwebrtc/webrtc/modules/audio_processing/aec_dump/aec_dump_gn/moz.build
new file mode 100644
index 0000000000..a708b3b78f
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec_dump/aec_dump_gn/moz.build
@@ -0,0 +1,179 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+  ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ###
+  ### DO NOT edit it by hand.
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["CHROMIUM_BUILD"] = True +DEFINES["V8_DEPRECATION_WARNINGS"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_RESTRICT_LOGGING"] = True + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "/ipc/chromium/src", + "/ipc/glue", + "/third_party/libwebrtc/webrtc/" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + DEFINES["WTF_USE_DYNAMIC_ANNOTATIONS"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION"] = "r12b" + DEFINES["DISABLE_NACL"] = True + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["NO_TCMALLOC"] = True + DEFINES["USE_OPENSSL_CERTS"] = "1" + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["__GNU_SOURCE"] = "1" + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["NO_TCMALLOC"] = True + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORE"] = "0" + + OS_LIBS += [ + "-framework Foundation" + ] + +if CONFIG["OS_TARGET"] == "DragonFly": + + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + +if CONFIG["OS_TARGET"] == "FreeBSD": + + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + +if CONFIG["OS_TARGET"] == "NetBSD": + + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NO_TCMALLOC"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "1" + DEFINES["UNICODE"] = True + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_CRT_SECURE_NO_WARNINGS"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_USING_V110_SDK71_"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if not CONFIG["MOZ_DEBUG"] and CONFIG["OS_TARGET"] == "Android": + + 
DEFINES["_FORTIFY_SOURCE"] = "2" + +if not CONFIG["MOZ_DEBUG"] and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_FORTIFY_SOURCE"] = "2" + +if not CONFIG["MOZ_DEBUG"] and CONFIG["OS_TARGET"] == "DragonFly": + + DEFINES["_FORTIFY_SOURCE"] = "2" + +if not CONFIG["MOZ_DEBUG"] and CONFIG["OS_TARGET"] == "FreeBSD": + + DEFINES["_FORTIFY_SOURCE"] = "2" + +if not CONFIG["MOZ_DEBUG"] and CONFIG["OS_TARGET"] == "NetBSD": + + DEFINES["_FORTIFY_SOURCE"] = "2" + +if not CONFIG["MOZ_DEBUG"] and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_FORTIFY_SOURCE"] = "2" + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["CR_XCODE_VERSION"] = "0120" + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["CR_XCODE_VERSION"] = "0920" + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["DISABLE_NACL"] = True + DEFINES["NO_TCMALLOC"] = True + +Library("aec_dump_gn") diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec_dump/aec_dump_impl.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec_dump/aec_dump_impl.cc new file mode 100644 index 0000000000..594bf85cd9 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec_dump/aec_dump_impl.cc @@ -0,0 +1,207 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <utility> + +#include "modules/audio_processing/aec_dump/aec_dump_impl.h" + +#include "modules/audio_processing/aec_dump/aec_dump_factory.h" +#include "rtc_base/checks.h" +#include "rtc_base/event.h" +#include "rtc_base/ptr_util.h" + +namespace webrtc { + +namespace { +void CopyFromConfigToEvent(const webrtc::InternalAPMConfig& config, + webrtc::audioproc::Config* pb_cfg) { + pb_cfg->set_aec_enabled(config.aec_enabled); + pb_cfg->set_aec_delay_agnostic_enabled(config.aec_delay_agnostic_enabled); + pb_cfg->set_aec_drift_compensation_enabled( + config.aec_drift_compensation_enabled); + pb_cfg->set_aec_extended_filter_enabled(config.aec_extended_filter_enabled); + pb_cfg->set_aec_suppression_level(config.aec_suppression_level); + + pb_cfg->set_aecm_enabled(config.aecm_enabled); + pb_cfg->set_aecm_comfort_noise_enabled(config.aecm_comfort_noise_enabled); + pb_cfg->set_aecm_routing_mode(config.aecm_routing_mode); + + pb_cfg->set_agc_enabled(config.agc_enabled); + pb_cfg->set_agc_mode(config.agc_mode); + pb_cfg->set_agc_limiter_enabled(config.agc_limiter_enabled); + pb_cfg->set_noise_robust_agc_enabled(config.noise_robust_agc_enabled); + + pb_cfg->set_hpf_enabled(config.hpf_enabled); + + pb_cfg->set_ns_enabled(config.ns_enabled); + pb_cfg->set_ns_level(config.ns_level); + + pb_cfg->set_transient_suppression_enabled( + config.transient_suppression_enabled); + pb_cfg->set_intelligibility_enhancer_enabled( + config.intelligibility_enhancer_enabled); + + pb_cfg->set_experiments_description(config.experiments_description); +} + +} // namespace + +AecDumpImpl::AecDumpImpl(std::unique_ptr<FileWrapper> debug_file, + int64_t max_log_size_bytes, + rtc::TaskQueue* worker_queue) + : debug_file_(std::move(debug_file)), + num_bytes_left_for_log_(max_log_size_bytes), + worker_queue_(worker_queue), + 
capture_stream_info_(CreateWriteToFileTask()) {} + +AecDumpImpl::~AecDumpImpl() { + // Block until all tasks have finished running. + rtc::Event thread_sync_event(false /* manual_reset */, false); + worker_queue_->PostTask([&thread_sync_event] { thread_sync_event.Set(); }); + // Wait until the event has been signaled with .Set(). By then all + // pending tasks will have finished. + thread_sync_event.Wait(rtc::Event::kForever); +} + +void AecDumpImpl::WriteInitMessage( + const InternalAPMStreamsConfig& streams_config) { + auto task = CreateWriteToFileTask(); + auto* event = task->GetEvent(); + event->set_type(audioproc::Event::INIT); + audioproc::Init* msg = event->mutable_init(); + + msg->set_sample_rate(streams_config.input_sample_rate); + msg->set_output_sample_rate(streams_config.output_sample_rate); + msg->set_reverse_sample_rate(streams_config.render_input_sample_rate); + msg->set_reverse_output_sample_rate(streams_config.render_output_sample_rate); + + msg->set_num_input_channels( + static_cast<int32_t>(streams_config.input_num_channels)); + msg->set_num_output_channels( + static_cast<int32_t>(streams_config.output_num_channels)); + msg->set_num_reverse_channels( + static_cast<int32_t>(streams_config.render_input_num_channels)); + msg->set_num_reverse_output_channels( + streams_config.render_output_num_channels); + + worker_queue_->PostTask(std::unique_ptr<rtc::QueuedTask>(std::move(task))); +} + +void AecDumpImpl::AddCaptureStreamInput(const FloatAudioFrame& src) { + capture_stream_info_.AddInput(src); +} + +void AecDumpImpl::AddCaptureStreamOutput(const FloatAudioFrame& src) { + capture_stream_info_.AddOutput(src); +} + +void AecDumpImpl::AddCaptureStreamInput(const AudioFrame& frame) { + capture_stream_info_.AddInput(frame); +} + +void AecDumpImpl::AddCaptureStreamOutput(const AudioFrame& frame) { + capture_stream_info_.AddOutput(frame); +} + +void AecDumpImpl::AddAudioProcessingState(const AudioProcessingState& state) { + capture_stream_info_.AddAudioProcessingState(state); +} + +void AecDumpImpl::WriteCaptureStreamMessage() { + auto task = capture_stream_info_.GetTask(); + RTC_DCHECK(task); + worker_queue_->PostTask(std::unique_ptr<rtc::QueuedTask>(std::move(task))); + capture_stream_info_.SetTask(CreateWriteToFileTask()); +} + +void AecDumpImpl::WriteRenderStreamMessage(const AudioFrame& frame) { + auto task = CreateWriteToFileTask(); + auto* event = task->GetEvent(); + + event->set_type(audioproc::Event::REVERSE_STREAM); + audioproc::ReverseStream* msg = event->mutable_reverse_stream(); + const size_t data_size = + sizeof(int16_t) * frame.samples_per_channel_ * frame.num_channels_; + msg->set_data(frame.data(), data_size); + + worker_queue_->PostTask(std::unique_ptr<rtc::QueuedTask>(std::move(task))); +} + +void AecDumpImpl::WriteRenderStreamMessage(const FloatAudioFrame& src) { + auto task = CreateWriteToFileTask(); + auto* event = task->GetEvent(); + + event->set_type(audioproc::Event::REVERSE_STREAM); + + audioproc::ReverseStream* msg = event->mutable_reverse_stream(); + + for (size_t i = 0; i < src.num_channels(); ++i) { + const auto& channel_view = src.channel(i); + msg->add_channel(channel_view.begin(), sizeof(float) * channel_view.size()); + } + + worker_queue_->PostTask(std::unique_ptr<rtc::QueuedTask>(std::move(task))); +} + +void AecDumpImpl::WriteConfig(const InternalAPMConfig& config) { + RTC_DCHECK_RUNS_SERIALIZED(&race_checker_); + auto task = CreateWriteToFileTask(); + auto* event = task->GetEvent(); + event->set_type(audioproc::Event::CONFIG); + 
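+  // Snapshot the APM configuration into the CONFIG event before posting, so
+  // the worker queue serializes a consistent copy to the file.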
CopyFromConfigToEvent(config, event->mutable_config()); + worker_queue_->PostTask(std::unique_ptr<rtc::QueuedTask>(std::move(task))); +} + +std::unique_ptr<WriteToFileTask> AecDumpImpl::CreateWriteToFileTask() { + return rtc::MakeUnique<WriteToFileTask>(debug_file_.get(), + &num_bytes_left_for_log_); +} + +std::unique_ptr<AecDump> AecDumpFactory::Create(rtc::PlatformFile file, + int64_t max_log_size_bytes, + rtc::TaskQueue* worker_queue) { + RTC_DCHECK(worker_queue); + std::unique_ptr<FileWrapper> debug_file(FileWrapper::Create()); + FILE* handle = rtc::FdopenPlatformFileForWriting(file); + if (!handle) { + return nullptr; + } + if (!debug_file->OpenFromFileHandle(handle)) { + return nullptr; + } + return rtc::MakeUnique<AecDumpImpl>(std::move(debug_file), max_log_size_bytes, + worker_queue); +} + +std::unique_ptr<AecDump> AecDumpFactory::Create(std::string file_name, + int64_t max_log_size_bytes, + rtc::TaskQueue* worker_queue) { + RTC_DCHECK(worker_queue); + std::unique_ptr<FileWrapper> debug_file(FileWrapper::Create()); + if (!debug_file->OpenFile(file_name.c_str(), false)) { + return nullptr; + } + return rtc::MakeUnique<AecDumpImpl>(std::move(debug_file), max_log_size_bytes, + worker_queue); +} + +std::unique_ptr<AecDump> AecDumpFactory::Create(FILE* handle, + int64_t max_log_size_bytes, + rtc::TaskQueue* worker_queue) { + RTC_DCHECK(worker_queue); + RTC_DCHECK(handle); + std::unique_ptr<FileWrapper> debug_file(FileWrapper::Create()); + if (!debug_file->OpenFromFileHandle(handle)) { + return nullptr; + } + return rtc::MakeUnique<AecDumpImpl>(std::move(debug_file), max_log_size_bytes, + worker_queue); +} +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec_dump/aec_dump_impl.h b/third_party/libwebrtc/webrtc/modules/audio_processing/aec_dump/aec_dump_impl.h new file mode 100644 index 0000000000..5be876b6b7 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec_dump/aec_dump_impl.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC_DUMP_AEC_DUMP_IMPL_H_ +#define MODULES_AUDIO_PROCESSING_AEC_DUMP_AEC_DUMP_IMPL_H_ + +#include <memory> +#include <string> +#include <vector> + +#include "modules/audio_processing/aec_dump/capture_stream_info.h" +#include "modules/audio_processing/aec_dump/write_to_file_task.h" +#include "modules/audio_processing/include/aec_dump.h" +#include "modules/include/module_common_types.h" +#include "rtc_base/ignore_wundef.h" +#include "rtc_base/platform_file.h" +#include "rtc_base/race_checker.h" +#include "rtc_base/task_queue.h" +#include "rtc_base/thread_annotations.h" +#include "system_wrappers/include/file_wrapper.h" + +// Files generated at build-time by the protobuf compiler. +RTC_PUSH_IGNORING_WUNDEF() +#ifdef WEBRTC_ANDROID_PLATFORM_BUILD +#include "external/webrtc/webrtc/modules/audio_processing/debug.pb.h" +#else +#include "modules/audio_processing/debug.pb.h" +#endif +RTC_POP_IGNORING_WUNDEF() + +namespace rtc { +class TaskQueue; +} // namespace rtc + +namespace webrtc { + +// Task-queue based implementation of AecDump. It is thread safe by +// relying on locks in TaskQueue. 
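+// Each Write* call builds a protobuf event on the calling thread and posts a
+// WriteToFileTask to |worker_queue_|, so callers never block on file I/O.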
+class AecDumpImpl : public AecDump {
+ public:
+  // Does the member-variable initialization shared across all c-tors.
+  AecDumpImpl(std::unique_ptr<FileWrapper> debug_file,
+              int64_t max_log_size_bytes,
+              rtc::TaskQueue* worker_queue);
+
+  ~AecDumpImpl() override;
+
+  void WriteInitMessage(const InternalAPMStreamsConfig& api_format) override;
+
+  void AddCaptureStreamInput(const FloatAudioFrame& src) override;
+  void AddCaptureStreamOutput(const FloatAudioFrame& src) override;
+  void AddCaptureStreamInput(const AudioFrame& frame) override;
+  void AddCaptureStreamOutput(const AudioFrame& frame) override;
+  void AddAudioProcessingState(const AudioProcessingState& state) override;
+  void WriteCaptureStreamMessage() override;
+
+  void WriteRenderStreamMessage(const AudioFrame& frame) override;
+  void WriteRenderStreamMessage(const FloatAudioFrame& src) override;
+
+  void WriteConfig(const InternalAPMConfig& config) override;
+
+ private:
+  std::unique_ptr<WriteToFileTask> CreateWriteToFileTask();
+
+  std::unique_ptr<FileWrapper> debug_file_;
+  int64_t num_bytes_left_for_log_ = 0;
+  rtc::RaceChecker race_checker_;
+  rtc::TaskQueue* worker_queue_;
+  CaptureStreamInfo capture_stream_info_;
+};
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AEC_DUMP_AEC_DUMP_IMPL_H_
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec_dump/aec_dump_integration_test.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec_dump/aec_dump_integration_test.cc
new file mode 100644
index 0000000000..720e4f5c34
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec_dump/aec_dump_integration_test.cc
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */ + +#include <utility> + +#include "modules/audio_processing/aec_dump/mock_aec_dump.h" +#include "modules/audio_processing/include/audio_processing.h" +#include "rtc_base/ptr_util.h" + +using testing::_; +using testing::AtLeast; +using testing::Exactly; +using testing::Matcher; +using testing::StrictMock; + +namespace { +std::unique_ptr<webrtc::AudioProcessing> CreateAudioProcessing() { + webrtc::Config config; + std::unique_ptr<webrtc::AudioProcessing> apm( + webrtc::AudioProcessing::Create(config)); + RTC_DCHECK(apm); + return apm; +} + +std::unique_ptr<webrtc::test::MockAecDump> CreateMockAecDump() { + auto mock_aec_dump = + rtc::MakeUnique<testing::StrictMock<webrtc::test::MockAecDump>>(); + EXPECT_CALL(*mock_aec_dump.get(), WriteConfig(_)).Times(AtLeast(1)); + EXPECT_CALL(*mock_aec_dump.get(), WriteInitMessage(_)).Times(AtLeast(1)); + return std::unique_ptr<webrtc::test::MockAecDump>(std::move(mock_aec_dump)); +} + +std::unique_ptr<webrtc::AudioFrame> CreateFakeFrame() { + auto fake_frame = rtc::MakeUnique<webrtc::AudioFrame>(); + fake_frame->num_channels_ = 1; + fake_frame->sample_rate_hz_ = 48000; + fake_frame->samples_per_channel_ = 480; + return fake_frame; +} + +} // namespace + +TEST(AecDumpIntegration, ConfigurationAndInitShouldBeLogged) { + auto apm = CreateAudioProcessing(); + + apm->AttachAecDump(CreateMockAecDump()); +} + +TEST(AecDumpIntegration, + RenderStreamShouldBeLoggedOnceEveryProcessReverseStream) { + auto apm = CreateAudioProcessing(); + auto mock_aec_dump = CreateMockAecDump(); + auto fake_frame = CreateFakeFrame(); + + EXPECT_CALL(*mock_aec_dump.get(), + WriteRenderStreamMessage(Matcher<const webrtc::AudioFrame&>(_))) + .Times(Exactly(1)); + + apm->AttachAecDump(std::move(mock_aec_dump)); + apm->ProcessReverseStream(fake_frame.get()); +} + +TEST(AecDumpIntegration, CaptureStreamShouldBeLoggedOnceEveryProcessStream) { + auto apm = CreateAudioProcessing(); + auto mock_aec_dump = CreateMockAecDump(); + auto fake_frame = CreateFakeFrame(); + + EXPECT_CALL(*mock_aec_dump.get(), + AddCaptureStreamInput(Matcher<const webrtc::AudioFrame&>(_))) + .Times(AtLeast(1)); + + EXPECT_CALL(*mock_aec_dump.get(), + AddCaptureStreamOutput(Matcher<const webrtc::AudioFrame&>(_))) + .Times(Exactly(1)); + + EXPECT_CALL(*mock_aec_dump.get(), AddAudioProcessingState(_)) + .Times(Exactly(1)); + + EXPECT_CALL(*mock_aec_dump.get(), WriteCaptureStreamMessage()) + .Times(Exactly(1)); + + apm->AttachAecDump(std::move(mock_aec_dump)); + apm->ProcessStream(fake_frame.get()); +} diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec_dump/aec_dump_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec_dump/aec_dump_unittest.cc new file mode 100644 index 0000000000..965ac03bd7 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec_dump/aec_dump_unittest.cc @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include <utility> + +#include "modules/audio_processing/aec_dump/aec_dump_factory.h" + +#include "modules/include/module_common_types.h" +#include "rtc_base/task_queue.h" +#include "test/gtest.h" +#include "test/testsupport/fileutils.h" + +TEST(AecDumper, APICallsDoNotCrash) { + // Note order of initialization: Task queue has to be initialized + // before AecDump. + rtc::TaskQueue file_writer_queue("file_writer_queue"); + + const std::string filename = + webrtc::test::TempFilename(webrtc::test::OutputPath(), "aec_dump"); + + { + std::unique_ptr<webrtc::AecDump> aec_dump = + webrtc::AecDumpFactory::Create(filename, -1, &file_writer_queue); + + const webrtc::AudioFrame frame; + aec_dump->WriteRenderStreamMessage(frame); + + aec_dump->AddCaptureStreamInput(frame); + aec_dump->AddCaptureStreamOutput(frame); + + aec_dump->WriteCaptureStreamMessage(); + + webrtc::InternalAPMConfig apm_config; + aec_dump->WriteConfig(apm_config); + + webrtc::InternalAPMStreamsConfig streams_config; + aec_dump->WriteInitMessage(streams_config); + } + // Remove file after the AecDump d-tor has finished. + ASSERT_EQ(0, remove(filename.c_str())); +} + +TEST(AecDumper, WriteToFile) { + rtc::TaskQueue file_writer_queue("file_writer_queue"); + + const std::string filename = + webrtc::test::TempFilename(webrtc::test::OutputPath(), "aec_dump"); + + { + std::unique_ptr<webrtc::AecDump> aec_dump = + webrtc::AecDumpFactory::Create(filename, -1, &file_writer_queue); + const webrtc::AudioFrame frame; + aec_dump->WriteRenderStreamMessage(frame); + } + + // Verify the file has been written after the AecDump d-tor has + // finished. + FILE* fid = fopen(filename.c_str(), "r"); + ASSERT_TRUE(fid != NULL); + + // Clean it up. + ASSERT_EQ(0, fclose(fid)); + ASSERT_EQ(0, remove(filename.c_str())); +} diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec_dump/capture_stream_info.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec_dump/capture_stream_info.cc new file mode 100644 index 0000000000..e3284d8822 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec_dump/capture_stream_info.cc @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/aec_dump/capture_stream_info.h" + +namespace webrtc { +CaptureStreamInfo::CaptureStreamInfo(std::unique_ptr<WriteToFileTask> task) + : task_(std::move(task)) { + RTC_DCHECK(task_); + task_->GetEvent()->set_type(audioproc::Event::STREAM); +} + +CaptureStreamInfo::~CaptureStreamInfo() = default; + +void CaptureStreamInfo::AddInput(const FloatAudioFrame& src) { + RTC_DCHECK(task_); + auto* stream = task_->GetEvent()->mutable_stream(); + + for (size_t i = 0; i < src.num_channels(); ++i) { + const auto& channel_view = src.channel(i); + stream->add_input_channel(channel_view.begin(), + sizeof(float) * channel_view.size()); + } +} + +void CaptureStreamInfo::AddOutput(const FloatAudioFrame& src) { + RTC_DCHECK(task_); + auto* stream = task_->GetEvent()->mutable_stream(); + + for (size_t i = 0; i < src.num_channels(); ++i) { + const auto& channel_view = src.channel(i); + stream->add_output_channel(channel_view.begin(), + sizeof(float) * channel_view.size()); + } +} + +void CaptureStreamInfo::AddInput(const AudioFrame& frame) { + RTC_DCHECK(task_); + auto* stream = task_->GetEvent()->mutable_stream(); + const size_t data_size = + sizeof(int16_t) * frame.samples_per_channel_ * frame.num_channels_; + stream->set_input_data(frame.data(), data_size); +} + +void CaptureStreamInfo::AddOutput(const AudioFrame& frame) { + RTC_DCHECK(task_); + auto* stream = task_->GetEvent()->mutable_stream(); + const size_t data_size = + sizeof(int16_t) * frame.samples_per_channel_ * frame.num_channels_; + stream->set_output_data(frame.data(), data_size); +} + +void CaptureStreamInfo::AddAudioProcessingState( + const AecDump::AudioProcessingState& state) { + RTC_DCHECK(task_); + auto* stream = task_->GetEvent()->mutable_stream(); + stream->set_delay(state.delay); + stream->set_drift(state.drift); + stream->set_level(state.level); + stream->set_keypress(state.keypress); +} +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec_dump/capture_stream_info.h b/third_party/libwebrtc/webrtc/modules/audio_processing/aec_dump/capture_stream_info.h new file mode 100644 index 0000000000..9999c3fbd0 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec_dump/capture_stream_info.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC_DUMP_CAPTURE_STREAM_INFO_H_ +#define MODULES_AUDIO_PROCESSING_AEC_DUMP_CAPTURE_STREAM_INFO_H_ + +#include <memory> +#include <utility> +#include <vector> + +#include "modules/audio_processing/aec_dump/write_to_file_task.h" +#include "modules/audio_processing/include/aec_dump.h" +#include "modules/include/module_common_types.h" +#include "rtc_base/checks.h" +#include "rtc_base/ignore_wundef.h" +#include "rtc_base/logging.h" + +// Files generated at build-time by the protobuf compiler. 
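+// The RTC_PUSH/POP_IGNORING_WUNDEF pair below suppresses -Wundef warnings
+// that the generated headers would otherwise trigger.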
+RTC_PUSH_IGNORING_WUNDEF() +#ifdef WEBRTC_ANDROID_PLATFORM_BUILD +#include "external/webrtc/webrtc/modules/audio_processing/debug.pb.h" +#else +#include "modules/audio_processing/debug.pb.h" +#endif +RTC_POP_IGNORING_WUNDEF() + +namespace webrtc { + +class CaptureStreamInfo { + public: + explicit CaptureStreamInfo(std::unique_ptr<WriteToFileTask> task); + ~CaptureStreamInfo(); + void AddInput(const FloatAudioFrame& src); + void AddOutput(const FloatAudioFrame& src); + + void AddInput(const AudioFrame& frame); + void AddOutput(const AudioFrame& frame); + + void AddAudioProcessingState(const AecDump::AudioProcessingState& state); + + std::unique_ptr<WriteToFileTask> GetTask() { + RTC_DCHECK(task_); + return std::move(task_); + } + + void SetTask(std::unique_ptr<WriteToFileTask> task) { + RTC_DCHECK(!task_); + RTC_DCHECK(task); + task_ = std::move(task); + task_->GetEvent()->set_type(audioproc::Event::STREAM); + } + + private: + std::unique_ptr<WriteToFileTask> task_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC_DUMP_CAPTURE_STREAM_INFO_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec_dump/mock_aec_dump.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec_dump/mock_aec_dump.cc new file mode 100644 index 0000000000..aa89e45fe8 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec_dump/mock_aec_dump.cc @@ -0,0 +1,19 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include "modules/audio_processing/aec_dump/mock_aec_dump.h" + +namespace webrtc { + +namespace test { + +MockAecDump::MockAecDump() = default; +MockAecDump::~MockAecDump() = default; +} +} diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec_dump/mock_aec_dump.h b/third_party/libwebrtc/webrtc/modules/audio_processing/aec_dump/mock_aec_dump.h new file mode 100644 index 0000000000..6df6f2849c --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec_dump/mock_aec_dump.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC_DUMP_MOCK_AEC_DUMP_H_ +#define MODULES_AUDIO_PROCESSING_AEC_DUMP_MOCK_AEC_DUMP_H_ + +#include <memory> + +#include "modules/audio_processing/include/aec_dump.h" +#include "modules/include/module_common_types.h" +#include "test/gmock.h" + +namespace webrtc { + +namespace test { + +class MockAecDump : public AecDump { + public: + MockAecDump(); + virtual ~MockAecDump(); + + MOCK_METHOD1(WriteInitMessage, + void(const InternalAPMStreamsConfig& streams_config)); + + MOCK_METHOD1(AddCaptureStreamInput, void(const FloatAudioFrame& src)); + MOCK_METHOD1(AddCaptureStreamOutput, void(const FloatAudioFrame& src)); + MOCK_METHOD1(AddCaptureStreamInput, void(const AudioFrame& frame)); + MOCK_METHOD1(AddCaptureStreamOutput, void(const AudioFrame& frame)); + MOCK_METHOD1(AddAudioProcessingState, + void(const AudioProcessingState& state)); + MOCK_METHOD0(WriteCaptureStreamMessage, void()); + + MOCK_METHOD1(WriteRenderStreamMessage, void(const AudioFrame& frame)); + MOCK_METHOD1(WriteRenderStreamMessage, void(const FloatAudioFrame& src)); + + MOCK_METHOD1(WriteConfig, void(const InternalAPMConfig& config)); +}; + +} // namespace test + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC_DUMP_MOCK_AEC_DUMP_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec_dump/null_aec_dump_factory.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec_dump/null_aec_dump_factory.cc new file mode 100644 index 0000000000..5623e24aa5 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec_dump/null_aec_dump_factory.cc @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec_dump/aec_dump_factory.h" +#include "modules/audio_processing/include/aec_dump.h" + +namespace webrtc { + +std::unique_ptr<AecDump> AecDumpFactory::Create(rtc::PlatformFile file, + int64_t max_log_size_bytes, + rtc::TaskQueue* worker_queue) { + return nullptr; +} + +std::unique_ptr<AecDump> AecDumpFactory::Create(std::string file_name, + int64_t max_log_size_bytes, + rtc::TaskQueue* worker_queue) { + return nullptr; +} + +std::unique_ptr<AecDump> AecDumpFactory::Create(FILE* handle, + int64_t max_log_size_bytes, + rtc::TaskQueue* worker_queue) { + return nullptr; +} +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec_dump/null_aec_dump_factory_gn/moz.build b/third_party/libwebrtc/webrtc/modules/audio_processing/aec_dump/null_aec_dump_factory_gn/moz.build new file mode 100644 index 0000000000..610edcdef5 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec_dump/null_aec_dump_factory_gn/moz.build @@ -0,0 +1,217 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["CHROMIUM_BUILD"] = True +DEFINES["V8_DEPRECATION_WARNINGS"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_RESTRICT_LOGGING"] = True + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "/ipc/chromium/src", + "/ipc/glue", + "/third_party/libwebrtc/webrtc/" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/webrtc/modules/audio_processing/aec_dump/null_aec_dump_factory.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + DEFINES["WTF_USE_DYNAMIC_ANNOTATIONS"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION"] = "r12b" + DEFINES["DISABLE_NACL"] = True + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["NO_TCMALLOC"] = True + DEFINES["USE_OPENSSL_CERTS"] = "1" + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["__GNU_SOURCE"] = "1" + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["NO_TCMALLOC"] = True + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORE"] = "0" + + OS_LIBS += [ + "-framework Foundation" + ] + +if CONFIG["OS_TARGET"] == "DragonFly": + + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + +if CONFIG["OS_TARGET"] == "FreeBSD": + + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + +if CONFIG["OS_TARGET"] == "NetBSD": + + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NO_TCMALLOC"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "1" + DEFINES["UNICODE"] = True + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_CRT_SECURE_NO_WARNINGS"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_USING_V110_SDK71_"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + 
DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if not CONFIG["MOZ_DEBUG"] and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_FORTIFY_SOURCE"] = "2" + +if not CONFIG["MOZ_DEBUG"] and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_FORTIFY_SOURCE"] = "2" + +if not CONFIG["MOZ_DEBUG"] and CONFIG["OS_TARGET"] == "DragonFly": + + DEFINES["_FORTIFY_SOURCE"] = "2" + +if not CONFIG["MOZ_DEBUG"] and CONFIG["OS_TARGET"] == "FreeBSD": + + DEFINES["_FORTIFY_SOURCE"] = "2" + +if not CONFIG["MOZ_DEBUG"] and CONFIG["OS_TARGET"] == "NetBSD": + + DEFINES["_FORTIFY_SOURCE"] = "2" + +if not CONFIG["MOZ_DEBUG"] and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_FORTIFY_SOURCE"] = "2" + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["CR_XCODE_VERSION"] = "0120" + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["CR_XCODE_VERSION"] = "0920" + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "FreeBSD": + + CXXFLAGS += [ + "-msse2" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["DISABLE_NACL"] = True + DEFINES["NO_TCMALLOC"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "NetBSD": + + CXXFLAGS += [ + "-msse2" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "OpenBSD": + + CXXFLAGS += [ + "-msse2" + ] + +Library("null_aec_dump_factory_gn") diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec_dump/write_to_file_task.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aec_dump/write_to_file_task.cc new file mode 100644 index 0000000000..5fdfd4ec82 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec_dump/write_to_file_task.cc @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/aec_dump/write_to_file_task.h" + +#include "rtc_base/protobuf_utils.h" + +namespace webrtc { + +WriteToFileTask::WriteToFileTask(webrtc::FileWrapper* debug_file, + int64_t* num_bytes_left_for_log) + : debug_file_(debug_file), + num_bytes_left_for_log_(num_bytes_left_for_log) {} + +WriteToFileTask::~WriteToFileTask() = default; + +audioproc::Event* WriteToFileTask::GetEvent() { + return &event_; +} + +bool WriteToFileTask::IsRoomForNextEvent(size_t event_byte_size) const { + int64_t next_message_size = event_byte_size + sizeof(int32_t); + return (*num_bytes_left_for_log_ < 0) || + (*num_bytes_left_for_log_ >= next_message_size); +} + +void WriteToFileTask::UpdateBytesLeft(size_t event_byte_size) { + RTC_DCHECK(IsRoomForNextEvent(event_byte_size)); + if (*num_bytes_left_for_log_ >= 0) { + *num_bytes_left_for_log_ -= (sizeof(int32_t) + event_byte_size); + } +} + +bool WriteToFileTask::Run() { + if (!debug_file_->is_open()) { + return true; + } + + ProtoString event_string; + event_.SerializeToString(&event_string); + + const size_t event_byte_size = event_.ByteSize(); + + if (!IsRoomForNextEvent(event_byte_size)) { + debug_file_->CloseFile(); + return true; + } + + UpdateBytesLeft(event_byte_size); + + // Write message preceded by its size. + if (!debug_file_->Write(&event_byte_size, sizeof(int32_t))) { + RTC_NOTREACHED(); + } + if (!debug_file_->Write(event_string.data(), event_string.length())) { + RTC_NOTREACHED(); + } + return true; // Delete task from queue at once. +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec_dump/write_to_file_task.h b/third_party/libwebrtc/webrtc/modules/audio_processing/aec_dump/write_to_file_task.h new file mode 100644 index 0000000000..7301473247 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec_dump/write_to_file_task.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC_DUMP_WRITE_TO_FILE_TASK_H_ +#define MODULES_AUDIO_PROCESSING_AEC_DUMP_WRITE_TO_FILE_TASK_H_ + +#include <memory> +#include <string> +#include <utility> + +#include "rtc_base/checks.h" +#include "rtc_base/event.h" +#include "rtc_base/ignore_wundef.h" +#include "rtc_base/platform_file.h" +#include "rtc_base/task_queue.h" +#include "system_wrappers/include/file_wrapper.h" + +// Files generated at build-time by the protobuf compiler. 
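+// Each dump record produced by WriteToFileTask::Run() is an audioproc::Event
+// preceded by its serialized byte size, so the file is a flat sequence of
+// [int32 size][size bytes of Event] entries; a negative size budget disables
+// the log-size cap. A minimal reader sketch (illustrative only; assumes the
+// writer's native byte order):
+//
+//   FILE* f = fopen("aecdump.pb", "rb");
+//   int32_t size = 0;
+//   while (fread(&size, sizeof(size), 1, f) == 1) {
+//     std::string buf(size, '\0');
+//     if (fread(&buf[0], 1, buf.size(), f) != buf.size()) break;
+//     audioproc::Event event;
+//     event.ParseFromString(buf);  // Dispatch on the event type here.
+//   }
+//   fclose(f);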
+RTC_PUSH_IGNORING_WUNDEF() +#ifdef WEBRTC_ANDROID_PLATFORM_BUILD +#include "external/webrtc/webrtc/modules/audio_processing/debug.pb.h" +#else +#include "modules/audio_processing/debug.pb.h" +#endif +RTC_POP_IGNORING_WUNDEF() + +namespace webrtc { + +class WriteToFileTask : public rtc::QueuedTask { + public: + WriteToFileTask(webrtc::FileWrapper* debug_file, + int64_t* num_bytes_left_for_log); + ~WriteToFileTask() override; + + audioproc::Event* GetEvent(); + + private: + bool IsRoomForNextEvent(size_t event_byte_size) const; + + void UpdateBytesLeft(size_t event_byte_size); + + bool Run() override; + + webrtc::FileWrapper* debug_file_; + audioproc::Event event_; + int64_t* num_bytes_left_for_log_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC_DUMP_WRITE_TO_FILE_TASK_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aec_dump_interface_gn/moz.build b/third_party/libwebrtc/webrtc/modules/audio_processing/aec_dump_interface_gn/moz.build new file mode 100644 index 0000000000..0f2de68370 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aec_dump_interface_gn/moz.build @@ -0,0 +1,217 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["CHROMIUM_BUILD"] = True +DEFINES["V8_DEPRECATION_WARNINGS"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_RESTRICT_LOGGING"] = True + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "/ipc/chromium/src", + "/ipc/glue", + "/third_party/libwebrtc/webrtc/" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/webrtc/modules/audio_processing/include/aec_dump.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + DEFINES["WTF_USE_DYNAMIC_ANNOTATIONS"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION"] = "r12b" + DEFINES["DISABLE_NACL"] = True + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["NO_TCMALLOC"] = True + DEFINES["USE_OPENSSL_CERTS"] = "1" + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["__GNU_SOURCE"] = "1" + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["NO_TCMALLOC"] = True + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORE"] = "0" + + OS_LIBS += [ + "-framework Foundation" + ] + +if CONFIG["OS_TARGET"] == "DragonFly": + + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + +if CONFIG["OS_TARGET"] == "FreeBSD": + + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_LINUX"] = 
True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + +if CONFIG["OS_TARGET"] == "NetBSD": + + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NO_TCMALLOC"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "1" + DEFINES["UNICODE"] = True + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_CRT_SECURE_NO_WARNINGS"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_USING_V110_SDK71_"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if not CONFIG["MOZ_DEBUG"] and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_FORTIFY_SOURCE"] = "2" + +if not CONFIG["MOZ_DEBUG"] and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_FORTIFY_SOURCE"] = "2" + +if not CONFIG["MOZ_DEBUG"] and CONFIG["OS_TARGET"] == "DragonFly": + + DEFINES["_FORTIFY_SOURCE"] = "2" + +if not CONFIG["MOZ_DEBUG"] and CONFIG["OS_TARGET"] == "FreeBSD": + + DEFINES["_FORTIFY_SOURCE"] = "2" + +if not CONFIG["MOZ_DEBUG"] and CONFIG["OS_TARGET"] == "NetBSD": + + DEFINES["_FORTIFY_SOURCE"] = "2" + +if not CONFIG["MOZ_DEBUG"] and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_FORTIFY_SOURCE"] = "2" + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["CR_XCODE_VERSION"] = "0120" + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["CR_XCODE_VERSION"] = "0920" + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "FreeBSD": + + CXXFLAGS += [ + "-msse2" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["DISABLE_NACL"] = True + DEFINES["NO_TCMALLOC"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "NetBSD": + + CXXFLAGS += [ + "-msse2" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "OpenBSD": + + CXXFLAGS += [ + "-msse2" + ] + +Library("aec_dump_interface_gn") diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aecm/aecm_core.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aecm/aecm_core.cc new file mode 100644 index 0000000000..d69dc1ce76 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aecm/aecm_core.cc @@ -0,0 +1,1232 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aecm/aecm_core.h" + +#include <stddef.h> +#include <stdlib.h> + +extern "C" { +#include "common_audio/ring_buffer.h" +#include "common_audio/signal_processing/include/real_fft.h" +} +#include "modules/audio_processing/aecm/echo_control_mobile.h" +#include "modules/audio_processing/utility/delay_estimator_wrapper.h" +extern "C" { +#include "system_wrappers/include/cpu_features_wrapper.h" +} + +#include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_conversions.h" +#include "typedefs.h" // NOLINT(build/include) + +#ifdef AEC_DEBUG +FILE *dfile; +FILE *testfile; +#endif + +const int16_t WebRtcAecm_kCosTable[] = { + 8192, 8190, 8187, 8180, 8172, 8160, 8147, 8130, 8112, + 8091, 8067, 8041, 8012, 7982, 7948, 7912, 7874, 7834, + 7791, 7745, 7697, 7647, 7595, 7540, 7483, 7424, 7362, + 7299, 7233, 7164, 7094, 7021, 6947, 6870, 6791, 6710, + 6627, 6542, 6455, 6366, 6275, 6182, 6087, 5991, 5892, + 5792, 5690, 5586, 5481, 5374, 5265, 5155, 5043, 4930, + 4815, 4698, 4580, 4461, 4341, 4219, 4096, 3971, 3845, + 3719, 3591, 3462, 3331, 3200, 3068, 2935, 2801, 2667, + 2531, 2395, 2258, 2120, 1981, 1842, 1703, 1563, 1422, + 1281, 1140, 998, 856, 713, 571, 428, 285, 142, + 0, -142, -285, -428, -571, -713, -856, -998, -1140, + -1281, -1422, -1563, -1703, -1842, -1981, -2120, -2258, -2395, + -2531, -2667, -2801, -2935, -3068, -3200, -3331, -3462, -3591, + -3719, -3845, -3971, -4095, -4219, -4341, -4461, -4580, -4698, + -4815, -4930, -5043, -5155, -5265, -5374, -5481, -5586, -5690, + -5792, -5892, -5991, -6087, -6182, -6275, -6366, -6455, -6542, + -6627, -6710, -6791, -6870, -6947, -7021, -7094, -7164, -7233, + -7299, -7362, -7424, -7483, -7540, -7595, -7647, -7697, -7745, + -7791, -7834, -7874, -7912, -7948, -7982, -8012, -8041, -8067, + -8091, -8112, -8130, -8147, -8160, -8172, -8180, -8187, -8190, + -8191, -8190, -8187, -8180, -8172, -8160, -8147, -8130, -8112, + -8091, -8067, -8041, -8012, -7982, -7948, -7912, -7874, -7834, + -7791, -7745, -7697, -7647, -7595, -7540, -7483, -7424, -7362, + -7299, -7233, -7164, -7094, -7021, -6947, -6870, -6791, -6710, + -6627, -6542, -6455, -6366, -6275, -6182, -6087, -5991, -5892, + -5792, -5690, -5586, -5481, -5374, -5265, -5155, -5043, -4930, + -4815, -4698, -4580, -4461, -4341, -4219, -4096, -3971, -3845, + -3719, -3591, -3462, -3331, -3200, -3068, -2935, -2801, -2667, + -2531, -2395, -2258, -2120, -1981, -1842, -1703, -1563, -1422, + -1281, -1140, -998, -856, -713, -571, -428, -285, -142, + 0, 142, 285, 428, 571, 713, 856, 998, 1140, + 1281, 1422, 1563, 1703, 1842, 1981, 2120, 2258, 2395, + 2531, 2667, 2801, 2935, 3068, 3200, 3331, 3462, 3591, + 3719, 3845, 3971, 4095, 4219, 4341, 4461, 4580, 4698, + 4815, 4930, 5043, 5155, 5265, 5374, 5481, 5586, 5690, + 5792, 5892, 5991, 6087, 6182, 6275, 6366, 6455, 6542, + 6627, 6710, 6791, 6870, 6947, 7021, 7094, 7164, 7233, + 7299, 7362, 7424, 7483, 7540, 7595, 7647, 7697, 7745, + 7791, 7834, 7874, 7912, 7948, 7982, 8012, 8041, 8067, + 8091, 8112, 8130, 8147, 8160, 8172, 8180, 8187, 8190 +}; + +const int16_t WebRtcAecm_kSinTable[] = { + 0, 142, 285, 428, 571, 713, 856, 998, + 1140, 1281, 1422, 1563, 1703, 1842, 1981, 2120, + 2258, 2395, 2531, 2667, 
2801, 2935, 3068, 3200, + 3331, 3462, 3591, 3719, 3845, 3971, 4095, 4219, + 4341, 4461, 4580, 4698, 4815, 4930, 5043, 5155, + 5265, 5374, 5481, 5586, 5690, 5792, 5892, 5991, + 6087, 6182, 6275, 6366, 6455, 6542, 6627, 6710, + 6791, 6870, 6947, 7021, 7094, 7164, 7233, 7299, + 7362, 7424, 7483, 7540, 7595, 7647, 7697, 7745, + 7791, 7834, 7874, 7912, 7948, 7982, 8012, 8041, + 8067, 8091, 8112, 8130, 8147, 8160, 8172, 8180, + 8187, 8190, 8191, 8190, 8187, 8180, 8172, 8160, + 8147, 8130, 8112, 8091, 8067, 8041, 8012, 7982, + 7948, 7912, 7874, 7834, 7791, 7745, 7697, 7647, + 7595, 7540, 7483, 7424, 7362, 7299, 7233, 7164, + 7094, 7021, 6947, 6870, 6791, 6710, 6627, 6542, + 6455, 6366, 6275, 6182, 6087, 5991, 5892, 5792, + 5690, 5586, 5481, 5374, 5265, 5155, 5043, 4930, + 4815, 4698, 4580, 4461, 4341, 4219, 4096, 3971, + 3845, 3719, 3591, 3462, 3331, 3200, 3068, 2935, + 2801, 2667, 2531, 2395, 2258, 2120, 1981, 1842, + 1703, 1563, 1422, 1281, 1140, 998, 856, 713, + 571, 428, 285, 142, 0, -142, -285, -428, + -571, -713, -856, -998, -1140, -1281, -1422, -1563, + -1703, -1842, -1981, -2120, -2258, -2395, -2531, -2667, + -2801, -2935, -3068, -3200, -3331, -3462, -3591, -3719, + -3845, -3971, -4095, -4219, -4341, -4461, -4580, -4698, + -4815, -4930, -5043, -5155, -5265, -5374, -5481, -5586, + -5690, -5792, -5892, -5991, -6087, -6182, -6275, -6366, + -6455, -6542, -6627, -6710, -6791, -6870, -6947, -7021, + -7094, -7164, -7233, -7299, -7362, -7424, -7483, -7540, + -7595, -7647, -7697, -7745, -7791, -7834, -7874, -7912, + -7948, -7982, -8012, -8041, -8067, -8091, -8112, -8130, + -8147, -8160, -8172, -8180, -8187, -8190, -8191, -8190, + -8187, -8180, -8172, -8160, -8147, -8130, -8112, -8091, + -8067, -8041, -8012, -7982, -7948, -7912, -7874, -7834, + -7791, -7745, -7697, -7647, -7595, -7540, -7483, -7424, + -7362, -7299, -7233, -7164, -7094, -7021, -6947, -6870, + -6791, -6710, -6627, -6542, -6455, -6366, -6275, -6182, + -6087, -5991, -5892, -5792, -5690, -5586, -5481, -5374, + -5265, -5155, -5043, -4930, -4815, -4698, -4580, -4461, + -4341, -4219, -4096, -3971, -3845, -3719, -3591, -3462, + -3331, -3200, -3068, -2935, -2801, -2667, -2531, -2395, + -2258, -2120, -1981, -1842, -1703, -1563, -1422, -1281, + -1140, -998, -856, -713, -571, -428, -285, -142 +}; + +// Initialization table for echo channel in 8 kHz +static const int16_t kChannelStored8kHz[PART_LEN1] = { + 2040, 1815, 1590, 1498, 1405, 1395, 1385, 1418, + 1451, 1506, 1562, 1644, 1726, 1804, 1882, 1918, + 1953, 1982, 2010, 2025, 2040, 2034, 2027, 2021, + 2014, 1997, 1980, 1925, 1869, 1800, 1732, 1683, + 1635, 1604, 1572, 1545, 1517, 1481, 1444, 1405, + 1367, 1331, 1294, 1270, 1245, 1239, 1233, 1247, + 1260, 1282, 1303, 1338, 1373, 1407, 1441, 1470, + 1499, 1524, 1549, 1565, 1582, 1601, 1621, 1649, + 1676 +}; + +// Initialization table for echo channel in 16 kHz +static const int16_t kChannelStored16kHz[PART_LEN1] = { + 2040, 1590, 1405, 1385, 1451, 1562, 1726, 1882, + 1953, 2010, 2040, 2027, 2014, 1980, 1869, 1732, + 1635, 1572, 1517, 1444, 1367, 1294, 1245, 1233, + 1260, 1303, 1373, 1441, 1499, 1549, 1582, 1621, + 1676, 1741, 1802, 1861, 1921, 1983, 2040, 2102, + 2170, 2265, 2375, 2515, 2651, 2781, 2922, 3075, + 3253, 3471, 3738, 3976, 4151, 4258, 4308, 4288, + 4270, 4253, 4237, 4179, 4086, 3947, 3757, 3484, + 3153 +}; + +// Moves the pointer to the next entry and inserts |far_spectrum| and +// corresponding Q-domain in its buffer. 
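+// |far_history| is a flat array of MAX_DELAY spectra, PART_LEN1 bins each,
+// used as a circular buffer: entry i starts at far_history + i * PART_LEN1,
+// and |far_history_pos| wraps to 0 after reaching MAX_DELAY - 1. Equivalent
+// indexing, for illustration:
+//
+//   int pos = (self->far_history_pos + 1) % MAX_DELAY;         // advance
+//   self->far_q_domains[pos] = far_q;                          // Q-domain
+//   memcpy(&self->far_history[pos * PART_LEN1], far_spectrum,  // spectrum
+//          sizeof(uint16_t) * PART_LEN1);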
+// +// Inputs: +// - self : Pointer to the delay estimation instance +// - far_spectrum : Pointer to the far end spectrum +// - far_q : Q-domain of far end spectrum +// +void WebRtcAecm_UpdateFarHistory(AecmCore* self, + uint16_t* far_spectrum, + int far_q) { + // Get new buffer position + self->far_history_pos++; + if (self->far_history_pos >= MAX_DELAY) { + self->far_history_pos = 0; + } + // Update Q-domain buffer + self->far_q_domains[self->far_history_pos] = far_q; + // Update far end spectrum buffer + memcpy(&(self->far_history[self->far_history_pos * PART_LEN1]), + far_spectrum, + sizeof(uint16_t) * PART_LEN1); +} + +// Returns a pointer to the far end spectrum aligned to current near end +// spectrum. The function WebRtc_DelayEstimatorProcessFix(...) should have been +// called before AlignedFarend(...). Otherwise, you get the pointer to the +// previous frame. The memory is only valid until the next call of +// WebRtc_DelayEstimatorProcessFix(...). +// +// Inputs: +// - self : Pointer to the AECM instance. +// - delay : Current delay estimate. +// +// Output: +// - far_q : The Q-domain of the aligned far end spectrum +// +// Return value: +// - far_spectrum : Pointer to the aligned far end spectrum +// NULL - Error +// +const uint16_t* WebRtcAecm_AlignedFarend(AecmCore* self, + int* far_q, + int delay) { + int buffer_position = 0; + RTC_DCHECK(self); + buffer_position = self->far_history_pos - delay; + + // Check buffer position + if (buffer_position < 0) { + buffer_position += MAX_DELAY; + } + // Get Q-domain + *far_q = self->far_q_domains[buffer_position]; + // Return far end spectrum + return &(self->far_history[buffer_position * PART_LEN1]); +} + +// Declare function pointers. +CalcLinearEnergies WebRtcAecm_CalcLinearEnergies; +StoreAdaptiveChannel WebRtcAecm_StoreAdaptiveChannel; +ResetAdaptiveChannel WebRtcAecm_ResetAdaptiveChannel; + +AecmCore* WebRtcAecm_CreateCore() { + AecmCore* aecm = static_cast<AecmCore*>(malloc(sizeof(AecmCore))); + + aecm->farFrameBuf = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, + sizeof(int16_t)); + if (!aecm->farFrameBuf) + { + WebRtcAecm_FreeCore(aecm); + return NULL; + } + + aecm->nearNoisyFrameBuf = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, + sizeof(int16_t)); + if (!aecm->nearNoisyFrameBuf) + { + WebRtcAecm_FreeCore(aecm); + return NULL; + } + + aecm->nearCleanFrameBuf = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, + sizeof(int16_t)); + if (!aecm->nearCleanFrameBuf) + { + WebRtcAecm_FreeCore(aecm); + return NULL; + } + + aecm->outFrameBuf = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, + sizeof(int16_t)); + if (!aecm->outFrameBuf) + { + WebRtcAecm_FreeCore(aecm); + return NULL; + } + + aecm->delay_estimator_farend = WebRtc_CreateDelayEstimatorFarend(PART_LEN1, + MAX_DELAY); + if (aecm->delay_estimator_farend == NULL) { + WebRtcAecm_FreeCore(aecm); + return NULL; + } + aecm->delay_estimator = + WebRtc_CreateDelayEstimator(aecm->delay_estimator_farend, 0); + if (aecm->delay_estimator == NULL) { + WebRtcAecm_FreeCore(aecm); + return NULL; + } + // TODO(bjornv): Explicitly disable robust delay validation until no + // performance regression has been established. Then remove the line. + WebRtc_enable_robust_validation(aecm->delay_estimator, 0); + + aecm->real_fft = WebRtcSpl_CreateRealFFT(PART_LEN_SHIFT); + if (aecm->real_fft == NULL) { + WebRtcAecm_FreeCore(aecm); + return NULL; + } + + // Init some aecm pointers. 16 and 32 byte alignment is only necessary + // for Neon code currently. 
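+  // Each pointer is rounded up to the next 16- or 32-byte boundary with the
+  // standard mask trick; the *_buf arrays are over-allocated by 8 or 16
+  // elements above to leave room for the adjustment. For a 32-byte boundary:
+  //
+  //   uintptr_t raw = (uintptr_t)aecm->xBuf_buf;
+  //   uintptr_t aligned = (raw + 31) & ~(uintptr_t)31;  // round up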
+ aecm->xBuf = (int16_t*) (((uintptr_t)aecm->xBuf_buf + 31) & ~ 31); + aecm->dBufClean = (int16_t*) (((uintptr_t)aecm->dBufClean_buf + 31) & ~ 31); + aecm->dBufNoisy = (int16_t*) (((uintptr_t)aecm->dBufNoisy_buf + 31) & ~ 31); + aecm->outBuf = (int16_t*) (((uintptr_t)aecm->outBuf_buf + 15) & ~ 15); + aecm->channelStored = (int16_t*) (((uintptr_t) + aecm->channelStored_buf + 15) & ~ 15); + aecm->channelAdapt16 = (int16_t*) (((uintptr_t) + aecm->channelAdapt16_buf + 15) & ~ 15); + aecm->channelAdapt32 = (int32_t*) (((uintptr_t) + aecm->channelAdapt32_buf + 31) & ~ 31); + + return aecm; +} + +void WebRtcAecm_InitEchoPathCore(AecmCore* aecm, const int16_t* echo_path) { + int i = 0; + + // Reset the stored channel + memcpy(aecm->channelStored, echo_path, sizeof(int16_t) * PART_LEN1); + // Reset the adapted channels + memcpy(aecm->channelAdapt16, echo_path, sizeof(int16_t) * PART_LEN1); + for (i = 0; i < PART_LEN1; i++) + { + aecm->channelAdapt32[i] = (int32_t)aecm->channelAdapt16[i] << 16; + } + + // Reset channel storing variables + aecm->mseAdaptOld = 1000; + aecm->mseStoredOld = 1000; + aecm->mseThreshold = WEBRTC_SPL_WORD32_MAX; + aecm->mseChannelCount = 0; +} + +static void CalcLinearEnergiesC(AecmCore* aecm, + const uint16_t* far_spectrum, + int32_t* echo_est, + uint32_t* far_energy, + uint32_t* echo_energy_adapt, + uint32_t* echo_energy_stored) { + int i; + + // Get energy for the delayed far end signal and estimated + // echo using both stored and adapted channels. + for (i = 0; i < PART_LEN1; i++) + { + echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], + far_spectrum[i]); + (*far_energy) += (uint32_t)(far_spectrum[i]); + *echo_energy_adapt += aecm->channelAdapt16[i] * far_spectrum[i]; + (*echo_energy_stored) += (uint32_t)echo_est[i]; + } +} + +static void StoreAdaptiveChannelC(AecmCore* aecm, + const uint16_t* far_spectrum, + int32_t* echo_est) { + int i; + + // During startup we store the channel every block. + memcpy(aecm->channelStored, aecm->channelAdapt16, sizeof(int16_t) * PART_LEN1); + // Recalculate echo estimate + for (i = 0; i < PART_LEN; i += 4) + { + echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], + far_spectrum[i]); + echo_est[i + 1] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i + 1], + far_spectrum[i + 1]); + echo_est[i + 2] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i + 2], + far_spectrum[i + 2]); + echo_est[i + 3] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i + 3], + far_spectrum[i + 3]); + } + echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], + far_spectrum[i]); +} + +static void ResetAdaptiveChannelC(AecmCore* aecm) { + int i; + + // The stored channel has a significantly lower MSE than the adaptive one for + // two consecutive calculations. Reset the adaptive channel. + memcpy(aecm->channelAdapt16, aecm->channelStored, + sizeof(int16_t) * PART_LEN1); + // Restore the W32 channel + for (i = 0; i < PART_LEN; i += 4) + { + aecm->channelAdapt32[i] = (int32_t)aecm->channelStored[i] << 16; + aecm->channelAdapt32[i + 1] = (int32_t)aecm->channelStored[i + 1] << 16; + aecm->channelAdapt32[i + 2] = (int32_t)aecm->channelStored[i + 2] << 16; + aecm->channelAdapt32[i + 3] = (int32_t)aecm->channelStored[i + 3] << 16; + } + aecm->channelAdapt32[i] = (int32_t)aecm->channelStored[i] << 16; +} + +// Initialize function pointers for ARM Neon platform. 
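+// WebRtcAecm_InitCore() installs the generic C implementations first; the
+// platform init functions below then overwrite the three function pointers,
+// so dispatch is fixed once at init time rather than decided per call.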
+#if defined(WEBRTC_HAS_NEON) +static void WebRtcAecm_InitNeon(void) +{ + WebRtcAecm_StoreAdaptiveChannel = WebRtcAecm_StoreAdaptiveChannelNeon; + WebRtcAecm_ResetAdaptiveChannel = WebRtcAecm_ResetAdaptiveChannelNeon; + WebRtcAecm_CalcLinearEnergies = WebRtcAecm_CalcLinearEnergiesNeon; +} +#endif + +// Initialize function pointers for MIPS platform. +#if defined(MIPS32_LE) +static void WebRtcAecm_InitMips(void) +{ +#if defined(MIPS_DSP_R1_LE) + WebRtcAecm_StoreAdaptiveChannel = WebRtcAecm_StoreAdaptiveChannel_mips; + WebRtcAecm_ResetAdaptiveChannel = WebRtcAecm_ResetAdaptiveChannel_mips; +#endif + WebRtcAecm_CalcLinearEnergies = WebRtcAecm_CalcLinearEnergies_mips; +} +#endif + +// WebRtcAecm_InitCore(...) +// +// This function initializes the AECM instant created with WebRtcAecm_CreateCore(...) +// Input: +// - aecm : Pointer to the Echo Suppression instance +// - samplingFreq : Sampling Frequency +// +// Output: +// - aecm : Initialized instance +// +// Return value : 0 - Ok +// -1 - Error +// +int WebRtcAecm_InitCore(AecmCore* const aecm, int samplingFreq) { + int i = 0; + int32_t tmp32 = PART_LEN1 * PART_LEN1; + int16_t tmp16 = PART_LEN1; + + if (samplingFreq != 8000 && samplingFreq != 16000) + { + samplingFreq = 8000; + return -1; + } + // sanity check of sampling frequency + aecm->mult = (int16_t)samplingFreq / 8000; + + aecm->farBufWritePos = 0; + aecm->farBufReadPos = 0; + aecm->knownDelay = 0; + aecm->lastKnownDelay = 0; + + WebRtc_InitBuffer(aecm->farFrameBuf); + WebRtc_InitBuffer(aecm->nearNoisyFrameBuf); + WebRtc_InitBuffer(aecm->nearCleanFrameBuf); + WebRtc_InitBuffer(aecm->outFrameBuf); + + memset(aecm->xBuf_buf, 0, sizeof(aecm->xBuf_buf)); + memset(aecm->dBufClean_buf, 0, sizeof(aecm->dBufClean_buf)); + memset(aecm->dBufNoisy_buf, 0, sizeof(aecm->dBufNoisy_buf)); + memset(aecm->outBuf_buf, 0, sizeof(aecm->outBuf_buf)); + + aecm->seed = 666; + aecm->totCount = 0; + + if (WebRtc_InitDelayEstimatorFarend(aecm->delay_estimator_farend) != 0) { + return -1; + } + if (WebRtc_InitDelayEstimator(aecm->delay_estimator) != 0) { + return -1; + } + // Set far end histories to zero + memset(aecm->far_history, 0, sizeof(uint16_t) * PART_LEN1 * MAX_DELAY); + memset(aecm->far_q_domains, 0, sizeof(int) * MAX_DELAY); + aecm->far_history_pos = MAX_DELAY; + + aecm->nlpFlag = 1; + aecm->fixedDelay = -1; + + aecm->dfaCleanQDomain = 0; + aecm->dfaCleanQDomainOld = 0; + aecm->dfaNoisyQDomain = 0; + aecm->dfaNoisyQDomainOld = 0; + + memset(aecm->nearLogEnergy, 0, sizeof(aecm->nearLogEnergy)); + aecm->farLogEnergy = 0; + memset(aecm->echoAdaptLogEnergy, 0, sizeof(aecm->echoAdaptLogEnergy)); + memset(aecm->echoStoredLogEnergy, 0, sizeof(aecm->echoStoredLogEnergy)); + + // Initialize the echo channels with a stored shape. + if (samplingFreq == 8000) + { + WebRtcAecm_InitEchoPathCore(aecm, kChannelStored8kHz); + } + else + { + WebRtcAecm_InitEchoPathCore(aecm, kChannelStored16kHz); + } + + memset(aecm->echoFilt, 0, sizeof(aecm->echoFilt)); + memset(aecm->nearFilt, 0, sizeof(aecm->nearFilt)); + aecm->noiseEstCtr = 0; + + aecm->cngMode = AecmTrue; + + memset(aecm->noiseEstTooLowCtr, 0, sizeof(aecm->noiseEstTooLowCtr)); + memset(aecm->noiseEstTooHighCtr, 0, sizeof(aecm->noiseEstTooHighCtr)); + // Shape the initial noise level to an approximate pink noise. 
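+  // The update below uses the identity k*k - (2*(k - 1) + 1) = (k - 1)*(k - 1):
+  // tmp32 starts at PART_LEN1^2 and tmp16 at PART_LEN1, so iteration i leaves
+  // noiseEst[i] = (PART_LEN1 - i)^2 << 8. The initial estimate therefore falls
+  // off quadratically over the lower half of the spectrum and stays flat above.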
+ for (i = 0; i < (PART_LEN1 >> 1) - 1; i++) + { + aecm->noiseEst[i] = (tmp32 << 8); + tmp16--; + tmp32 -= (int32_t)((tmp16 << 1) + 1); + } + for (; i < PART_LEN1; i++) + { + aecm->noiseEst[i] = (tmp32 << 8); + } + + aecm->farEnergyMin = WEBRTC_SPL_WORD16_MAX; + aecm->farEnergyMax = WEBRTC_SPL_WORD16_MIN; + aecm->farEnergyMaxMin = 0; + aecm->farEnergyVAD = FAR_ENERGY_MIN; // This prevents false speech detection at the + // beginning. + aecm->farEnergyMSE = 0; + aecm->currentVADValue = 0; + aecm->vadUpdateCount = 0; + aecm->firstVAD = 1; + + aecm->startupState = 0; + aecm->supGain = SUPGAIN_DEFAULT; + aecm->supGainOld = SUPGAIN_DEFAULT; + + aecm->supGainErrParamA = SUPGAIN_ERROR_PARAM_A; + aecm->supGainErrParamD = SUPGAIN_ERROR_PARAM_D; + aecm->supGainErrParamDiffAB = SUPGAIN_ERROR_PARAM_A - SUPGAIN_ERROR_PARAM_B; + aecm->supGainErrParamDiffBD = SUPGAIN_ERROR_PARAM_B - SUPGAIN_ERROR_PARAM_D; + + // Assert a preprocessor definition at compile-time. It's an assumption + // used in assembly code, so check the assembly files before any change. + static_assert(PART_LEN % 16 == 0, "PART_LEN is not a multiple of 16"); + + // Initialize function pointers. + WebRtcAecm_CalcLinearEnergies = CalcLinearEnergiesC; + WebRtcAecm_StoreAdaptiveChannel = StoreAdaptiveChannelC; + WebRtcAecm_ResetAdaptiveChannel = ResetAdaptiveChannelC; + +#if defined(WEBRTC_HAS_NEON) + WebRtcAecm_InitNeon(); +#endif + +#if defined(MIPS32_LE) + WebRtcAecm_InitMips(); +#endif + return 0; +} + +// TODO(bjornv): This function is currently not used. Add support for these +// parameters from a higher level +int WebRtcAecm_Control(AecmCore* aecm, int delay, int nlpFlag) { + aecm->nlpFlag = nlpFlag; + aecm->fixedDelay = delay; + + return 0; +} + +void WebRtcAecm_FreeCore(AecmCore* aecm) { + if (aecm == NULL) { + return; + } + + WebRtc_FreeBuffer(aecm->farFrameBuf); + WebRtc_FreeBuffer(aecm->nearNoisyFrameBuf); + WebRtc_FreeBuffer(aecm->nearCleanFrameBuf); + WebRtc_FreeBuffer(aecm->outFrameBuf); + + WebRtc_FreeDelayEstimator(aecm->delay_estimator); + WebRtc_FreeDelayEstimatorFarend(aecm->delay_estimator_farend); + WebRtcSpl_FreeRealFFT(aecm->real_fft); + + free(aecm); +} + +int WebRtcAecm_ProcessFrame(AecmCore* aecm, + const int16_t* farend, + const int16_t* nearendNoisy, + const int16_t* nearendClean, + int16_t* out) { + int16_t outBlock_buf[PART_LEN + 8]; // Align buffer to 8-byte boundary. + int16_t* outBlock = (int16_t*) (((uintptr_t) outBlock_buf + 15) & ~ 15); + + int16_t farFrame[FRAME_LEN]; + const int16_t* out_ptr = NULL; + int size = 0; + + // Buffer the current frame. + // Fetch an older one corresponding to the delay. + WebRtcAecm_BufferFarFrame(aecm, farend, FRAME_LEN); + WebRtcAecm_FetchFarFrame(aecm, farFrame, FRAME_LEN, aecm->knownDelay); + + // Buffer the synchronized far and near frames, + // to pass the smaller blocks individually. + WebRtc_WriteBuffer(aecm->farFrameBuf, farFrame, FRAME_LEN); + WebRtc_WriteBuffer(aecm->nearNoisyFrameBuf, nearendNoisy, FRAME_LEN); + if (nearendClean != NULL) + { + WebRtc_WriteBuffer(aecm->nearCleanFrameBuf, nearendClean, FRAME_LEN); + } + + // Process as many blocks as possible. 
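+  // Frames and blocks differ in length (FRAME_LEN vs. PART_LEN samples, 80
+  // vs. 64 in aecm_defines.h), so whole frames are staged in the ring buffers
+  // above and consumed here in PART_LEN blocks; any remainder stays buffered
+  // for the next call.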
+ while (WebRtc_available_read(aecm->farFrameBuf) >= PART_LEN) + { + int16_t far_block[PART_LEN]; + const int16_t* far_block_ptr = NULL; + int16_t near_noisy_block[PART_LEN]; + const int16_t* near_noisy_block_ptr = NULL; + + WebRtc_ReadBuffer(aecm->farFrameBuf, (void**) &far_block_ptr, far_block, + PART_LEN); + WebRtc_ReadBuffer(aecm->nearNoisyFrameBuf, + (void**) &near_noisy_block_ptr, + near_noisy_block, + PART_LEN); + if (nearendClean != NULL) + { + int16_t near_clean_block[PART_LEN]; + const int16_t* near_clean_block_ptr = NULL; + + WebRtc_ReadBuffer(aecm->nearCleanFrameBuf, + (void**) &near_clean_block_ptr, + near_clean_block, + PART_LEN); + if (WebRtcAecm_ProcessBlock(aecm, + far_block_ptr, + near_noisy_block_ptr, + near_clean_block_ptr, + outBlock) == -1) + { + return -1; + } + } else + { + if (WebRtcAecm_ProcessBlock(aecm, + far_block_ptr, + near_noisy_block_ptr, + NULL, + outBlock) == -1) + { + return -1; + } + } + + WebRtc_WriteBuffer(aecm->outFrameBuf, outBlock, PART_LEN); + } + + // Stuff the out buffer if we have less than a frame to output. + // This should only happen for the first frame. + size = (int) WebRtc_available_read(aecm->outFrameBuf); + if (size < FRAME_LEN) + { + WebRtc_MoveReadPtr(aecm->outFrameBuf, size - FRAME_LEN); + } + + // Obtain an output frame. + WebRtc_ReadBuffer(aecm->outFrameBuf, (void**) &out_ptr, out, FRAME_LEN); + if (out_ptr != out) { + // ReadBuffer() hasn't copied to |out| in this case. + memcpy(out, out_ptr, FRAME_LEN * sizeof(int16_t)); + } + + return 0; +} + +// WebRtcAecm_AsymFilt(...) +// +// Performs asymmetric filtering. +// +// Inputs: +// - filtOld : Previous filtered value. +// - inVal : New input value. +// - stepSizePos : Step size when we have a positive contribution. +// - stepSizeNeg : Step size when we have a negative contribution. +// +// Output: +// +// Return: - Filtered value. +// +int16_t WebRtcAecm_AsymFilt(const int16_t filtOld, const int16_t inVal, + const int16_t stepSizePos, + const int16_t stepSizeNeg) +{ + int16_t retVal; + + if ((filtOld == WEBRTC_SPL_WORD16_MAX) | (filtOld == WEBRTC_SPL_WORD16_MIN)) + { + return inVal; + } + retVal = filtOld; + if (filtOld > inVal) + { + retVal -= (filtOld - inVal) >> stepSizeNeg; + } else + { + retVal += (inVal - filtOld) >> stepSizePos; + } + + return retVal; +} + +// ExtractFractionPart(a, zeros) +// +// returns the fraction part of |a|, with |zeros| number of leading zeros, as an +// int16_t scaled to Q8. There is no sanity check of |a| in the sense that the +// number of zeros match. +static int16_t ExtractFractionPart(uint32_t a, int zeros) { + return (int16_t)(((a << zeros) & 0x7FFFFFFF) >> 23); +} + +// Calculates and returns the log of |energy| in Q8. The input |energy| is +// supposed to be in Q(|q_domain|). +static int16_t LogOfEnergyInQ8(uint32_t energy, int q_domain) { + static const int16_t kLogLowValue = PART_LEN_SHIFT << 7; + int16_t log_energy_q8 = kLogLowValue; + if (energy > 0) { + int zeros = WebRtcSpl_NormU32(energy); + int16_t frac = ExtractFractionPart(energy, zeros); + // log2 of |energy| in Q8. + log_energy_q8 += ((31 - zeros) << 8) + frac - (q_domain << 8); + } + return log_energy_q8; +} + +// WebRtcAecm_CalcEnergies(...) +// +// This function calculates the log of energies for nearend, farend and estimated +// echoes. There is also an update of energy decision levels, i.e. internal VAD. +// +// +// @param aecm [i/o] Handle of the AECM instance. +// @param far_spectrum [in] Pointer to farend spectrum. +// @param far_q [in] Q-domain of farend spectrum. 
+// @param nearEner [in] Near end energy for current block in +// Q(aecm->dfaQDomain). +// @param echoEst [out] Estimated echo in Q(xfa_q+RESOLUTION_CHANNEL16). +// +void WebRtcAecm_CalcEnergies(AecmCore* aecm, + const uint16_t* far_spectrum, + const int16_t far_q, + const uint32_t nearEner, + int32_t* echoEst) { + // Local variables + uint32_t tmpAdapt = 0; + uint32_t tmpStored = 0; + uint32_t tmpFar = 0; + + int i; + + int16_t tmp16; + int16_t increase_max_shifts = 4; + int16_t decrease_max_shifts = 11; + int16_t increase_min_shifts = 11; + int16_t decrease_min_shifts = 3; + + // Get log of near end energy and store in buffer + + // Shift buffer + memmove(aecm->nearLogEnergy + 1, aecm->nearLogEnergy, + sizeof(int16_t) * (MAX_BUF_LEN - 1)); + + // Logarithm of integrated magnitude spectrum (nearEner) + aecm->nearLogEnergy[0] = LogOfEnergyInQ8(nearEner, aecm->dfaNoisyQDomain); + + WebRtcAecm_CalcLinearEnergies(aecm, far_spectrum, echoEst, &tmpFar, &tmpAdapt, &tmpStored); + + // Shift buffers + memmove(aecm->echoAdaptLogEnergy + 1, aecm->echoAdaptLogEnergy, + sizeof(int16_t) * (MAX_BUF_LEN - 1)); + memmove(aecm->echoStoredLogEnergy + 1, aecm->echoStoredLogEnergy, + sizeof(int16_t) * (MAX_BUF_LEN - 1)); + + // Logarithm of delayed far end energy + aecm->farLogEnergy = LogOfEnergyInQ8(tmpFar, far_q); + + // Logarithm of estimated echo energy through adapted channel + aecm->echoAdaptLogEnergy[0] = LogOfEnergyInQ8(tmpAdapt, + RESOLUTION_CHANNEL16 + far_q); + + // Logarithm of estimated echo energy through stored channel + aecm->echoStoredLogEnergy[0] = + LogOfEnergyInQ8(tmpStored, RESOLUTION_CHANNEL16 + far_q); + + // Update farend energy levels (min, max, vad, mse) + if (aecm->farLogEnergy > FAR_ENERGY_MIN) + { + if (aecm->startupState == 0) + { + increase_max_shifts = 2; + decrease_min_shifts = 2; + increase_min_shifts = 8; + } + + aecm->farEnergyMin = WebRtcAecm_AsymFilt(aecm->farEnergyMin, aecm->farLogEnergy, + increase_min_shifts, decrease_min_shifts); + aecm->farEnergyMax = WebRtcAecm_AsymFilt(aecm->farEnergyMax, aecm->farLogEnergy, + increase_max_shifts, decrease_max_shifts); + aecm->farEnergyMaxMin = (aecm->farEnergyMax - aecm->farEnergyMin); + + // Dynamic VAD region size + tmp16 = 2560 - aecm->farEnergyMin; + if (tmp16 > 0) + { + tmp16 = (int16_t)((tmp16 * FAR_ENERGY_VAD_REGION) >> 9); + } else + { + tmp16 = 0; + } + tmp16 += FAR_ENERGY_VAD_REGION; + + if ((aecm->startupState == 0) | (aecm->vadUpdateCount > 1024)) + { + // In startup phase or VAD update halted + aecm->farEnergyVAD = aecm->farEnergyMin + tmp16; + } else + { + if (aecm->farEnergyVAD > aecm->farLogEnergy) + { + aecm->farEnergyVAD += + (aecm->farLogEnergy + tmp16 - aecm->farEnergyVAD) >> 6; + aecm->vadUpdateCount = 0; + } else + { + aecm->vadUpdateCount++; + } + } + // Put MSE threshold higher than VAD + aecm->farEnergyMSE = aecm->farEnergyVAD + (1 << 8); + } + + // Update VAD variables + if (aecm->farLogEnergy > aecm->farEnergyVAD) + { + if ((aecm->startupState == 0) | (aecm->farEnergyMaxMin > FAR_ENERGY_DIFF)) + { + // We are in startup or have significant dynamics in input speech level + aecm->currentVADValue = 1; + } + } else + { + aecm->currentVADValue = 0; + } + if ((aecm->currentVADValue) && (aecm->firstVAD)) + { + aecm->firstVAD = 0; + if (aecm->echoAdaptLogEnergy[0] > aecm->nearLogEnergy[0]) + { + // The estimated echo has higher energy than the near end signal. + // This means that the initialization was too aggressive. 
Scale + // down by a factor 8 + for (i = 0; i < PART_LEN1; i++) + { + aecm->channelAdapt16[i] >>= 3; + } + // Compensate the adapted echo energy level accordingly. + aecm->echoAdaptLogEnergy[0] -= (3 << 8); + aecm->firstVAD = 1; + } + } +} + +// WebRtcAecm_CalcStepSize(...) +// +// This function calculates the step size used in channel estimation +// +// +// @param aecm [in] Handle of the AECM instance. +// @param mu [out] (Return value) Stepsize in log2(), i.e. number of shifts. +// +// +int16_t WebRtcAecm_CalcStepSize(AecmCore* const aecm) { + int32_t tmp32; + int16_t tmp16; + int16_t mu = MU_MAX; + + // Here we calculate the step size mu used in the + // following NLMS based Channel estimation algorithm + if (!aecm->currentVADValue) + { + // Far end energy level too low, no channel update + mu = 0; + } else if (aecm->startupState > 0) + { + if (aecm->farEnergyMin >= aecm->farEnergyMax) + { + mu = MU_MIN; + } else + { + tmp16 = (aecm->farLogEnergy - aecm->farEnergyMin); + tmp32 = tmp16 * MU_DIFF; + tmp32 = WebRtcSpl_DivW32W16(tmp32, aecm->farEnergyMaxMin); + mu = MU_MIN - 1 - (int16_t)(tmp32); + // The -1 is an alternative to rounding. This way we get a larger + // stepsize, so we in some sense compensate for truncation in NLMS + } + if (mu < MU_MAX) + { + mu = MU_MAX; // Equivalent with maximum step size of 2^-MU_MAX + } + } + + return mu; +} + +// WebRtcAecm_UpdateChannel(...) +// +// This function performs channel estimation. NLMS and decision on channel storage. +// +// +// @param aecm [i/o] Handle of the AECM instance. +// @param far_spectrum [in] Absolute value of the farend signal in Q(far_q) +// @param far_q [in] Q-domain of the farend signal +// @param dfa [in] Absolute value of the nearend signal (Q[aecm->dfaQDomain]) +// @param mu [in] NLMS step size. +// @param echoEst [i/o] Estimated echo in Q(far_q+RESOLUTION_CHANNEL16). +// +void WebRtcAecm_UpdateChannel(AecmCore* aecm, + const uint16_t* far_spectrum, + const int16_t far_q, + const uint16_t* const dfa, + const int16_t mu, + int32_t* echoEst) { + uint32_t tmpU32no1, tmpU32no2; + int32_t tmp32no1, tmp32no2; + int32_t mseStored; + int32_t mseAdapt; + + int i; + + int16_t zerosFar, zerosNum, zerosCh, zerosDfa; + int16_t shiftChFar, shiftNum, shift2ResChan; + int16_t tmp16no1; + int16_t xfaQ, dfaQ; + + // This is the channel estimation algorithm. It is base on NLMS but has a variable step + // length, which was calculated above. 
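+  // Conceptually, the per-bin update is
+  //
+  //   e(i)  = dfa(i) - H(i) * X(i);               // a priori error
+  //   H(i) += 2^(-mu) * e(i) / ((i + 1) * X(i));  // bin-normalized step
+  //
+  // with H the adaptive channel, X the far-end magnitude spectrum and mu the
+  // step size in shifts (larger mu, smaller step). The fixed-point code below
+  // renormalizes every operand to keep the intermediate products in range.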
+ if (mu) + { + for (i = 0; i < PART_LEN1; i++) + { + // Determine norm of channel and farend to make sure we don't get overflow in + // multiplication + zerosCh = WebRtcSpl_NormU32(aecm->channelAdapt32[i]); + zerosFar = WebRtcSpl_NormU32((uint32_t)far_spectrum[i]); + if (zerosCh + zerosFar > 31) + { + // Multiplication is safe + tmpU32no1 = WEBRTC_SPL_UMUL_32_16(aecm->channelAdapt32[i], + far_spectrum[i]); + shiftChFar = 0; + } else + { + // We need to shift down before multiplication + shiftChFar = 32 - zerosCh - zerosFar; + tmpU32no1 = rtc::dchecked_cast<uint32_t>( + aecm->channelAdapt32[i] >> shiftChFar) * far_spectrum[i]; + } + // Determine Q-domain of numerator + zerosNum = WebRtcSpl_NormU32(tmpU32no1); + if (dfa[i]) + { + zerosDfa = WebRtcSpl_NormU32((uint32_t)dfa[i]); + } else + { + zerosDfa = 32; + } + tmp16no1 = zerosDfa - 2 + aecm->dfaNoisyQDomain - + RESOLUTION_CHANNEL32 - far_q + shiftChFar; + if (zerosNum > tmp16no1 + 1) + { + xfaQ = tmp16no1; + dfaQ = zerosDfa - 2; + } else + { + xfaQ = zerosNum - 2; + dfaQ = RESOLUTION_CHANNEL32 + far_q - aecm->dfaNoisyQDomain - + shiftChFar + xfaQ; + } + // Add in the same Q-domain + tmpU32no1 = WEBRTC_SPL_SHIFT_W32(tmpU32no1, xfaQ); + tmpU32no2 = WEBRTC_SPL_SHIFT_W32((uint32_t)dfa[i], dfaQ); + tmp32no1 = (int32_t)tmpU32no2 - (int32_t)tmpU32no1; + zerosNum = WebRtcSpl_NormW32(tmp32no1); + if ((tmp32no1) && (far_spectrum[i] > (CHANNEL_VAD << far_q))) + { + // + // Update is needed + // + // This is what we would like to compute + // + // tmp32no1 = dfa[i] - (aecm->channelAdapt[i] * far_spectrum[i]) + // tmp32norm = (i + 1) + // aecm->channelAdapt[i] += (2^mu) * tmp32no1 + // / (tmp32norm * far_spectrum[i]) + // + + // Make sure we don't get overflow in multiplication. + if (zerosNum + zerosFar > 31) + { + if (tmp32no1 > 0) + { + tmp32no2 = (int32_t)WEBRTC_SPL_UMUL_32_16(tmp32no1, + far_spectrum[i]); + } else + { + tmp32no2 = -(int32_t)WEBRTC_SPL_UMUL_32_16(-tmp32no1, + far_spectrum[i]); + } + shiftNum = 0; + } else + { + shiftNum = 32 - (zerosNum + zerosFar); + if (tmp32no1 > 0) + { + tmp32no2 = (tmp32no1 >> shiftNum) * far_spectrum[i]; + } else + { + tmp32no2 = -((-tmp32no1 >> shiftNum) * far_spectrum[i]); + } + } + // Normalize with respect to frequency bin + tmp32no2 = WebRtcSpl_DivW32W16(tmp32no2, i + 1); + // Make sure we are in the right Q-domain + shift2ResChan = shiftNum + shiftChFar - xfaQ - mu - ((30 - zerosFar) << 1); + if (WebRtcSpl_NormW32(tmp32no2) < shift2ResChan) + { + tmp32no2 = WEBRTC_SPL_WORD32_MAX; + } else + { + tmp32no2 = WEBRTC_SPL_SHIFT_W32(tmp32no2, shift2ResChan); + } + aecm->channelAdapt32[i] = + WebRtcSpl_AddSatW32(aecm->channelAdapt32[i], tmp32no2); + if (aecm->channelAdapt32[i] < 0) + { + // We can never have negative channel gain + aecm->channelAdapt32[i] = 0; + } + aecm->channelAdapt16[i] = + (int16_t)(aecm->channelAdapt32[i] >> 16); + } + } + } + // END: Adaptive channel update + + // Determine if we should store or restore the channel + if ((aecm->startupState == 0) & (aecm->currentVADValue)) + { + // During startup we store the channel every block, + // and we recalculate echo estimate + WebRtcAecm_StoreAdaptiveChannel(aecm, far_spectrum, echoEst); + } else + { + if (aecm->farLogEnergy < aecm->farEnergyMSE) + { + aecm->mseChannelCount = 0; + } else + { + aecm->mseChannelCount++; + } + // Enough data for validation. Store channel if we can. + if (aecm->mseChannelCount >= (MIN_MSE_COUNT + 10)) + { + // We have enough data. + // Calculate MSE of "Adapt" and "Stored" versions. 
+ // It is actually not MSE, but average absolute error. + mseStored = 0; + mseAdapt = 0; + for (i = 0; i < MIN_MSE_COUNT; i++) + { + tmp32no1 = ((int32_t)aecm->echoStoredLogEnergy[i] + - (int32_t)aecm->nearLogEnergy[i]); + tmp32no2 = WEBRTC_SPL_ABS_W32(tmp32no1); + mseStored += tmp32no2; + + tmp32no1 = ((int32_t)aecm->echoAdaptLogEnergy[i] + - (int32_t)aecm->nearLogEnergy[i]); + tmp32no2 = WEBRTC_SPL_ABS_W32(tmp32no1); + mseAdapt += tmp32no2; + } + if (((mseStored << MSE_RESOLUTION) < (MIN_MSE_DIFF * mseAdapt)) + & ((aecm->mseStoredOld << MSE_RESOLUTION) < (MIN_MSE_DIFF + * aecm->mseAdaptOld))) + { + // The stored channel has a significantly lower MSE than the adaptive one for + // two consecutive calculations. Reset the adaptive channel. + WebRtcAecm_ResetAdaptiveChannel(aecm); + } else if (((MIN_MSE_DIFF * mseStored) > (mseAdapt << MSE_RESOLUTION)) & (mseAdapt + < aecm->mseThreshold) & (aecm->mseAdaptOld < aecm->mseThreshold)) + { + // The adaptive channel has a significantly lower MSE than the stored one. + // The MSE for the adaptive channel has also been low for two consecutive + // calculations. Store the adaptive channel. + WebRtcAecm_StoreAdaptiveChannel(aecm, far_spectrum, echoEst); + + // Update threshold + if (aecm->mseThreshold == WEBRTC_SPL_WORD32_MAX) + { + aecm->mseThreshold = (mseAdapt + aecm->mseAdaptOld); + } else + { + int scaled_threshold = aecm->mseThreshold * 5 / 8; + aecm->mseThreshold += + ((mseAdapt - scaled_threshold) * 205) >> 8; + } + + } + + // Reset counter + aecm->mseChannelCount = 0; + + // Store the MSE values. + aecm->mseStoredOld = mseStored; + aecm->mseAdaptOld = mseAdapt; + } + } + // END: Determine if we should store or reset channel estimate. +} + +// CalcSuppressionGain(...) +// +// This function calculates the suppression gain that is used in the Wiener filter. +// +// +// @param aecm [i/n] Handle of the AECM instance. +// @param supGain [out] (Return value) Suppression gain with which to scale the noise +// level (Q14). +// +// +int16_t WebRtcAecm_CalcSuppressionGain(AecmCore* const aecm) { + int32_t tmp32no1; + + int16_t supGain = SUPGAIN_DEFAULT; + int16_t tmp16no1; + int16_t dE = 0; + + // Determine suppression gain used in the Wiener filter. The gain is based on a mix of far + // end energy and echo estimation error. + // Adjust for the far end signal level. A low signal level indicates no far end signal, + // hence we set the suppression gain to 0 + if (!aecm->currentVADValue) + { + supGain = 0; + } else + { + // Adjust for possible double talk. If we have large variations in estimation error we + // likely have double talk (or poor channel). + tmp16no1 = (aecm->nearLogEnergy[0] - aecm->echoStoredLogEnergy[0] - ENERGY_DEV_OFFSET); + dE = WEBRTC_SPL_ABS_W16(tmp16no1); + + if (dE < ENERGY_DEV_TOL) + { + // Likely no double talk. The better estimation, the more we can suppress signal. + // Update counters + if (dE < SUPGAIN_EPC_DT) + { + tmp32no1 = aecm->supGainErrParamDiffAB * dE; + tmp32no1 += (SUPGAIN_EPC_DT >> 1); + tmp16no1 = (int16_t)WebRtcSpl_DivW32W16(tmp32no1, SUPGAIN_EPC_DT); + supGain = aecm->supGainErrParamA - tmp16no1; + } else + { + tmp32no1 = aecm->supGainErrParamDiffBD * (ENERGY_DEV_TOL - dE); + tmp32no1 += ((ENERGY_DEV_TOL - SUPGAIN_EPC_DT) >> 1); + tmp16no1 = (int16_t)WebRtcSpl_DivW32W16(tmp32no1, (ENERGY_DEV_TOL + - SUPGAIN_EPC_DT)); + supGain = aecm->supGainErrParamD + tmp16no1; + } + } else + { + // Likely in double talk. 
Use default value + supGain = aecm->supGainErrParamD; + } + } + + if (supGain > aecm->supGainOld) + { + tmp16no1 = supGain; + } else + { + tmp16no1 = aecm->supGainOld; + } + aecm->supGainOld = supGain; + if (tmp16no1 < aecm->supGain) + { + aecm->supGain += (int16_t)((tmp16no1 - aecm->supGain) >> 4); + } else + { + aecm->supGain += (int16_t)((tmp16no1 - aecm->supGain) >> 4); + } + + // END: Update suppression gain + + return aecm->supGain; +} + +void WebRtcAecm_BufferFarFrame(AecmCore* const aecm, + const int16_t* const farend, + const int farLen) { + int writeLen = farLen, writePos = 0; + + // Check if the write position must be wrapped + while (aecm->farBufWritePos + writeLen > FAR_BUF_LEN) + { + // Write to remaining buffer space before wrapping + writeLen = FAR_BUF_LEN - aecm->farBufWritePos; + memcpy(aecm->farBuf + aecm->farBufWritePos, farend + writePos, + sizeof(int16_t) * writeLen); + aecm->farBufWritePos = 0; + writePos = writeLen; + writeLen = farLen - writeLen; + } + + memcpy(aecm->farBuf + aecm->farBufWritePos, farend + writePos, + sizeof(int16_t) * writeLen); + aecm->farBufWritePos += writeLen; +} + +void WebRtcAecm_FetchFarFrame(AecmCore* const aecm, + int16_t* const farend, + const int farLen, + const int knownDelay) { + int readLen = farLen; + int readPos = 0; + int delayChange = knownDelay - aecm->lastKnownDelay; + + aecm->farBufReadPos -= delayChange; + + // Check if delay forces a read position wrap + while (aecm->farBufReadPos < 0) + { + aecm->farBufReadPos += FAR_BUF_LEN; + } + while (aecm->farBufReadPos > FAR_BUF_LEN - 1) + { + aecm->farBufReadPos -= FAR_BUF_LEN; + } + + aecm->lastKnownDelay = knownDelay; + + // Check if read position must be wrapped + while (aecm->farBufReadPos + readLen > FAR_BUF_LEN) + { + + // Read from remaining buffer space before wrapping + readLen = FAR_BUF_LEN - aecm->farBufReadPos; + memcpy(farend + readPos, aecm->farBuf + aecm->farBufReadPos, + sizeof(int16_t) * readLen); + aecm->farBufReadPos = 0; + readPos = readLen; + readLen = farLen - readLen; + } + memcpy(farend + readPos, aecm->farBuf + aecm->farBufReadPos, + sizeof(int16_t) * readLen); + aecm->farBufReadPos += readLen; +} diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aecm/aecm_core.h b/third_party/libwebrtc/webrtc/modules/audio_processing/aecm/aecm_core.h new file mode 100644 index 0000000000..feb997e9fa --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aecm/aecm_core.h @@ -0,0 +1,436 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Performs echo control (suppression) with fft routines in fixed-point. 
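+//
+// Most quantities in this module carry an explicit Q-domain: a Qn value
+// stores x * 2^n in an integer, e.g. the suppression gain is Q14
+// (16384 == 1.0) and the log energies are Q8 (256 == 1.0 in log2 units).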
+ +#ifndef MODULES_AUDIO_PROCESSING_AECM_AECM_CORE_H_ +#define MODULES_AUDIO_PROCESSING_AECM_AECM_CORE_H_ + +extern "C" { +#include "common_audio/ring_buffer.h" +#include "common_audio/signal_processing/include/signal_processing_library.h" +} +#include "modules/audio_processing/aecm/aecm_defines.h" +#include "typedefs.h" // NOLINT(build/include) + +#ifdef _MSC_VER // visual c++ +#define ALIGN8_BEG __declspec(align(8)) +#define ALIGN8_END +#else // gcc or icc +#define ALIGN8_BEG +#define ALIGN8_END __attribute__((aligned(8))) +#endif + +typedef struct { + int16_t real; + int16_t imag; +} ComplexInt16; + +typedef struct { + int farBufWritePos; + int farBufReadPos; + int knownDelay; + int lastKnownDelay; + int firstVAD; // Parameter to control poorly initialized channels + + RingBuffer* farFrameBuf; + RingBuffer* nearNoisyFrameBuf; + RingBuffer* nearCleanFrameBuf; + RingBuffer* outFrameBuf; + + int16_t farBuf[FAR_BUF_LEN]; + + int16_t mult; + uint32_t seed; + + // Delay estimation variables + void* delay_estimator_farend; + void* delay_estimator; + uint16_t currentDelay; + // Far end history variables + // TODO(bjornv): Replace |far_history| with ring_buffer. + uint16_t far_history[PART_LEN1 * MAX_DELAY]; + int far_history_pos; + int far_q_domains[MAX_DELAY]; + + int16_t nlpFlag; + int16_t fixedDelay; + + uint32_t totCount; + + int16_t dfaCleanQDomain; + int16_t dfaCleanQDomainOld; + int16_t dfaNoisyQDomain; + int16_t dfaNoisyQDomainOld; + + int16_t nearLogEnergy[MAX_BUF_LEN]; + int16_t farLogEnergy; + int16_t echoAdaptLogEnergy[MAX_BUF_LEN]; + int16_t echoStoredLogEnergy[MAX_BUF_LEN]; + + // The extra 16 or 32 bytes in the following buffers are for alignment based + // Neon code. + // It's designed this way since the current GCC compiler can't align a + // buffer in 16 or 32 byte boundaries properly. + int16_t channelStored_buf[PART_LEN1 + 8]; + int16_t channelAdapt16_buf[PART_LEN1 + 8]; + int32_t channelAdapt32_buf[PART_LEN1 + 8]; + int16_t xBuf_buf[PART_LEN2 + 16]; // farend + int16_t dBufClean_buf[PART_LEN2 + 16]; // nearend + int16_t dBufNoisy_buf[PART_LEN2 + 16]; // nearend + int16_t outBuf_buf[PART_LEN + 8]; + + // Pointers to the above buffers + int16_t *channelStored; + int16_t *channelAdapt16; + int32_t *channelAdapt32; + int16_t *xBuf; + int16_t *dBufClean; + int16_t *dBufNoisy; + int16_t *outBuf; + + int32_t echoFilt[PART_LEN1]; + int16_t nearFilt[PART_LEN1]; + int32_t noiseEst[PART_LEN1]; + int noiseEstTooLowCtr[PART_LEN1]; + int noiseEstTooHighCtr[PART_LEN1]; + int16_t noiseEstCtr; + int16_t cngMode; + + int32_t mseAdaptOld; + int32_t mseStoredOld; + int32_t mseThreshold; + + int16_t farEnergyMin; + int16_t farEnergyMax; + int16_t farEnergyMaxMin; + int16_t farEnergyVAD; + int16_t farEnergyMSE; + int currentVADValue; + int16_t vadUpdateCount; + + int16_t startupState; + int16_t mseChannelCount; + int16_t supGain; + int16_t supGainOld; + + int16_t supGainErrParamA; + int16_t supGainErrParamD; + int16_t supGainErrParamDiffAB; + int16_t supGainErrParamDiffBD; + + struct RealFFT* real_fft; + +#ifdef AEC_DEBUG + FILE *farFile; + FILE *nearFile; + FILE *outFile; +#endif +} AecmCore; + +//////////////////////////////////////////////////////////////////////////////// +// WebRtcAecm_CreateCore() +// +// Allocates the memory needed by the AECM. The memory needs to be +// initialized separately using the WebRtcAecm_InitCore() function. +// Returns a pointer to the instance and a nullptr at failure. 
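+//
+// Typical call sequence (sketch only; error handling elided):
+//
+//   AecmCore* aecm = WebRtcAecm_CreateCore();
+//   WebRtcAecm_InitCore(aecm, 16000);  // only 8000 and 16000 Hz are valid
+//   WebRtcAecm_ProcessFrame(aecm, farend, nearend_noisy,
+//                           NULL /* no NS-cleaned signal */, out);
+//   WebRtcAecm_FreeCore(aecm);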
+AecmCore* WebRtcAecm_CreateCore(); + +//////////////////////////////////////////////////////////////////////////////// +// WebRtcAecm_InitCore(...) +// +// This function initializes the AECM instant created with +// WebRtcAecm_CreateCore() +// Input: +// - aecm : Pointer to the AECM instance +// - samplingFreq : Sampling Frequency +// +// Output: +// - aecm : Initialized instance +// +// Return value : 0 - Ok +// -1 - Error +// +int WebRtcAecm_InitCore(AecmCore* const aecm, int samplingFreq); + +//////////////////////////////////////////////////////////////////////////////// +// WebRtcAecm_FreeCore(...) +// +// This function releases the memory allocated by WebRtcAecm_CreateCore() +// Input: +// - aecm : Pointer to the AECM instance +// +void WebRtcAecm_FreeCore(AecmCore* aecm); + +int WebRtcAecm_Control(AecmCore* aecm, int delay, int nlpFlag); + +//////////////////////////////////////////////////////////////////////////////// +// WebRtcAecm_InitEchoPathCore(...) +// +// This function resets the echo channel adaptation with the specified channel. +// Input: +// - aecm : Pointer to the AECM instance +// - echo_path : Pointer to the data that should initialize the echo +// path +// +// Output: +// - aecm : Initialized instance +// +void WebRtcAecm_InitEchoPathCore(AecmCore* aecm, const int16_t* echo_path); + +//////////////////////////////////////////////////////////////////////////////// +// WebRtcAecm_ProcessFrame(...) +// +// This function processes frames and sends blocks to +// WebRtcAecm_ProcessBlock(...) +// +// Inputs: +// - aecm : Pointer to the AECM instance +// - farend : In buffer containing one frame of echo signal +// - nearendNoisy : In buffer containing one frame of nearend+echo signal +// without NS +// - nearendClean : In buffer containing one frame of nearend+echo signal +// with NS +// +// Output: +// - out : Out buffer, one frame of nearend signal : +// +// +int WebRtcAecm_ProcessFrame(AecmCore* aecm, + const int16_t* farend, + const int16_t* nearendNoisy, + const int16_t* nearendClean, + int16_t* out); + +//////////////////////////////////////////////////////////////////////////////// +// WebRtcAecm_ProcessBlock(...) +// +// This function is called for every block within one frame +// This function is called by WebRtcAecm_ProcessFrame(...) +// +// Inputs: +// - aecm : Pointer to the AECM instance +// - farend : In buffer containing one block of echo signal +// - nearendNoisy : In buffer containing one frame of nearend+echo signal +// without NS +// - nearendClean : In buffer containing one frame of nearend+echo signal +// with NS +// +// Output: +// - out : Out buffer, one block of nearend signal : +// +// +int WebRtcAecm_ProcessBlock(AecmCore* aecm, + const int16_t* farend, + const int16_t* nearendNoisy, + const int16_t* noisyClean, + int16_t* out); + +//////////////////////////////////////////////////////////////////////////////// +// WebRtcAecm_BufferFarFrame() +// +// Inserts a frame of data into farend buffer. 
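+// Writes wrap around the circular |farBuf| (FAR_BUF_LEN samples), matching
+// the wrapped reads in WebRtcAecm_FetchFarFrame().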
+// +// Inputs: +// - aecm : Pointer to the AECM instance +// - farend : In buffer containing one frame of farend signal +// - farLen : Length of frame +// +void WebRtcAecm_BufferFarFrame(AecmCore* const aecm, + const int16_t* const farend, + const int farLen); + +//////////////////////////////////////////////////////////////////////////////// +// WebRtcAecm_FetchFarFrame() +// +// Read the farend buffer to account for known delay +// +// Inputs: +// - aecm : Pointer to the AECM instance +// - farend : In buffer containing one frame of farend signal +// - farLen : Length of frame +// - knownDelay : known delay +// +void WebRtcAecm_FetchFarFrame(AecmCore* const aecm, + int16_t* const farend, + const int farLen, + const int knownDelay); + +// All the functions below are intended to be private + +//////////////////////////////////////////////////////////////////////////////// +// WebRtcAecm_UpdateFarHistory() +// +// Moves the pointer to the next entry and inserts |far_spectrum| and +// corresponding Q-domain in its buffer. +// +// Inputs: +// - self : Pointer to the delay estimation instance +// - far_spectrum : Pointer to the far end spectrum +// - far_q : Q-domain of far end spectrum +// +void WebRtcAecm_UpdateFarHistory(AecmCore* self, + uint16_t* far_spectrum, + int far_q); + +//////////////////////////////////////////////////////////////////////////////// +// WebRtcAecm_AlignedFarend() +// +// Returns a pointer to the far end spectrum aligned to current near end +// spectrum. The function WebRtc_DelayEstimatorProcessFix(...) should have been +// called before AlignedFarend(...). Otherwise, you get the pointer to the +// previous frame. The memory is only valid until the next call of +// WebRtc_DelayEstimatorProcessFix(...). +// +// Inputs: +// - self : Pointer to the AECM instance. +// - delay : Current delay estimate. +// +// Output: +// - far_q : The Q-domain of the aligned far end spectrum +// +// Return value: +// - far_spectrum : Pointer to the aligned far end spectrum +// NULL - Error +// +const uint16_t* WebRtcAecm_AlignedFarend(AecmCore* self, int* far_q, int delay); + +/////////////////////////////////////////////////////////////////////////////// +// WebRtcAecm_CalcSuppressionGain() +// +// This function calculates the suppression gain that is used in the +// Wiener filter. +// +// Inputs: +// - aecm : Pointer to the AECM instance. +// +// Return value: +// - supGain : Suppression gain with which to scale the noise +// level (Q14). +// +int16_t WebRtcAecm_CalcSuppressionGain(AecmCore* const aecm); + +/////////////////////////////////////////////////////////////////////////////// +// WebRtcAecm_CalcEnergies() +// +// This function calculates the log of energies for nearend, farend and +// estimated echoes. There is also an update of energy decision levels, +// i.e. internal VAD. +// +// Inputs: +// - aecm : Pointer to the AECM instance. +// - far_spectrum : Pointer to farend spectrum. +// - far_q : Q-domain of farend spectrum. +// - nearEner : Near end energy for current block in +// Q(aecm->dfaQDomain). +// +// Output: +// - echoEst : Estimated echo in Q(xfa_q+RESOLUTION_CHANNEL16). +// +void WebRtcAecm_CalcEnergies(AecmCore* aecm, + const uint16_t* far_spectrum, + const int16_t far_q, + const uint32_t nearEner, + int32_t* echoEst); + +/////////////////////////////////////////////////////////////////////////////// +// WebRtcAecm_CalcStepSize() +// +// This function calculates the step size used in channel estimation +// +// Inputs: +// - aecm : Pointer to the AECM instance. 
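// A plain-C sketch of the circular far-end history maintained by
// WebRtcAecm_UpdateFarHistory() / WebRtcAecm_AlignedFarend() above (the
// |far_history|, |far_q_domains| and |far_history_pos| fields of AecmCore).
// kPartLen1 and kMaxDelay are assumed to match PART_LEN1 and MAX_DELAY from
// aecm_defines.h; error handling is omitted.
#include <stdint.h>
#include <string.h>

enum { kPartLen1 = 65, kMaxDelay = 100 };

typedef struct {
  uint16_t history[kPartLen1 * kMaxDelay];  // one far spectrum per block
  int q_domains[kMaxDelay];                 // Q-domain of each spectrum
  int pos;                                  // most recently written entry
} FarHistory;

// Advance the write position, then store the new spectrum and its Q-domain.
static void UpdateFarHistory(FarHistory* h, const uint16_t* spectrum, int q) {
  h->pos = (h->pos + 1) % kMaxDelay;
  h->q_domains[h->pos] = q;
  memcpy(&h->history[h->pos * kPartLen1], spectrum,
         sizeof(uint16_t) * kPartLen1);
}

// Step back |delay| entries to fetch the far spectrum aligned with the
// current near-end block.
static const uint16_t* AlignedFarend(const FarHistory* h, int* q, int delay) {
  int pos = h->pos - delay;
  if (pos < 0) pos += kMaxDelay;
  *q = h->q_domains[pos];
  return &h->history[pos * kPartLen1];
}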
+// +// Return value: +// - mu : Stepsize in log2(), i.e. number of shifts. +// +int16_t WebRtcAecm_CalcStepSize(AecmCore* const aecm); + +/////////////////////////////////////////////////////////////////////////////// +// WebRtcAecm_UpdateChannel(...) +// +// This function performs channel estimation. +// NLMS and decision on channel storage. +// +// Inputs: +// - aecm : Pointer to the AECM instance. +// - far_spectrum : Absolute value of the farend signal in Q(far_q) +// - far_q : Q-domain of the farend signal +// - dfa : Absolute value of the nearend signal +// (Q[aecm->dfaQDomain]) +// - mu : NLMS step size. +// Input/Output: +// - echoEst : Estimated echo in Q(far_q+RESOLUTION_CHANNEL16). +// +void WebRtcAecm_UpdateChannel(AecmCore* aecm, + const uint16_t* far_spectrum, + const int16_t far_q, + const uint16_t* const dfa, + const int16_t mu, + int32_t* echoEst); + +extern const int16_t WebRtcAecm_kCosTable[]; +extern const int16_t WebRtcAecm_kSinTable[]; + +/////////////////////////////////////////////////////////////////////////////// +// Some function pointers, for internal functions shared by ARM NEON and +// generic C code. +// +typedef void (*CalcLinearEnergies)(AecmCore* aecm, + const uint16_t* far_spectrum, + int32_t* echoEst, + uint32_t* far_energy, + uint32_t* echo_energy_adapt, + uint32_t* echo_energy_stored); +extern CalcLinearEnergies WebRtcAecm_CalcLinearEnergies; + +typedef void (*StoreAdaptiveChannel)(AecmCore* aecm, + const uint16_t* far_spectrum, + int32_t* echo_est); +extern StoreAdaptiveChannel WebRtcAecm_StoreAdaptiveChannel; + +typedef void (*ResetAdaptiveChannel)(AecmCore* aecm); +extern ResetAdaptiveChannel WebRtcAecm_ResetAdaptiveChannel; + +// For the above function pointers, functions for generic platforms are declared +// and defined as static in file aecm_core.c, while those for ARM Neon platforms +// are declared below and defined in file aecm_core_neon.c. +#if defined(WEBRTC_HAS_NEON) +void WebRtcAecm_CalcLinearEnergiesNeon(AecmCore* aecm, + const uint16_t* far_spectrum, + int32_t* echo_est, + uint32_t* far_energy, + uint32_t* echo_energy_adapt, + uint32_t* echo_energy_stored); + +void WebRtcAecm_StoreAdaptiveChannelNeon(AecmCore* aecm, + const uint16_t* far_spectrum, + int32_t* echo_est); + +void WebRtcAecm_ResetAdaptiveChannelNeon(AecmCore* aecm); +#endif + +#if defined(MIPS32_LE) +void WebRtcAecm_CalcLinearEnergies_mips(AecmCore* aecm, + const uint16_t* far_spectrum, + int32_t* echo_est, + uint32_t* far_energy, + uint32_t* echo_energy_adapt, + uint32_t* echo_energy_stored); +#if defined(MIPS_DSP_R1_LE) +void WebRtcAecm_StoreAdaptiveChannel_mips(AecmCore* aecm, + const uint16_t* far_spectrum, + int32_t* echo_est); + +void WebRtcAecm_ResetAdaptiveChannel_mips(AecmCore* aecm); +#endif +#endif + +#endif diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aecm/aecm_core_c.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aecm/aecm_core_c.cc new file mode 100644 index 0000000000..7fd6a499ad --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aecm/aecm_core_c.cc @@ -0,0 +1,773 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/aecm/aecm_core.h" + +#include <stddef.h> +#include <stdlib.h> + +extern "C" { +#include "common_audio/ring_buffer.h" +#include "common_audio/signal_processing/include/real_fft.h" +} +#include "modules/audio_processing/aecm/echo_control_mobile.h" +#include "modules/audio_processing/utility/delay_estimator_wrapper.h" +extern "C" { +#include "system_wrappers/include/cpu_features_wrapper.h" +} + +#include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_conversions.h" +#include "rtc_base/sanitizer.h" +#include "typedefs.h" // NOLINT(build/include) + +// Square root of Hanning window in Q14. +static const ALIGN8_BEG int16_t WebRtcAecm_kSqrtHanning[] ALIGN8_END = { + 0, 399, 798, 1196, 1594, 1990, 2386, 2780, 3172, + 3562, 3951, 4337, 4720, 5101, 5478, 5853, 6224, + 6591, 6954, 7313, 7668, 8019, 8364, 8705, 9040, + 9370, 9695, 10013, 10326, 10633, 10933, 11227, 11514, + 11795, 12068, 12335, 12594, 12845, 13089, 13325, 13553, + 13773, 13985, 14189, 14384, 14571, 14749, 14918, 15079, + 15231, 15373, 15506, 15631, 15746, 15851, 15947, 16034, + 16111, 16179, 16237, 16286, 16325, 16354, 16373, 16384 +}; + +#ifdef AECM_WITH_ABS_APPROX +//Q15 alpha = 0.99439986968132 const Factor for magnitude approximation +static const uint16_t kAlpha1 = 32584; +//Q15 beta = 0.12967166976970 const Factor for magnitude approximation +static const uint16_t kBeta1 = 4249; +//Q15 alpha = 0.94234827210087 const Factor for magnitude approximation +static const uint16_t kAlpha2 = 30879; +//Q15 beta = 0.33787806009150 const Factor for magnitude approximation +static const uint16_t kBeta2 = 11072; +//Q15 alpha = 0.82247698684306 const Factor for magnitude approximation +static const uint16_t kAlpha3 = 26951; +//Q15 beta = 0.57762063060713 const Factor for magnitude approximation +static const uint16_t kBeta3 = 18927; +#endif + +static const int16_t kNoiseEstQDomain = 15; +static const int16_t kNoiseEstIncCount = 5; + +static void ComfortNoise(AecmCore* aecm, + const uint16_t* dfa, + ComplexInt16* out, + const int16_t* lambda); + +static void WindowAndFFT(AecmCore* aecm, + int16_t* fft, + const int16_t* time_signal, + ComplexInt16* freq_signal, + int time_signal_scaling) { + int i = 0; + + // FFT of signal + for (i = 0; i < PART_LEN; i++) { + // Window time domain signal and insert into real part of + // transformation array |fft| + int16_t scaled_time_signal = time_signal[i] << time_signal_scaling; + fft[i] = (int16_t)((scaled_time_signal * WebRtcAecm_kSqrtHanning[i]) >> 14); + scaled_time_signal = time_signal[i + PART_LEN] << time_signal_scaling; + fft[PART_LEN + i] = (int16_t)(( + scaled_time_signal * WebRtcAecm_kSqrtHanning[PART_LEN - i]) >> 14); + } + + // Do forward FFT, then take only the first PART_LEN complex samples, + // and change signs of the imaginary parts. + WebRtcSpl_RealForwardFFT(aecm->real_fft, fft, (int16_t*)freq_signal); + for (i = 0; i < PART_LEN; i++) { + freq_signal[i].imag = -freq_signal[i].imag; + } +} + +static void InverseFFTAndWindow(AecmCore* aecm, + int16_t* fft, + ComplexInt16* efw, + int16_t* output, + const int16_t* nearendClean) { + int i, j, outCFFT; + int32_t tmp32no1; + // Reuse |efw| for the inverse FFT output after transferring + // the contents to |fft|. 
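// The kAlpha*/kBeta* constants defined above implement the classic
// alpha-max-plus-beta-min magnitude estimate used by
// TimeToFrequencyDomain() below when AECM_WITH_ABS_APPROX is set:
//   |z| ~= alpha * max(|re|, |im|) + beta * min(|re|, |im|),
// with the (alpha, beta) pair selected by the max/min ratio. A
// floating-point sketch of the same rule, constants taken from the Q15
// comments above:
#include <math.h>

static float ApproxMagnitude(float re, float im) {
  float mx = fabsf(re);
  float mn = fabsf(im);
  if (mx < mn) {
    float t = mx;
    mx = mn;
    mn = t;
  }
  // Same range tests as the fixed-point code: max/4 > min, max/2 > min.
  if (mx > 4.0f * mn) return 0.99439987f * mx + 0.12967167f * mn;  // kAlpha1/kBeta1
  if (mx > 2.0f * mn) return 0.94234827f * mx + 0.33787806f * mn;  // kAlpha2/kBeta2
  return 0.82247699f * mx + 0.57762063f * mn;                      // kAlpha3/kBeta3
}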
+ int16_t* ifft_out = (int16_t*)efw; + + // Synthesis + for (i = 1, j = 2; i < PART_LEN; i += 1, j += 2) { + fft[j] = efw[i].real; + fft[j + 1] = -efw[i].imag; + } + fft[0] = efw[0].real; + fft[1] = -efw[0].imag; + + fft[PART_LEN2] = efw[PART_LEN].real; + fft[PART_LEN2 + 1] = -efw[PART_LEN].imag; + + // Inverse FFT. Keep outCFFT to scale the samples in the next block. + outCFFT = WebRtcSpl_RealInverseFFT(aecm->real_fft, fft, ifft_out); + for (i = 0; i < PART_LEN; i++) { + ifft_out[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( + ifft_out[i], WebRtcAecm_kSqrtHanning[i], 14); + tmp32no1 = WEBRTC_SPL_SHIFT_W32((int32_t)ifft_out[i], + outCFFT - aecm->dfaCleanQDomain); + output[i] = (int16_t)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, + tmp32no1 + aecm->outBuf[i], + WEBRTC_SPL_WORD16_MIN); + + tmp32no1 = (ifft_out[PART_LEN + i] * + WebRtcAecm_kSqrtHanning[PART_LEN - i]) >> 14; + tmp32no1 = WEBRTC_SPL_SHIFT_W32(tmp32no1, + outCFFT - aecm->dfaCleanQDomain); + aecm->outBuf[i] = (int16_t)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, + tmp32no1, + WEBRTC_SPL_WORD16_MIN); + } + + // Copy the current block to the old position + // (aecm->outBuf is shifted elsewhere) + memcpy(aecm->xBuf, aecm->xBuf + PART_LEN, sizeof(int16_t) * PART_LEN); + memcpy(aecm->dBufNoisy, + aecm->dBufNoisy + PART_LEN, + sizeof(int16_t) * PART_LEN); + if (nearendClean != NULL) + { + memcpy(aecm->dBufClean, + aecm->dBufClean + PART_LEN, + sizeof(int16_t) * PART_LEN); + } +} + +// Transforms a time domain signal into the frequency domain, outputting the +// complex valued signal, absolute value and sum of absolute values. +// +// time_signal [in] Pointer to time domain signal +// freq_signal_real [out] Pointer to real part of frequency domain array +// freq_signal_imag [out] Pointer to imaginary part of frequency domain +// array +// freq_signal_abs [out] Pointer to absolute value of frequency domain +// array +// freq_signal_sum_abs [out] Pointer to the sum of all absolute values in +// the frequency domain array +// return value The Q-domain of current frequency values +// +static int TimeToFrequencyDomain(AecmCore* aecm, + const int16_t* time_signal, + ComplexInt16* freq_signal, + uint16_t* freq_signal_abs, + uint32_t* freq_signal_sum_abs) { + int i = 0; + int time_signal_scaling = 0; + + int32_t tmp32no1 = 0; + int32_t tmp32no2 = 0; + + // In fft_buf, +16 for 32-byte alignment. 
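// The declaration just below pairs the comment above with the align-up
// idiom ((addr + 31) & ~31): over-allocate by 16 int16_t (32 bytes), then
// round the pointer up to the next 32-byte boundary. A self-contained
// sketch of the same trick:
#include <assert.h>
#include <stdint.h>

static int16_t* AlignUp32(int16_t* buf) {
  return (int16_t*)(((uintptr_t)buf + 31) & ~(uintptr_t)31);
}

static void AlignUp32Demo(void) {
  int16_t raw[64 + 16];  // payload + 32 bytes of slack for alignment
  int16_t* aligned = AlignUp32(raw);
  assert(((uintptr_t)aligned & 31) == 0);                      // 32-byte aligned
  assert(aligned + 64 <= raw + sizeof(raw) / sizeof(raw[0]));  // still in bounds
}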
+ int16_t fft_buf[PART_LEN4 + 16]; + int16_t *fft = (int16_t *) (((uintptr_t) fft_buf + 31) & ~31); + + int16_t tmp16no1; +#ifndef WEBRTC_ARCH_ARM_V7 + int16_t tmp16no2; +#endif +#ifdef AECM_WITH_ABS_APPROX + int16_t max_value = 0; + int16_t min_value = 0; + uint16_t alpha = 0; + uint16_t beta = 0; +#endif + +#ifdef AECM_DYNAMIC_Q + tmp16no1 = WebRtcSpl_MaxAbsValueW16(time_signal, PART_LEN2); + time_signal_scaling = WebRtcSpl_NormW16(tmp16no1); +#endif + + WindowAndFFT(aecm, fft, time_signal, freq_signal, time_signal_scaling); + + // Extract imaginary and real part, calculate the magnitude for + // all frequency bins + freq_signal[0].imag = 0; + freq_signal[PART_LEN].imag = 0; + freq_signal_abs[0] = (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[0].real); + freq_signal_abs[PART_LEN] = (uint16_t)WEBRTC_SPL_ABS_W16( + freq_signal[PART_LEN].real); + (*freq_signal_sum_abs) = (uint32_t)(freq_signal_abs[0]) + + (uint32_t)(freq_signal_abs[PART_LEN]); + + for (i = 1; i < PART_LEN; i++) + { + if (freq_signal[i].real == 0) + { + freq_signal_abs[i] = (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[i].imag); + } + else if (freq_signal[i].imag == 0) + { + freq_signal_abs[i] = (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[i].real); + } + else + { + // Approximation for magnitude of complex fft output + // magn = sqrt(real^2 + imag^2) + // magn ~= alpha * max(|imag|,|real|) + beta * min(|imag|,|real|) + // + // The parameters alpha and beta are stored in Q15 + +#ifdef AECM_WITH_ABS_APPROX + tmp16no1 = WEBRTC_SPL_ABS_W16(freq_signal[i].real); + tmp16no2 = WEBRTC_SPL_ABS_W16(freq_signal[i].imag); + + if(tmp16no1 > tmp16no2) + { + max_value = tmp16no1; + min_value = tmp16no2; + } else + { + max_value = tmp16no2; + min_value = tmp16no1; + } + + // Magnitude in Q(-6) + if ((max_value >> 2) > min_value) + { + alpha = kAlpha1; + beta = kBeta1; + } else if ((max_value >> 1) > min_value) + { + alpha = kAlpha2; + beta = kBeta2; + } else + { + alpha = kAlpha3; + beta = kBeta3; + } + tmp16no1 = (int16_t)((max_value * alpha) >> 15); + tmp16no2 = (int16_t)((min_value * beta) >> 15); + freq_signal_abs[i] = (uint16_t)tmp16no1 + (uint16_t)tmp16no2; +#else +#ifdef WEBRTC_ARCH_ARM_V7 + __asm __volatile( + "smulbb %[tmp32no1], %[real], %[real]\n\t" + "smlabb %[tmp32no2], %[imag], %[imag], %[tmp32no1]\n\t" + :[tmp32no1]"+&r"(tmp32no1), + [tmp32no2]"=r"(tmp32no2) + :[real]"r"(freq_signal[i].real), + [imag]"r"(freq_signal[i].imag) + ); +#else + tmp16no1 = WEBRTC_SPL_ABS_W16(freq_signal[i].real); + tmp16no2 = WEBRTC_SPL_ABS_W16(freq_signal[i].imag); + tmp32no1 = tmp16no1 * tmp16no1; + tmp32no2 = tmp16no2 * tmp16no2; + tmp32no2 = WebRtcSpl_AddSatW32(tmp32no1, tmp32no2); +#endif // WEBRTC_ARCH_ARM_V7 + tmp32no1 = WebRtcSpl_SqrtFloor(tmp32no2); + + freq_signal_abs[i] = (uint16_t)tmp32no1; +#endif // AECM_WITH_ABS_APPROX + } + (*freq_signal_sum_abs) += (uint32_t)freq_signal_abs[i]; + } + + return time_signal_scaling; +} + +int RTC_NO_SANITIZE("signed-integer-overflow") // bugs.webrtc.org/8200 +WebRtcAecm_ProcessBlock(AecmCore* aecm, + const int16_t* farend, + const int16_t* nearendNoisy, + const int16_t* nearendClean, + int16_t* output) { + int i; + + uint32_t xfaSum; + uint32_t dfaNoisySum; + uint32_t dfaCleanSum; + uint32_t echoEst32Gained; + uint32_t tmpU32; + + int32_t tmp32no1; + + uint16_t xfa[PART_LEN1]; + uint16_t dfaNoisy[PART_LEN1]; + uint16_t dfaClean[PART_LEN1]; + uint16_t* ptrDfaClean = dfaClean; + const uint16_t* far_spectrum_ptr = NULL; + + // 32 byte aligned buffers (with +8 or +16). + // TODO(kma): define fft with ComplexInt16. 
+ int16_t fft_buf[PART_LEN4 + 2 + 16]; // +2 to make a loop safe. + int32_t echoEst32_buf[PART_LEN1 + 8]; + int32_t dfw_buf[PART_LEN2 + 8]; + int32_t efw_buf[PART_LEN2 + 8]; + + int16_t* fft = (int16_t*) (((uintptr_t) fft_buf + 31) & ~ 31); + int32_t* echoEst32 = (int32_t*) (((uintptr_t) echoEst32_buf + 31) & ~ 31); + ComplexInt16* dfw = (ComplexInt16*)(((uintptr_t)dfw_buf + 31) & ~31); + ComplexInt16* efw = (ComplexInt16*)(((uintptr_t)efw_buf + 31) & ~31); + + int16_t hnl[PART_LEN1]; + int16_t numPosCoef = 0; + int16_t nlpGain = ONE_Q14; + int delay; + int16_t tmp16no1; + int16_t tmp16no2; + int16_t mu; + int16_t supGain; + int16_t zeros32, zeros16; + int16_t zerosDBufNoisy, zerosDBufClean, zerosXBuf; + int far_q; + int16_t resolutionDiff, qDomainDiff, dfa_clean_q_domain_diff; + + const int kMinPrefBand = 4; + const int kMaxPrefBand = 24; + int32_t avgHnl32 = 0; + + // Determine startup state. There are three states: + // (0) the first CONV_LEN blocks + // (1) another CONV_LEN blocks + // (2) the rest + + if (aecm->startupState < 2) + { + aecm->startupState = (aecm->totCount >= CONV_LEN) + + (aecm->totCount >= CONV_LEN2); + } + // END: Determine startup state + + // Buffer near and far end signals + memcpy(aecm->xBuf + PART_LEN, farend, sizeof(int16_t) * PART_LEN); + memcpy(aecm->dBufNoisy + PART_LEN, nearendNoisy, sizeof(int16_t) * PART_LEN); + if (nearendClean != NULL) + { + memcpy(aecm->dBufClean + PART_LEN, + nearendClean, + sizeof(int16_t) * PART_LEN); + } + + // Transform far end signal from time domain to frequency domain. + far_q = TimeToFrequencyDomain(aecm, + aecm->xBuf, + dfw, + xfa, + &xfaSum); + + // Transform noisy near end signal from time domain to frequency domain. + zerosDBufNoisy = TimeToFrequencyDomain(aecm, + aecm->dBufNoisy, + dfw, + dfaNoisy, + &dfaNoisySum); + aecm->dfaNoisyQDomainOld = aecm->dfaNoisyQDomain; + aecm->dfaNoisyQDomain = (int16_t)zerosDBufNoisy; + + + if (nearendClean == NULL) + { + ptrDfaClean = dfaNoisy; + aecm->dfaCleanQDomainOld = aecm->dfaNoisyQDomainOld; + aecm->dfaCleanQDomain = aecm->dfaNoisyQDomain; + dfaCleanSum = dfaNoisySum; + } else + { + // Transform clean near end signal from time domain to frequency domain. + zerosDBufClean = TimeToFrequencyDomain(aecm, + aecm->dBufClean, + dfw, + dfaClean, + &dfaCleanSum); + aecm->dfaCleanQDomainOld = aecm->dfaCleanQDomain; + aecm->dfaCleanQDomain = (int16_t)zerosDBufClean; + } + + // Get the delay + // Save far-end history and estimate delay + WebRtcAecm_UpdateFarHistory(aecm, xfa, far_q); + if (WebRtc_AddFarSpectrumFix(aecm->delay_estimator_farend, + xfa, + PART_LEN1, + far_q) == -1) { + return -1; + } + delay = WebRtc_DelayEstimatorProcessFix(aecm->delay_estimator, + dfaNoisy, + PART_LEN1, + zerosDBufNoisy); + if (delay == -1) + { + return -1; + } + else if (delay == -2) + { + // If the delay is unknown, we assume zero. + // NOTE: this will have to be adjusted if we ever add lookahead. + delay = 0; + } + + if (aecm->fixedDelay >= 0) + { + // Use fixed delay + delay = aecm->fixedDelay; + } + + // Get aligned far end spectrum + far_spectrum_ptr = WebRtcAecm_AlignedFarend(aecm, &far_q, delay); + zerosXBuf = (int16_t) far_q; + if (far_spectrum_ptr == NULL) + { + return -1; + } + + // Calculate log(energy) and update energy threshold levels + WebRtcAecm_CalcEnergies(aecm, + far_spectrum_ptr, + zerosXBuf, + dfaNoisySum, + echoEst32); + + // Calculate stepsize + mu = WebRtcAecm_CalcStepSize(aecm); + + // Update counters + aecm->totCount++; + + // This is the channel estimation algorithm. 
+ // It is base on NLMS but has a variable step length, + // which was calculated above. + WebRtcAecm_UpdateChannel(aecm, + far_spectrum_ptr, + zerosXBuf, + dfaNoisy, + mu, + echoEst32); + supGain = WebRtcAecm_CalcSuppressionGain(aecm); + + + // Calculate Wiener filter hnl[] + for (i = 0; i < PART_LEN1; i++) + { + // Far end signal through channel estimate in Q8 + // How much can we shift right to preserve resolution + tmp32no1 = echoEst32[i] - aecm->echoFilt[i]; + aecm->echoFilt[i] += + rtc::dchecked_cast<int32_t>((int64_t{tmp32no1} * 50) >> 8); + + zeros32 = WebRtcSpl_NormW32(aecm->echoFilt[i]) + 1; + zeros16 = WebRtcSpl_NormW16(supGain) + 1; + if (zeros32 + zeros16 > 16) + { + // Multiplication is safe + // Result in + // Q(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN+ + // aecm->xfaQDomainBuf[diff]) + echoEst32Gained = WEBRTC_SPL_UMUL_32_16((uint32_t)aecm->echoFilt[i], + (uint16_t)supGain); + resolutionDiff = 14 - RESOLUTION_CHANNEL16 - RESOLUTION_SUPGAIN; + resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf); + } else + { + tmp16no1 = 17 - zeros32 - zeros16; + resolutionDiff = 14 + tmp16no1 - RESOLUTION_CHANNEL16 - + RESOLUTION_SUPGAIN; + resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf); + if (zeros32 > tmp16no1) + { + echoEst32Gained = WEBRTC_SPL_UMUL_32_16((uint32_t)aecm->echoFilt[i], + supGain >> tmp16no1); + } else + { + // Result in Q-(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN-16) + echoEst32Gained = (aecm->echoFilt[i] >> tmp16no1) * supGain; + } + } + + zeros16 = WebRtcSpl_NormW16(aecm->nearFilt[i]); + RTC_DCHECK_GE(zeros16, 0); // |zeros16| is a norm, hence non-negative. + dfa_clean_q_domain_diff = aecm->dfaCleanQDomain - aecm->dfaCleanQDomainOld; + if (zeros16 < dfa_clean_q_domain_diff && aecm->nearFilt[i]) { + tmp16no1 = aecm->nearFilt[i] << zeros16; + qDomainDiff = zeros16 - dfa_clean_q_domain_diff; + tmp16no2 = ptrDfaClean[i] >> -qDomainDiff; + } else { + tmp16no1 = dfa_clean_q_domain_diff < 0 + ? aecm->nearFilt[i] >> -dfa_clean_q_domain_diff + : aecm->nearFilt[i] << dfa_clean_q_domain_diff; + qDomainDiff = 0; + tmp16no2 = ptrDfaClean[i]; + } + tmp32no1 = (int32_t)(tmp16no2 - tmp16no1); + tmp16no2 = (int16_t)(tmp32no1 >> 4); + tmp16no2 += tmp16no1; + zeros16 = WebRtcSpl_NormW16(tmp16no2); + if ((tmp16no2) & (-qDomainDiff > zeros16)) { + aecm->nearFilt[i] = WEBRTC_SPL_WORD16_MAX; + } else { + aecm->nearFilt[i] = qDomainDiff < 0 ? tmp16no2 << -qDomainDiff + : tmp16no2 >> qDomainDiff; + } + + // Wiener filter coefficients, resulting hnl in Q14 + if (echoEst32Gained == 0) + { + hnl[i] = ONE_Q14; + } else if (aecm->nearFilt[i] == 0) + { + hnl[i] = 0; + } else + { + // Multiply the suppression gain + // Rounding + echoEst32Gained += (uint32_t)(aecm->nearFilt[i] >> 1); + tmpU32 = WebRtcSpl_DivU32U16(echoEst32Gained, + (uint16_t)aecm->nearFilt[i]); + + // Current resolution is + // Q-(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN- max(0,17-zeros16- zeros32)) + // Make sure we are in Q14 + tmp32no1 = (int32_t)WEBRTC_SPL_SHIFT_W32(tmpU32, resolutionDiff); + if (tmp32no1 > ONE_Q14) + { + hnl[i] = 0; + } else if (tmp32no1 < 0) + { + hnl[i] = ONE_Q14; + } else + { + // 1-echoEst/dfa + hnl[i] = ONE_Q14 - (int16_t)tmp32no1; + if (hnl[i] < 0) + { + hnl[i] = 0; + } + } + } + if (hnl[i]) + { + numPosCoef++; + } + } + // Only in wideband. Prevent the gain in upper band from being larger than + // in lower band. + if (aecm->mult == 2) + { + // TODO(bjornv): Investigate if the scaling of hnl[i] below can cause + // speech distortion in double-talk. 
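// Stripped of the fixed-point Q-domain bookkeeping, the per-bin gain
// computed above is hnl = 1 - (supGain * echoFilt) / nearFilt, clamped to
// [0, ONE_Q14]. A floating-point sketch of that rule; the parameter names
// are illustrative stand-ins for the smoothed estimates:
static float WienerGain(float echo_filt, float sup_gain, float near_filt) {
  if (near_filt <= 0.0f) return 0.0f;  // mirrors the nearFilt == 0 case
  float ratio = (echo_filt * sup_gain) / near_filt;
  if (ratio >= 1.0f) return 0.0f;      // echo dominates: full suppression
  if (ratio < 0.0f) return 1.0f;       // no echo estimated: pass through
  return 1.0f - ratio;                 // 1 - echoEst/dfa, as in the code
}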
+ for (i = 0; i < PART_LEN1; i++) + { + hnl[i] = (int16_t)((hnl[i] * hnl[i]) >> 14); + } + + for (i = kMinPrefBand; i <= kMaxPrefBand; i++) + { + avgHnl32 += (int32_t)hnl[i]; + } + RTC_DCHECK_GT(kMaxPrefBand - kMinPrefBand + 1, 0); + avgHnl32 /= (kMaxPrefBand - kMinPrefBand + 1); + + for (i = kMaxPrefBand; i < PART_LEN1; i++) + { + if (hnl[i] > (int16_t)avgHnl32) + { + hnl[i] = (int16_t)avgHnl32; + } + } + } + + // Calculate NLP gain, result is in Q14 + if (aecm->nlpFlag) + { + for (i = 0; i < PART_LEN1; i++) + { + // Truncate values close to zero and one. + if (hnl[i] > NLP_COMP_HIGH) + { + hnl[i] = ONE_Q14; + } else if (hnl[i] < NLP_COMP_LOW) + { + hnl[i] = 0; + } + + // Remove outliers + if (numPosCoef < 3) + { + nlpGain = 0; + } else + { + nlpGain = ONE_Q14; + } + + // NLP + if ((hnl[i] == ONE_Q14) && (nlpGain == ONE_Q14)) + { + hnl[i] = ONE_Q14; + } else + { + hnl[i] = (int16_t)((hnl[i] * nlpGain) >> 14); + } + + // multiply with Wiener coefficients + efw[i].real = (int16_t)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].real, + hnl[i], 14)); + efw[i].imag = (int16_t)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].imag, + hnl[i], 14)); + } + } + else + { + // multiply with Wiener coefficients + for (i = 0; i < PART_LEN1; i++) + { + efw[i].real = (int16_t)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].real, + hnl[i], 14)); + efw[i].imag = (int16_t)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].imag, + hnl[i], 14)); + } + } + + if (aecm->cngMode == AecmTrue) + { + ComfortNoise(aecm, ptrDfaClean, efw, hnl); + } + + InverseFFTAndWindow(aecm, fft, efw, output, nearendClean); + + return 0; +} + +static void ComfortNoise(AecmCore* aecm, + const uint16_t* dfa, + ComplexInt16* out, + const int16_t* lambda) { + int16_t i; + int16_t tmp16; + int32_t tmp32; + + int16_t randW16[PART_LEN]; + int16_t uReal[PART_LEN1]; + int16_t uImag[PART_LEN1]; + int32_t outLShift32; + int16_t noiseRShift16[PART_LEN1]; + + int16_t shiftFromNearToNoise = kNoiseEstQDomain - aecm->dfaCleanQDomain; + int16_t minTrackShift; + + RTC_DCHECK_GE(shiftFromNearToNoise, 0); + RTC_DCHECK_LT(shiftFromNearToNoise, 16); + + if (aecm->noiseEstCtr < 100) + { + // Track the minimum more quickly initially. + aecm->noiseEstCtr++; + minTrackShift = 6; + } else + { + minTrackShift = 9; + } + + // Estimate noise power. + for (i = 0; i < PART_LEN1; i++) + { + // Shift to the noise domain. + tmp32 = (int32_t)dfa[i]; + outLShift32 = tmp32 << shiftFromNearToNoise; + + if (outLShift32 < aecm->noiseEst[i]) + { + // Reset "too low" counter + aecm->noiseEstTooLowCtr[i] = 0; + // Track the minimum. + if (aecm->noiseEst[i] < (1 << minTrackShift)) + { + // For small values, decrease noiseEst[i] every + // |kNoiseEstIncCount| block. The regular approach below can not + // go further down due to truncation. + aecm->noiseEstTooHighCtr[i]++; + if (aecm->noiseEstTooHighCtr[i] >= kNoiseEstIncCount) + { + aecm->noiseEst[i]--; + aecm->noiseEstTooHighCtr[i] = 0; // Reset the counter + } + } + else + { + aecm->noiseEst[i] -= ((aecm->noiseEst[i] - outLShift32) + >> minTrackShift); + } + } else + { + // Reset "too high" counter + aecm->noiseEstTooHighCtr[i] = 0; + // Ramp slowly upwards until we hit the minimum again. + if ((aecm->noiseEst[i] >> 19) > 0) + { + // Avoid overflow. + // Multiplication with 2049 will cause wrap around. 
Scale + // down first and then multiply + aecm->noiseEst[i] >>= 11; + aecm->noiseEst[i] *= 2049; + } + else if ((aecm->noiseEst[i] >> 11) > 0) + { + // Large enough for relative increase + aecm->noiseEst[i] *= 2049; + aecm->noiseEst[i] >>= 11; + } + else + { + // Make incremental increases based on size every + // |kNoiseEstIncCount| block + aecm->noiseEstTooLowCtr[i]++; + if (aecm->noiseEstTooLowCtr[i] >= kNoiseEstIncCount) + { + aecm->noiseEst[i] += (aecm->noiseEst[i] >> 9) + 1; + aecm->noiseEstTooLowCtr[i] = 0; // Reset counter + } + } + } + } + + for (i = 0; i < PART_LEN1; i++) + { + tmp32 = aecm->noiseEst[i] >> shiftFromNearToNoise; + if (tmp32 > 32767) + { + tmp32 = 32767; + aecm->noiseEst[i] = tmp32 << shiftFromNearToNoise; + } + noiseRShift16[i] = (int16_t)tmp32; + + tmp16 = ONE_Q14 - lambda[i]; + noiseRShift16[i] = (int16_t)((tmp16 * noiseRShift16[i]) >> 14); + } + + // Generate a uniform random array on [0 2^15-1]. + WebRtcSpl_RandUArray(randW16, PART_LEN, &aecm->seed); + + // Generate noise according to estimated energy. + uReal[0] = 0; // Reject LF noise. + uImag[0] = 0; + for (i = 1; i < PART_LEN1; i++) + { + // Get a random index for the cos and sin tables over [0 359]. + tmp16 = (int16_t)((359 * randW16[i - 1]) >> 15); + + // Tables are in Q13. + uReal[i] = (int16_t)((noiseRShift16[i] * WebRtcAecm_kCosTable[tmp16]) >> + 13); + uImag[i] = (int16_t)((-noiseRShift16[i] * WebRtcAecm_kSinTable[tmp16]) >> + 13); + } + uImag[PART_LEN] = 0; + + for (i = 0; i < PART_LEN1; i++) + { + out[i].real = WebRtcSpl_AddSatW16(out[i].real, uReal[i]); + out[i].imag = WebRtcSpl_AddSatW16(out[i].imag, uImag[i]); + } +} diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aecm/aecm_core_mips.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aecm/aecm_core_mips.cc new file mode 100644 index 0000000000..58e5ec5e35 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aecm/aecm_core_mips.cc @@ -0,0 +1,1566 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/aecm/aecm_core.h" + +#include "modules/audio_processing/aecm/echo_control_mobile.h" +#include "modules/audio_processing/utility/delay_estimator_wrapper.h" +#include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_conversions.h" + +static const ALIGN8_BEG int16_t WebRtcAecm_kSqrtHanning[] ALIGN8_END = { + 0, 399, 798, 1196, 1594, 1990, 2386, 2780, 3172, + 3562, 3951, 4337, 4720, 5101, 5478, 5853, 6224, + 6591, 6954, 7313, 7668, 8019, 8364, 8705, 9040, + 9370, 9695, 10013, 10326, 10633, 10933, 11227, 11514, + 11795, 12068, 12335, 12594, 12845, 13089, 13325, 13553, + 13773, 13985, 14189, 14384, 14571, 14749, 14918, 15079, + 15231, 15373, 15506, 15631, 15746, 15851, 15947, 16034, + 16111, 16179, 16237, 16286, 16325, 16354, 16373, 16384 +}; + +static const int16_t kNoiseEstQDomain = 15; +static const int16_t kNoiseEstIncCount = 5; + +static int16_t coefTable[] = { + 0, 4, 256, 260, 128, 132, 384, 388, + 64, 68, 320, 324, 192, 196, 448, 452, + 32, 36, 288, 292, 160, 164, 416, 420, + 96, 100, 352, 356, 224, 228, 480, 484, + 16, 20, 272, 276, 144, 148, 400, 404, + 80, 84, 336, 340, 208, 212, 464, 468, + 48, 52, 304, 308, 176, 180, 432, 436, + 112, 116, 368, 372, 240, 244, 496, 500, + 8, 12, 264, 268, 136, 140, 392, 396, + 72, 76, 328, 332, 200, 204, 456, 460, + 40, 44, 296, 300, 168, 172, 424, 428, + 104, 108, 360, 364, 232, 236, 488, 492, + 24, 28, 280, 284, 152, 156, 408, 412, + 88, 92, 344, 348, 216, 220, 472, 476, + 56, 60, 312, 316, 184, 188, 440, 444, + 120, 124, 376, 380, 248, 252, 504, 508 +}; + +static int16_t coefTable_ifft[] = { + 0, 512, 256, 508, 128, 252, 384, 380, + 64, 124, 320, 444, 192, 188, 448, 316, + 32, 60, 288, 476, 160, 220, 416, 348, + 96, 92, 352, 412, 224, 156, 480, 284, + 16, 28, 272, 492, 144, 236, 400, 364, + 80, 108, 336, 428, 208, 172, 464, 300, + 48, 44, 304, 460, 176, 204, 432, 332, + 112, 76, 368, 396, 240, 140, 496, 268, + 8, 12, 264, 500, 136, 244, 392, 372, + 72, 116, 328, 436, 200, 180, 456, 308, + 40, 52, 296, 468, 168, 212, 424, 340, + 104, 84, 360, 404, 232, 148, 488, 276, + 24, 20, 280, 484, 152, 228, 408, 356, + 88, 100, 344, 420, 216, 164, 472, 292, + 56, 36, 312, 452, 184, 196, 440, 324, + 120, 68, 376, 388, 248, 132, 504, 260 +}; + +static void ComfortNoise(AecmCore* aecm, + const uint16_t* dfa, + ComplexInt16* out, + const int16_t* lambda); + +static void WindowAndFFT(AecmCore* aecm, + int16_t* fft, + const int16_t* time_signal, + ComplexInt16* freq_signal, + int time_signal_scaling) { + int i, j; + int32_t tmp1, tmp2, tmp3, tmp4; + int16_t* pfrfi; + ComplexInt16* pfreq_signal; + int16_t f_coef, s_coef; + int32_t load_ptr, store_ptr1, store_ptr2, shift, shift1; + int32_t hann, hann1, coefs; + + memset(fft, 0, sizeof(int16_t) * PART_LEN4); + + // FFT of signal + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "addiu %[shift], %[time_signal_scaling], -14 \n\t" + "addiu %[i], $zero, 64 \n\t" + "addiu %[load_ptr], %[time_signal], 0 \n\t" + "addiu %[hann], %[hanning], 0 \n\t" + "addiu %[hann1], %[hanning], 128 \n\t" + "addiu %[coefs], %[coefTable], 0 \n\t" + "bltz %[shift], 2f \n\t" + " negu %[shift1], %[shift] \n\t" + "1: \n\t" + "lh %[tmp1], 0(%[load_ptr]) \n\t" + "lh %[tmp2], 0(%[hann]) \n\t" + "lh %[tmp3], 128(%[load_ptr]) \n\t" + "lh %[tmp4], 0(%[hann1]) \n\t" + "addiu %[i], %[i], -1 \n\t" + "mul %[tmp1], %[tmp1], %[tmp2] \n\t" + "mul %[tmp3], %[tmp3], %[tmp4] \n\t" + "lh %[f_coef], 0(%[coefs]) \n\t" + "lh %[s_coef], 2(%[coefs]) \n\t" + "addiu %[load_ptr], %[load_ptr], 2 \n\t" + "addiu 
%[hann], %[hann], 2 \n\t" + "addiu %[hann1], %[hann1], -2 \n\t" + "addu %[store_ptr1], %[fft], %[f_coef] \n\t" + "addu %[store_ptr2], %[fft], %[s_coef] \n\t" + "sllv %[tmp1], %[tmp1], %[shift] \n\t" + "sllv %[tmp3], %[tmp3], %[shift] \n\t" + "sh %[tmp1], 0(%[store_ptr1]) \n\t" + "sh %[tmp3], 0(%[store_ptr2]) \n\t" + "bgtz %[i], 1b \n\t" + " addiu %[coefs], %[coefs], 4 \n\t" + "b 3f \n\t" + " nop \n\t" + "2: \n\t" + "lh %[tmp1], 0(%[load_ptr]) \n\t" + "lh %[tmp2], 0(%[hann]) \n\t" + "lh %[tmp3], 128(%[load_ptr]) \n\t" + "lh %[tmp4], 0(%[hann1]) \n\t" + "addiu %[i], %[i], -1 \n\t" + "mul %[tmp1], %[tmp1], %[tmp2] \n\t" + "mul %[tmp3], %[tmp3], %[tmp4] \n\t" + "lh %[f_coef], 0(%[coefs]) \n\t" + "lh %[s_coef], 2(%[coefs]) \n\t" + "addiu %[load_ptr], %[load_ptr], 2 \n\t" + "addiu %[hann], %[hann], 2 \n\t" + "addiu %[hann1], %[hann1], -2 \n\t" + "addu %[store_ptr1], %[fft], %[f_coef] \n\t" + "addu %[store_ptr2], %[fft], %[s_coef] \n\t" + "srav %[tmp1], %[tmp1], %[shift1] \n\t" + "srav %[tmp3], %[tmp3], %[shift1] \n\t" + "sh %[tmp1], 0(%[store_ptr1]) \n\t" + "sh %[tmp3], 0(%[store_ptr2]) \n\t" + "bgtz %[i], 2b \n\t" + " addiu %[coefs], %[coefs], 4 \n\t" + "3: \n\t" + ".set pop \n\t" + : [load_ptr] "=&r" (load_ptr), [shift] "=&r" (shift), [hann] "=&r" (hann), + [hann1] "=&r" (hann1), [shift1] "=&r" (shift1), [coefs] "=&r" (coefs), + [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3), + [tmp4] "=&r" (tmp4), [i] "=&r" (i), [f_coef] "=&r" (f_coef), + [s_coef] "=&r" (s_coef), [store_ptr1] "=&r" (store_ptr1), + [store_ptr2] "=&r" (store_ptr2) + : [time_signal] "r" (time_signal), [coefTable] "r" (coefTable), + [time_signal_scaling] "r" (time_signal_scaling), + [hanning] "r" (WebRtcAecm_kSqrtHanning), [fft] "r" (fft) + : "memory", "hi", "lo" + ); + + WebRtcSpl_ComplexFFT(fft, PART_LEN_SHIFT, 1); + pfrfi = fft; + pfreq_signal = freq_signal; + + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "addiu %[j], $zero, 128 \n\t" + "1: \n\t" + "lh %[tmp1], 0(%[pfrfi]) \n\t" + "lh %[tmp2], 2(%[pfrfi]) \n\t" + "lh %[tmp3], 4(%[pfrfi]) \n\t" + "lh %[tmp4], 6(%[pfrfi]) \n\t" + "subu %[tmp2], $zero, %[tmp2] \n\t" + "sh %[tmp1], 0(%[pfreq_signal]) \n\t" + "sh %[tmp2], 2(%[pfreq_signal]) \n\t" + "subu %[tmp4], $zero, %[tmp4] \n\t" + "sh %[tmp3], 4(%[pfreq_signal]) \n\t" + "sh %[tmp4], 6(%[pfreq_signal]) \n\t" + "lh %[tmp1], 8(%[pfrfi]) \n\t" + "lh %[tmp2], 10(%[pfrfi]) \n\t" + "lh %[tmp3], 12(%[pfrfi]) \n\t" + "lh %[tmp4], 14(%[pfrfi]) \n\t" + "addiu %[j], %[j], -8 \n\t" + "subu %[tmp2], $zero, %[tmp2] \n\t" + "sh %[tmp1], 8(%[pfreq_signal]) \n\t" + "sh %[tmp2], 10(%[pfreq_signal]) \n\t" + "subu %[tmp4], $zero, %[tmp4] \n\t" + "sh %[tmp3], 12(%[pfreq_signal]) \n\t" + "sh %[tmp4], 14(%[pfreq_signal]) \n\t" + "addiu %[pfreq_signal], %[pfreq_signal], 16 \n\t" + "bgtz %[j], 1b \n\t" + " addiu %[pfrfi], %[pfrfi], 16 \n\t" + ".set pop \n\t" + : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3), + [j] "=&r" (j), [pfrfi] "+r" (pfrfi), [pfreq_signal] "+r" (pfreq_signal), + [tmp4] "=&r" (tmp4) + : + : "memory" + ); +} + +static void InverseFFTAndWindow(AecmCore* aecm, + int16_t* fft, + ComplexInt16* efw, + int16_t* output, + const int16_t* nearendClean) { + int i, outCFFT; + int32_t tmp1, tmp2, tmp3, tmp4, tmp_re, tmp_im; + int16_t* pcoefTable_ifft = coefTable_ifft; + int16_t* pfft = fft; + int16_t* ppfft = fft; + ComplexInt16* pefw = efw; + int32_t out_aecm; + int16_t* paecm_buf = aecm->outBuf; + const int16_t* p_kSqrtHanning = WebRtcAecm_kSqrtHanning; + const int16_t* pp_kSqrtHanning = 
&WebRtcAecm_kSqrtHanning[PART_LEN]; + int16_t* output1 = output; + + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "addiu %[i], $zero, 64 \n\t" + "1: \n\t" + "lh %[tmp1], 0(%[pcoefTable_ifft]) \n\t" + "lh %[tmp2], 2(%[pcoefTable_ifft]) \n\t" + "lh %[tmp_re], 0(%[pefw]) \n\t" + "lh %[tmp_im], 2(%[pefw]) \n\t" + "addu %[pfft], %[fft], %[tmp2] \n\t" + "sh %[tmp_re], 0(%[pfft]) \n\t" + "sh %[tmp_im], 2(%[pfft]) \n\t" + "addu %[pfft], %[fft], %[tmp1] \n\t" + "sh %[tmp_re], 0(%[pfft]) \n\t" + "subu %[tmp_im], $zero, %[tmp_im] \n\t" + "sh %[tmp_im], 2(%[pfft]) \n\t" + "lh %[tmp1], 4(%[pcoefTable_ifft]) \n\t" + "lh %[tmp2], 6(%[pcoefTable_ifft]) \n\t" + "lh %[tmp_re], 4(%[pefw]) \n\t" + "lh %[tmp_im], 6(%[pefw]) \n\t" + "addu %[pfft], %[fft], %[tmp2] \n\t" + "sh %[tmp_re], 0(%[pfft]) \n\t" + "sh %[tmp_im], 2(%[pfft]) \n\t" + "addu %[pfft], %[fft], %[tmp1] \n\t" + "sh %[tmp_re], 0(%[pfft]) \n\t" + "subu %[tmp_im], $zero, %[tmp_im] \n\t" + "sh %[tmp_im], 2(%[pfft]) \n\t" + "lh %[tmp1], 8(%[pcoefTable_ifft]) \n\t" + "lh %[tmp2], 10(%[pcoefTable_ifft]) \n\t" + "lh %[tmp_re], 8(%[pefw]) \n\t" + "lh %[tmp_im], 10(%[pefw]) \n\t" + "addu %[pfft], %[fft], %[tmp2] \n\t" + "sh %[tmp_re], 0(%[pfft]) \n\t" + "sh %[tmp_im], 2(%[pfft]) \n\t" + "addu %[pfft], %[fft], %[tmp1] \n\t" + "sh %[tmp_re], 0(%[pfft]) \n\t" + "subu %[tmp_im], $zero, %[tmp_im] \n\t" + "sh %[tmp_im], 2(%[pfft]) \n\t" + "lh %[tmp1], 12(%[pcoefTable_ifft]) \n\t" + "lh %[tmp2], 14(%[pcoefTable_ifft]) \n\t" + "lh %[tmp_re], 12(%[pefw]) \n\t" + "lh %[tmp_im], 14(%[pefw]) \n\t" + "addu %[pfft], %[fft], %[tmp2] \n\t" + "sh %[tmp_re], 0(%[pfft]) \n\t" + "sh %[tmp_im], 2(%[pfft]) \n\t" + "addu %[pfft], %[fft], %[tmp1] \n\t" + "sh %[tmp_re], 0(%[pfft]) \n\t" + "subu %[tmp_im], $zero, %[tmp_im] \n\t" + "sh %[tmp_im], 2(%[pfft]) \n\t" + "addiu %[pcoefTable_ifft], %[pcoefTable_ifft], 16 \n\t" + "addiu %[i], %[i], -4 \n\t" + "bgtz %[i], 1b \n\t" + " addiu %[pefw], %[pefw], 16 \n\t" + ".set pop \n\t" + : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [pfft] "+r" (pfft), + [i] "=&r" (i), [tmp_re] "=&r" (tmp_re), [tmp_im] "=&r" (tmp_im), + [pefw] "+r" (pefw), [pcoefTable_ifft] "+r" (pcoefTable_ifft), + [fft] "+r" (fft) + : + : "memory" + ); + + fft[2] = efw[PART_LEN].real; + fft[3] = -efw[PART_LEN].imag; + + outCFFT = WebRtcSpl_ComplexIFFT(fft, PART_LEN_SHIFT, 1); + pfft = fft; + + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "addiu %[i], $zero, 128 \n\t" + "1: \n\t" + "lh %[tmp1], 0(%[ppfft]) \n\t" + "lh %[tmp2], 4(%[ppfft]) \n\t" + "lh %[tmp3], 8(%[ppfft]) \n\t" + "lh %[tmp4], 12(%[ppfft]) \n\t" + "addiu %[i], %[i], -4 \n\t" + "sh %[tmp1], 0(%[pfft]) \n\t" + "sh %[tmp2], 2(%[pfft]) \n\t" + "sh %[tmp3], 4(%[pfft]) \n\t" + "sh %[tmp4], 6(%[pfft]) \n\t" + "addiu %[ppfft], %[ppfft], 16 \n\t" + "bgtz %[i], 1b \n\t" + " addiu %[pfft], %[pfft], 8 \n\t" + ".set pop \n\t" + : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [pfft] "+r" (pfft), + [i] "=&r" (i), [tmp3] "=&r" (tmp3), [tmp4] "=&r" (tmp4), + [ppfft] "+r" (ppfft) + : + : "memory" + ); + + pfft = fft; + out_aecm = (int32_t)(outCFFT - aecm->dfaCleanQDomain); + + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "addiu %[i], $zero, 64 \n\t" + "11: \n\t" + "lh %[tmp1], 0(%[pfft]) \n\t" + "lh %[tmp2], 0(%[p_kSqrtHanning]) \n\t" + "addiu %[i], %[i], -2 \n\t" + "mul %[tmp1], %[tmp1], %[tmp2] \n\t" + "lh %[tmp3], 2(%[pfft]) \n\t" + "lh %[tmp4], 2(%[p_kSqrtHanning]) \n\t" + "mul %[tmp3], %[tmp3], %[tmp4] \n\t" + "addiu %[tmp1], %[tmp1], 8192 \n\t" + "sra %[tmp1], %[tmp1], 14 
\n\t" + "addiu %[tmp3], %[tmp3], 8192 \n\t" + "sra %[tmp3], %[tmp3], 14 \n\t" + "bgez %[out_aecm], 1f \n\t" + " negu %[tmp2], %[out_aecm] \n\t" + "srav %[tmp1], %[tmp1], %[tmp2] \n\t" + "b 2f \n\t" + " srav %[tmp3], %[tmp3], %[tmp2] \n\t" + "1: \n\t" + "sllv %[tmp1], %[tmp1], %[out_aecm] \n\t" + "sllv %[tmp3], %[tmp3], %[out_aecm] \n\t" + "2: \n\t" + "lh %[tmp4], 0(%[paecm_buf]) \n\t" + "lh %[tmp2], 2(%[paecm_buf]) \n\t" + "addu %[tmp3], %[tmp3], %[tmp2] \n\t" + "addu %[tmp1], %[tmp1], %[tmp4] \n\t" +#if defined(MIPS_DSP_R1_LE) + "shll_s.w %[tmp1], %[tmp1], 16 \n\t" + "sra %[tmp1], %[tmp1], 16 \n\t" + "shll_s.w %[tmp3], %[tmp3], 16 \n\t" + "sra %[tmp3], %[tmp3], 16 \n\t" +#else // #if defined(MIPS_DSP_R1_LE) + "sra %[tmp4], %[tmp1], 31 \n\t" + "sra %[tmp2], %[tmp1], 15 \n\t" + "beq %[tmp4], %[tmp2], 3f \n\t" + " ori %[tmp2], $zero, 0x7fff \n\t" + "xor %[tmp1], %[tmp2], %[tmp4] \n\t" + "3: \n\t" + "sra %[tmp2], %[tmp3], 31 \n\t" + "sra %[tmp4], %[tmp3], 15 \n\t" + "beq %[tmp2], %[tmp4], 4f \n\t" + " ori %[tmp4], $zero, 0x7fff \n\t" + "xor %[tmp3], %[tmp4], %[tmp2] \n\t" + "4: \n\t" +#endif // #if defined(MIPS_DSP_R1_LE) + "sh %[tmp1], 0(%[pfft]) \n\t" + "sh %[tmp1], 0(%[output1]) \n\t" + "sh %[tmp3], 2(%[pfft]) \n\t" + "sh %[tmp3], 2(%[output1]) \n\t" + "lh %[tmp1], 128(%[pfft]) \n\t" + "lh %[tmp2], 0(%[pp_kSqrtHanning]) \n\t" + "mul %[tmp1], %[tmp1], %[tmp2] \n\t" + "lh %[tmp3], 130(%[pfft]) \n\t" + "lh %[tmp4], -2(%[pp_kSqrtHanning]) \n\t" + "mul %[tmp3], %[tmp3], %[tmp4] \n\t" + "sra %[tmp1], %[tmp1], 14 \n\t" + "sra %[tmp3], %[tmp3], 14 \n\t" + "bgez %[out_aecm], 5f \n\t" + " negu %[tmp2], %[out_aecm] \n\t" + "srav %[tmp3], %[tmp3], %[tmp2] \n\t" + "b 6f \n\t" + " srav %[tmp1], %[tmp1], %[tmp2] \n\t" + "5: \n\t" + "sllv %[tmp1], %[tmp1], %[out_aecm] \n\t" + "sllv %[tmp3], %[tmp3], %[out_aecm] \n\t" + "6: \n\t" +#if defined(MIPS_DSP_R1_LE) + "shll_s.w %[tmp1], %[tmp1], 16 \n\t" + "sra %[tmp1], %[tmp1], 16 \n\t" + "shll_s.w %[tmp3], %[tmp3], 16 \n\t" + "sra %[tmp3], %[tmp3], 16 \n\t" +#else // #if defined(MIPS_DSP_R1_LE) + "sra %[tmp4], %[tmp1], 31 \n\t" + "sra %[tmp2], %[tmp1], 15 \n\t" + "beq %[tmp4], %[tmp2], 7f \n\t" + " ori %[tmp2], $zero, 0x7fff \n\t" + "xor %[tmp1], %[tmp2], %[tmp4] \n\t" + "7: \n\t" + "sra %[tmp2], %[tmp3], 31 \n\t" + "sra %[tmp4], %[tmp3], 15 \n\t" + "beq %[tmp2], %[tmp4], 8f \n\t" + " ori %[tmp4], $zero, 0x7fff \n\t" + "xor %[tmp3], %[tmp4], %[tmp2] \n\t" + "8: \n\t" +#endif // #if defined(MIPS_DSP_R1_LE) + "sh %[tmp1], 0(%[paecm_buf]) \n\t" + "sh %[tmp3], 2(%[paecm_buf]) \n\t" + "addiu %[output1], %[output1], 4 \n\t" + "addiu %[paecm_buf], %[paecm_buf], 4 \n\t" + "addiu %[pfft], %[pfft], 4 \n\t" + "addiu %[p_kSqrtHanning], %[p_kSqrtHanning], 4 \n\t" + "bgtz %[i], 11b \n\t" + " addiu %[pp_kSqrtHanning], %[pp_kSqrtHanning], -4 \n\t" + ".set pop \n\t" + : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [pfft] "+r" (pfft), + [output1] "+r" (output1), [tmp3] "=&r" (tmp3), [tmp4] "=&r" (tmp4), + [paecm_buf] "+r" (paecm_buf), [i] "=&r" (i), + [pp_kSqrtHanning] "+r" (pp_kSqrtHanning), + [p_kSqrtHanning] "+r" (p_kSqrtHanning) + : [out_aecm] "r" (out_aecm), + [WebRtcAecm_kSqrtHanning] "r" (WebRtcAecm_kSqrtHanning) + : "hi", "lo","memory" + ); + + // Copy the current block to the old position + // (aecm->outBuf is shifted elsewhere) + memcpy(aecm->xBuf, aecm->xBuf + PART_LEN, sizeof(int16_t) * PART_LEN); + memcpy(aecm->dBufNoisy, + aecm->dBufNoisy + PART_LEN, + sizeof(int16_t) * PART_LEN); + if (nearendClean != NULL) { + memcpy(aecm->dBufClean, + aecm->dBufClean + PART_LEN, + 
sizeof(int16_t) * PART_LEN); + } +} + +void WebRtcAecm_CalcLinearEnergies_mips(AecmCore* aecm, + const uint16_t* far_spectrum, + int32_t* echo_est, + uint32_t* far_energy, + uint32_t* echo_energy_adapt, + uint32_t* echo_energy_stored) { + int i; + uint32_t par1 = (*far_energy); + uint32_t par2 = (*echo_energy_adapt); + uint32_t par3 = (*echo_energy_stored); + int16_t* ch_stored_p = &(aecm->channelStored[0]); + int16_t* ch_adapt_p = &(aecm->channelAdapt16[0]); + uint16_t* spectrum_p = (uint16_t*)(&(far_spectrum[0])); + int32_t* echo_p = &(echo_est[0]); + int32_t temp0, stored0, echo0, adept0, spectrum0; + int32_t stored1, adept1, spectrum1, echo1, temp1; + + // Get energy for the delayed far end signal and estimated + // echo using both stored and adapted channels. + for (i = 0; i < PART_LEN; i+= 4) { + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "lh %[stored0], 0(%[ch_stored_p]) \n\t" + "lhu %[adept0], 0(%[ch_adapt_p]) \n\t" + "lhu %[spectrum0], 0(%[spectrum_p]) \n\t" + "lh %[stored1], 2(%[ch_stored_p]) \n\t" + "lhu %[adept1], 2(%[ch_adapt_p]) \n\t" + "lhu %[spectrum1], 2(%[spectrum_p]) \n\t" + "mul %[echo0], %[stored0], %[spectrum0] \n\t" + "mul %[temp0], %[adept0], %[spectrum0] \n\t" + "mul %[echo1], %[stored1], %[spectrum1] \n\t" + "mul %[temp1], %[adept1], %[spectrum1] \n\t" + "addu %[par1], %[par1], %[spectrum0] \n\t" + "addu %[par1], %[par1], %[spectrum1] \n\t" + "addiu %[echo_p], %[echo_p], 16 \n\t" + "addu %[par3], %[par3], %[echo0] \n\t" + "addu %[par2], %[par2], %[temp0] \n\t" + "addu %[par3], %[par3], %[echo1] \n\t" + "addu %[par2], %[par2], %[temp1] \n\t" + "usw %[echo0], -16(%[echo_p]) \n\t" + "usw %[echo1], -12(%[echo_p]) \n\t" + "lh %[stored0], 4(%[ch_stored_p]) \n\t" + "lhu %[adept0], 4(%[ch_adapt_p]) \n\t" + "lhu %[spectrum0], 4(%[spectrum_p]) \n\t" + "lh %[stored1], 6(%[ch_stored_p]) \n\t" + "lhu %[adept1], 6(%[ch_adapt_p]) \n\t" + "lhu %[spectrum1], 6(%[spectrum_p]) \n\t" + "mul %[echo0], %[stored0], %[spectrum0] \n\t" + "mul %[temp0], %[adept0], %[spectrum0] \n\t" + "mul %[echo1], %[stored1], %[spectrum1] \n\t" + "mul %[temp1], %[adept1], %[spectrum1] \n\t" + "addu %[par1], %[par1], %[spectrum0] \n\t" + "addu %[par1], %[par1], %[spectrum1] \n\t" + "addiu %[ch_stored_p], %[ch_stored_p], 8 \n\t" + "addiu %[ch_adapt_p], %[ch_adapt_p], 8 \n\t" + "addiu %[spectrum_p], %[spectrum_p], 8 \n\t" + "addu %[par3], %[par3], %[echo0] \n\t" + "addu %[par2], %[par2], %[temp0] \n\t" + "addu %[par3], %[par3], %[echo1] \n\t" + "addu %[par2], %[par2], %[temp1] \n\t" + "usw %[echo0], -8(%[echo_p]) \n\t" + "usw %[echo1], -4(%[echo_p]) \n\t" + ".set pop \n\t" + : [temp0] "=&r" (temp0), [stored0] "=&r" (stored0), + [adept0] "=&r" (adept0), [spectrum0] "=&r" (spectrum0), + [echo0] "=&r" (echo0), [echo_p] "+r" (echo_p), [par3] "+r" (par3), + [par1] "+r" (par1), [par2] "+r" (par2), [stored1] "=&r" (stored1), + [adept1] "=&r" (adept1), [echo1] "=&r" (echo1), + [spectrum1] "=&r" (spectrum1), [temp1] "=&r" (temp1), + [ch_stored_p] "+r" (ch_stored_p), [ch_adapt_p] "+r" (ch_adapt_p), + [spectrum_p] "+r" (spectrum_p) + : + : "hi", "lo", "memory" + ); + } + + echo_est[PART_LEN] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[PART_LEN], + far_spectrum[PART_LEN]); + par1 += (uint32_t)(far_spectrum[PART_LEN]); + par2 += aecm->channelAdapt16[PART_LEN] * far_spectrum[PART_LEN]; + par3 += (uint32_t)echo_est[PART_LEN]; + + (*far_energy) = par1; + (*echo_energy_adapt) = par2; + (*echo_energy_stored) = par3; +} + +#if defined(MIPS_DSP_R1_LE) +void WebRtcAecm_StoreAdaptiveChannel_mips(AecmCore* 
aecm, + const uint16_t* far_spectrum, + int32_t* echo_est) { + int i; + int16_t* temp1; + uint16_t* temp8; + int32_t temp0, temp2, temp3, temp4, temp5, temp6; + int32_t* temp7 = &(echo_est[0]); + temp1 = &(aecm->channelStored[0]); + temp8 = (uint16_t*)(&far_spectrum[0]); + + // During startup we store the channel every block. + memcpy(aecm->channelStored, aecm->channelAdapt16, + sizeof(int16_t) * PART_LEN1); + // Recalculate echo estimate + for (i = 0; i < PART_LEN; i += 4) { + __asm __volatile ( + "ulw %[temp0], 0(%[temp8]) \n\t" + "ulw %[temp2], 0(%[temp1]) \n\t" + "ulw %[temp4], 4(%[temp8]) \n\t" + "ulw %[temp5], 4(%[temp1]) \n\t" + "muleq_s.w.phl %[temp3], %[temp2], %[temp0] \n\t" + "muleq_s.w.phr %[temp0], %[temp2], %[temp0] \n\t" + "muleq_s.w.phl %[temp6], %[temp5], %[temp4] \n\t" + "muleq_s.w.phr %[temp4], %[temp5], %[temp4] \n\t" + "addiu %[temp7], %[temp7], 16 \n\t" + "addiu %[temp1], %[temp1], 8 \n\t" + "addiu %[temp8], %[temp8], 8 \n\t" + "sra %[temp3], %[temp3], 1 \n\t" + "sra %[temp0], %[temp0], 1 \n\t" + "sra %[temp6], %[temp6], 1 \n\t" + "sra %[temp4], %[temp4], 1 \n\t" + "usw %[temp3], -12(%[temp7]) \n\t" + "usw %[temp0], -16(%[temp7]) \n\t" + "usw %[temp6], -4(%[temp7]) \n\t" + "usw %[temp4], -8(%[temp7]) \n\t" + : [temp0] "=&r" (temp0), [temp2] "=&r" (temp2), [temp3] "=&r" (temp3), + [temp4] "=&r" (temp4), [temp5] "=&r" (temp5), [temp6] "=&r" (temp6), + [temp1] "+r" (temp1), [temp8] "+r" (temp8), [temp7] "+r" (temp7) + : + : "hi", "lo", "memory" + ); + } + echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], + far_spectrum[i]); +} + +void WebRtcAecm_ResetAdaptiveChannel_mips(AecmCore* aecm) { + int i; + int32_t* temp3; + int16_t* temp0; + int32_t temp1, temp2, temp4, temp5; + + temp0 = &(aecm->channelStored[0]); + temp3 = &(aecm->channelAdapt32[0]); + + // The stored channel has a significantly lower MSE than the adaptive one for + // two consecutive calculations. Reset the adaptive channel. + memcpy(aecm->channelAdapt16, + aecm->channelStored, + sizeof(int16_t) * PART_LEN1); + + // Restore the W32 channel + for (i = 0; i < PART_LEN; i += 4) { + __asm __volatile ( + "ulw %[temp1], 0(%[temp0]) \n\t" + "ulw %[temp4], 4(%[temp0]) \n\t" + "preceq.w.phl %[temp2], %[temp1] \n\t" + "preceq.w.phr %[temp1], %[temp1] \n\t" + "preceq.w.phl %[temp5], %[temp4] \n\t" + "preceq.w.phr %[temp4], %[temp4] \n\t" + "addiu %[temp0], %[temp0], 8 \n\t" + "usw %[temp2], 4(%[temp3]) \n\t" + "usw %[temp1], 0(%[temp3]) \n\t" + "usw %[temp5], 12(%[temp3]) \n\t" + "usw %[temp4], 8(%[temp3]) \n\t" + "addiu %[temp3], %[temp3], 16 \n\t" + : [temp1] "=&r" (temp1), [temp2] "=&r" (temp2), + [temp4] "=&r" (temp4), [temp5] "=&r" (temp5), + [temp3] "+r" (temp3), [temp0] "+r" (temp0) + : + : "memory" + ); + } + + aecm->channelAdapt32[i] = (int32_t)aecm->channelStored[i] << 16; +} +#endif // #if defined(MIPS_DSP_R1_LE) + +// Transforms a time domain signal into the frequency domain, outputting the +// complex valued signal, absolute value and sum of absolute values. 
+// +// time_signal [in] Pointer to time domain signal +// freq_signal_real [out] Pointer to real part of frequency domain array +// freq_signal_imag [out] Pointer to imaginary part of frequency domain +// array +// freq_signal_abs [out] Pointer to absolute value of frequency domain +// array +// freq_signal_sum_abs [out] Pointer to the sum of all absolute values in +// the frequency domain array +// return value The Q-domain of current frequency values +// +static int TimeToFrequencyDomain(AecmCore* aecm, + const int16_t* time_signal, + ComplexInt16* freq_signal, + uint16_t* freq_signal_abs, + uint32_t* freq_signal_sum_abs) { + int i = 0; + int time_signal_scaling = 0; + + // In fft_buf, +16 for 32-byte alignment. + int16_t fft_buf[PART_LEN4 + 16]; + int16_t *fft = (int16_t *) (((uintptr_t) fft_buf + 31) & ~31); + + int16_t tmp16no1; +#if !defined(MIPS_DSP_R2_LE) + int32_t tmp32no1; + int32_t tmp32no2; + int16_t tmp16no2; +#else + int32_t tmp32no10, tmp32no11, tmp32no12, tmp32no13; + int32_t tmp32no20, tmp32no21, tmp32no22, tmp32no23; + int16_t* freqp; + uint16_t* freqabsp; + uint32_t freqt0, freqt1, freqt2, freqt3; + uint32_t freqs; +#endif + +#ifdef AECM_DYNAMIC_Q + tmp16no1 = WebRtcSpl_MaxAbsValueW16(time_signal, PART_LEN2); + time_signal_scaling = WebRtcSpl_NormW16(tmp16no1); +#endif + + WindowAndFFT(aecm, fft, time_signal, freq_signal, time_signal_scaling); + + // Extract imaginary and real part, + // calculate the magnitude for all frequency bins + freq_signal[0].imag = 0; + freq_signal[PART_LEN].imag = 0; + freq_signal[PART_LEN].real = fft[PART_LEN2]; + freq_signal_abs[0] = (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[0].real); + freq_signal_abs[PART_LEN] = (uint16_t)WEBRTC_SPL_ABS_W16( + freq_signal[PART_LEN].real); + (*freq_signal_sum_abs) = (uint32_t)(freq_signal_abs[0]) + + (uint32_t)(freq_signal_abs[PART_LEN]); + +#if !defined(MIPS_DSP_R2_LE) + for (i = 1; i < PART_LEN; i++) { + if (freq_signal[i].real == 0) + { + freq_signal_abs[i] = (uint16_t)WEBRTC_SPL_ABS_W16( + freq_signal[i].imag); + } + else if (freq_signal[i].imag == 0) + { + freq_signal_abs[i] = (uint16_t)WEBRTC_SPL_ABS_W16( + freq_signal[i].real); + } + else + { + // Approximation for magnitude of complex fft output + // magn = sqrt(real^2 + imag^2) + // magn ~= alpha * max(|imag|,|real|) + beta * min(|imag|,|real|) + // + // The parameters alpha and beta are stored in Q15 + tmp16no1 = WEBRTC_SPL_ABS_W16(freq_signal[i].real); + tmp16no2 = WEBRTC_SPL_ABS_W16(freq_signal[i].imag); + tmp32no1 = tmp16no1 * tmp16no1; + tmp32no2 = tmp16no2 * tmp16no2; + tmp32no2 = WebRtcSpl_AddSatW32(tmp32no1, tmp32no2); + tmp32no1 = WebRtcSpl_SqrtFloor(tmp32no2); + + freq_signal_abs[i] = (uint16_t)tmp32no1; + } + (*freq_signal_sum_abs) += (uint32_t)freq_signal_abs[i]; + } +#else // #if !defined(MIPS_DSP_R2_LE) + freqs = (uint32_t)(freq_signal_abs[0]) + + (uint32_t)(freq_signal_abs[PART_LEN]); + freqp = &(freq_signal[1].real); + + __asm __volatile ( + "lw %[freqt0], 0(%[freqp]) \n\t" + "lw %[freqt1], 4(%[freqp]) \n\t" + "lw %[freqt2], 8(%[freqp]) \n\t" + "mult $ac0, $zero, $zero \n\t" + "mult $ac1, $zero, $zero \n\t" + "mult $ac2, $zero, $zero \n\t" + "dpaq_s.w.ph $ac0, %[freqt0], %[freqt0] \n\t" + "dpaq_s.w.ph $ac1, %[freqt1], %[freqt1] \n\t" + "dpaq_s.w.ph $ac2, %[freqt2], %[freqt2] \n\t" + "addiu %[freqp], %[freqp], 12 \n\t" + "extr.w %[tmp32no20], $ac0, 1 \n\t" + "extr.w %[tmp32no21], $ac1, 1 \n\t" + "extr.w %[tmp32no22], $ac2, 1 \n\t" + : [freqt0] "=&r" (freqt0), [freqt1] "=&r" (freqt1), + [freqt2] "=&r" (freqt2), [freqp] "+r" (freqp), + 
[tmp32no20] "=r" (tmp32no20), [tmp32no21] "=r" (tmp32no21), + [tmp32no22] "=r" (tmp32no22) + : + : "memory", "hi", "lo", "$ac1hi", "$ac1lo", "$ac2hi", "$ac2lo" + ); + + tmp32no10 = WebRtcSpl_SqrtFloor(tmp32no20); + tmp32no11 = WebRtcSpl_SqrtFloor(tmp32no21); + tmp32no12 = WebRtcSpl_SqrtFloor(tmp32no22); + freq_signal_abs[1] = (uint16_t)tmp32no10; + freq_signal_abs[2] = (uint16_t)tmp32no11; + freq_signal_abs[3] = (uint16_t)tmp32no12; + freqs += (uint32_t)tmp32no10; + freqs += (uint32_t)tmp32no11; + freqs += (uint32_t)tmp32no12; + freqabsp = &(freq_signal_abs[4]); + for (i = 4; i < PART_LEN; i+=4) + { + __asm __volatile ( + "ulw %[freqt0], 0(%[freqp]) \n\t" + "ulw %[freqt1], 4(%[freqp]) \n\t" + "ulw %[freqt2], 8(%[freqp]) \n\t" + "ulw %[freqt3], 12(%[freqp]) \n\t" + "mult $ac0, $zero, $zero \n\t" + "mult $ac1, $zero, $zero \n\t" + "mult $ac2, $zero, $zero \n\t" + "mult $ac3, $zero, $zero \n\t" + "dpaq_s.w.ph $ac0, %[freqt0], %[freqt0] \n\t" + "dpaq_s.w.ph $ac1, %[freqt1], %[freqt1] \n\t" + "dpaq_s.w.ph $ac2, %[freqt2], %[freqt2] \n\t" + "dpaq_s.w.ph $ac3, %[freqt3], %[freqt3] \n\t" + "addiu %[freqp], %[freqp], 16 \n\t" + "addiu %[freqabsp], %[freqabsp], 8 \n\t" + "extr.w %[tmp32no20], $ac0, 1 \n\t" + "extr.w %[tmp32no21], $ac1, 1 \n\t" + "extr.w %[tmp32no22], $ac2, 1 \n\t" + "extr.w %[tmp32no23], $ac3, 1 \n\t" + : [freqt0] "=&r" (freqt0), [freqt1] "=&r" (freqt1), + [freqt2] "=&r" (freqt2), [freqt3] "=&r" (freqt3), + [tmp32no20] "=r" (tmp32no20), [tmp32no21] "=r" (tmp32no21), + [tmp32no22] "=r" (tmp32no22), [tmp32no23] "=r" (tmp32no23), + [freqabsp] "+r" (freqabsp), [freqp] "+r" (freqp) + : + : "memory", "hi", "lo", "$ac1hi", "$ac1lo", + "$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo" + ); + + tmp32no10 = WebRtcSpl_SqrtFloor(tmp32no20); + tmp32no11 = WebRtcSpl_SqrtFloor(tmp32no21); + tmp32no12 = WebRtcSpl_SqrtFloor(tmp32no22); + tmp32no13 = WebRtcSpl_SqrtFloor(tmp32no23); + + __asm __volatile ( + "sh %[tmp32no10], -8(%[freqabsp]) \n\t" + "sh %[tmp32no11], -6(%[freqabsp]) \n\t" + "sh %[tmp32no12], -4(%[freqabsp]) \n\t" + "sh %[tmp32no13], -2(%[freqabsp]) \n\t" + "addu %[freqs], %[freqs], %[tmp32no10] \n\t" + "addu %[freqs], %[freqs], %[tmp32no11] \n\t" + "addu %[freqs], %[freqs], %[tmp32no12] \n\t" + "addu %[freqs], %[freqs], %[tmp32no13] \n\t" + : [freqs] "+r" (freqs) + : [tmp32no10] "r" (tmp32no10), [tmp32no11] "r" (tmp32no11), + [tmp32no12] "r" (tmp32no12), [tmp32no13] "r" (tmp32no13), + [freqabsp] "r" (freqabsp) + : "memory" + ); + } + + (*freq_signal_sum_abs) = freqs; +#endif + + return time_signal_scaling; +} + +int WebRtcAecm_ProcessBlock(AecmCore* aecm, + const int16_t* farend, + const int16_t* nearendNoisy, + const int16_t* nearendClean, + int16_t* output) { + int i; + uint32_t xfaSum; + uint32_t dfaNoisySum; + uint32_t dfaCleanSum; + uint32_t echoEst32Gained; + uint32_t tmpU32; + int32_t tmp32no1; + + uint16_t xfa[PART_LEN1]; + uint16_t dfaNoisy[PART_LEN1]; + uint16_t dfaClean[PART_LEN1]; + uint16_t* ptrDfaClean = dfaClean; + const uint16_t* far_spectrum_ptr = NULL; + + // 32 byte aligned buffers (with +8 or +16). + int16_t fft_buf[PART_LEN4 + 2 + 16]; // +2 to make a loop safe. 
+ int32_t echoEst32_buf[PART_LEN1 + 8]; + int32_t dfw_buf[PART_LEN2 + 8]; + int32_t efw_buf[PART_LEN2 + 8]; + + int16_t* fft = (int16_t*)(((uint32_t)fft_buf + 31) & ~ 31); + int32_t* echoEst32 = (int32_t*)(((uint32_t)echoEst32_buf + 31) & ~ 31); + ComplexInt16* dfw = (ComplexInt16*)(((uint32_t)dfw_buf + 31) & ~31); + ComplexInt16* efw = (ComplexInt16*)(((uint32_t)efw_buf + 31) & ~31); + + int16_t hnl[PART_LEN1]; + int16_t numPosCoef = 0; + int delay; + int16_t tmp16no1; + int16_t tmp16no2; + int16_t mu; + int16_t supGain; + int16_t zeros32, zeros16; + int16_t zerosDBufNoisy, zerosDBufClean, zerosXBuf; + int far_q; + int16_t resolutionDiff, qDomainDiff, dfa_clean_q_domain_diff; + + const int kMinPrefBand = 4; + const int kMaxPrefBand = 24; + int32_t avgHnl32 = 0; + + int32_t temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8; + int16_t* ptr; + int16_t* ptr1; + int16_t* er_ptr; + int16_t* dr_ptr; + + ptr = &hnl[0]; + ptr1 = &hnl[0]; + er_ptr = &efw[0].real; + dr_ptr = &dfw[0].real; + + // Determine startup state. There are three states: + // (0) the first CONV_LEN blocks + // (1) another CONV_LEN blocks + // (2) the rest + + if (aecm->startupState < 2) { + aecm->startupState = (aecm->totCount >= CONV_LEN) + + (aecm->totCount >= CONV_LEN2); + } + // END: Determine startup state + + // Buffer near and far end signals + memcpy(aecm->xBuf + PART_LEN, farend, sizeof(int16_t) * PART_LEN); + memcpy(aecm->dBufNoisy + PART_LEN, + nearendNoisy, + sizeof(int16_t) * PART_LEN); + if (nearendClean != NULL) { + memcpy(aecm->dBufClean + PART_LEN, + nearendClean, + sizeof(int16_t) * PART_LEN); + } + + // Transform far end signal from time domain to frequency domain. + far_q = TimeToFrequencyDomain(aecm, + aecm->xBuf, + dfw, + xfa, + &xfaSum); + + // Transform noisy near end signal from time domain to frequency domain. + zerosDBufNoisy = TimeToFrequencyDomain(aecm, + aecm->dBufNoisy, + dfw, + dfaNoisy, + &dfaNoisySum); + aecm->dfaNoisyQDomainOld = aecm->dfaNoisyQDomain; + aecm->dfaNoisyQDomain = (int16_t)zerosDBufNoisy; + + if (nearendClean == NULL) { + ptrDfaClean = dfaNoisy; + aecm->dfaCleanQDomainOld = aecm->dfaNoisyQDomainOld; + aecm->dfaCleanQDomain = aecm->dfaNoisyQDomain; + dfaCleanSum = dfaNoisySum; + } else { + // Transform clean near end signal from time domain to frequency domain. + zerosDBufClean = TimeToFrequencyDomain(aecm, + aecm->dBufClean, + dfw, + dfaClean, + &dfaCleanSum); + aecm->dfaCleanQDomainOld = aecm->dfaCleanQDomain; + aecm->dfaCleanQDomain = (int16_t)zerosDBufClean; + } + + // Get the delay + // Save far-end history and estimate delay + WebRtcAecm_UpdateFarHistory(aecm, xfa, far_q); + + if (WebRtc_AddFarSpectrumFix(aecm->delay_estimator_farend, xfa, PART_LEN1, + far_q) == -1) { + return -1; + } + delay = WebRtc_DelayEstimatorProcessFix(aecm->delay_estimator, + dfaNoisy, + PART_LEN1, + zerosDBufNoisy); + if (delay == -1) { + return -1; + } + else if (delay == -2) { + // If the delay is unknown, we assume zero. + // NOTE: this will have to be adjusted if we ever add lookahead. 
+ delay = 0; + } + + if (aecm->fixedDelay >= 0) { + // Use fixed delay + delay = aecm->fixedDelay; + } + + // Get aligned far end spectrum + far_spectrum_ptr = WebRtcAecm_AlignedFarend(aecm, &far_q, delay); + zerosXBuf = (int16_t) far_q; + + if (far_spectrum_ptr == NULL) { + return -1; + } + + // Calculate log(energy) and update energy threshold levels + WebRtcAecm_CalcEnergies(aecm, + far_spectrum_ptr, + zerosXBuf, + dfaNoisySum, + echoEst32); + // Calculate stepsize + mu = WebRtcAecm_CalcStepSize(aecm); + + // Update counters + aecm->totCount++; + + // This is the channel estimation algorithm. + // It is base on NLMS but has a variable step length, + // which was calculated above. + WebRtcAecm_UpdateChannel(aecm, + far_spectrum_ptr, + zerosXBuf, + dfaNoisy, + mu, + echoEst32); + + supGain = WebRtcAecm_CalcSuppressionGain(aecm); + + // Calculate Wiener filter hnl[] + for (i = 0; i < PART_LEN1; i++) { + // Far end signal through channel estimate in Q8 + // How much can we shift right to preserve resolution + tmp32no1 = echoEst32[i] - aecm->echoFilt[i]; + aecm->echoFilt[i] += + rtc::dchecked_cast<int32_t>((int64_t{tmp32no1} * 50) >> 8); + + zeros32 = WebRtcSpl_NormW32(aecm->echoFilt[i]) + 1; + zeros16 = WebRtcSpl_NormW16(supGain) + 1; + if (zeros32 + zeros16 > 16) { + // Multiplication is safe + // Result in + // Q(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN+aecm->xfaQDomainBuf[diff]) + echoEst32Gained = WEBRTC_SPL_UMUL_32_16((uint32_t)aecm->echoFilt[i], + (uint16_t)supGain); + resolutionDiff = 14 - RESOLUTION_CHANNEL16 - RESOLUTION_SUPGAIN; + resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf); + } else { + tmp16no1 = 17 - zeros32 - zeros16; + resolutionDiff = 14 + tmp16no1 - RESOLUTION_CHANNEL16 - + RESOLUTION_SUPGAIN; + resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf); + if (zeros32 > tmp16no1) { + echoEst32Gained = WEBRTC_SPL_UMUL_32_16( + (uint32_t)aecm->echoFilt[i], + supGain >> tmp16no1); + } else { + // Result in Q-(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN-16) + echoEst32Gained = (aecm->echoFilt[i] >> tmp16no1) * supGain; + } + } + + zeros16 = WebRtcSpl_NormW16(aecm->nearFilt[i]); + RTC_DCHECK_GE(zeros16, 0); // |zeros16| is a norm, hence non-negative. + dfa_clean_q_domain_diff = aecm->dfaCleanQDomain - aecm->dfaCleanQDomainOld; + if (zeros16 < dfa_clean_q_domain_diff && aecm->nearFilt[i]) { + tmp16no1 = aecm->nearFilt[i] << zeros16; + qDomainDiff = zeros16 - dfa_clean_q_domain_diff; + tmp16no2 = ptrDfaClean[i] >> -qDomainDiff; + } else { + tmp16no1 = dfa_clean_q_domain_diff < 0 + ? aecm->nearFilt[i] >> -dfa_clean_q_domain_diff + : aecm->nearFilt[i] << dfa_clean_q_domain_diff; + qDomainDiff = 0; + tmp16no2 = ptrDfaClean[i]; + } + + tmp32no1 = (int32_t)(tmp16no2 - tmp16no1); + tmp16no2 = (int16_t)(tmp32no1 >> 4); + tmp16no2 += tmp16no1; + zeros16 = WebRtcSpl_NormW16(tmp16no2); + if ((tmp16no2) & (-qDomainDiff > zeros16)) { + aecm->nearFilt[i] = WEBRTC_SPL_WORD16_MAX; + } else { + aecm->nearFilt[i] = qDomainDiff < 0 ? 
tmp16no2 << -qDomainDiff + : tmp16no2 >> qDomainDiff; + } + + // Wiener filter coefficients, resulting hnl in Q14 + if (echoEst32Gained == 0) { + hnl[i] = ONE_Q14; + numPosCoef++; + } else if (aecm->nearFilt[i] == 0) { + hnl[i] = 0; + } else { + // Multiply the suppression gain + // Rounding + echoEst32Gained += (uint32_t)(aecm->nearFilt[i] >> 1); + tmpU32 = WebRtcSpl_DivU32U16(echoEst32Gained, + (uint16_t)aecm->nearFilt[i]); + + // Current resolution is + // Q-(RESOLUTION_CHANNEL + RESOLUTION_SUPGAIN + // - max(0, 17 - zeros16 - zeros32)) + // Make sure we are in Q14 + tmp32no1 = (int32_t)WEBRTC_SPL_SHIFT_W32(tmpU32, resolutionDiff); + if (tmp32no1 > ONE_Q14) { + hnl[i] = 0; + } else if (tmp32no1 < 0) { + hnl[i] = ONE_Q14; + numPosCoef++; + } else { + // 1-echoEst/dfa + hnl[i] = ONE_Q14 - (int16_t)tmp32no1; + if (hnl[i] <= 0) { + hnl[i] = 0; + } else { + numPosCoef++; + } + } + } + } + + // Only in wideband. Prevent the gain in upper band from being larger than + // in lower band. + if (aecm->mult == 2) { + // TODO(bjornv): Investigate if the scaling of hnl[i] below can cause + // speech distortion in double-talk. + for (i = 0; i < (PART_LEN1 >> 3); i++) { + __asm __volatile ( + "lh %[temp1], 0(%[ptr1]) \n\t" + "lh %[temp2], 2(%[ptr1]) \n\t" + "lh %[temp3], 4(%[ptr1]) \n\t" + "lh %[temp4], 6(%[ptr1]) \n\t" + "lh %[temp5], 8(%[ptr1]) \n\t" + "lh %[temp6], 10(%[ptr1]) \n\t" + "lh %[temp7], 12(%[ptr1]) \n\t" + "lh %[temp8], 14(%[ptr1]) \n\t" + "mul %[temp1], %[temp1], %[temp1] \n\t" + "mul %[temp2], %[temp2], %[temp2] \n\t" + "mul %[temp3], %[temp3], %[temp3] \n\t" + "mul %[temp4], %[temp4], %[temp4] \n\t" + "mul %[temp5], %[temp5], %[temp5] \n\t" + "mul %[temp6], %[temp6], %[temp6] \n\t" + "mul %[temp7], %[temp7], %[temp7] \n\t" + "mul %[temp8], %[temp8], %[temp8] \n\t" + "sra %[temp1], %[temp1], 14 \n\t" + "sra %[temp2], %[temp2], 14 \n\t" + "sra %[temp3], %[temp3], 14 \n\t" + "sra %[temp4], %[temp4], 14 \n\t" + "sra %[temp5], %[temp5], 14 \n\t" + "sra %[temp6], %[temp6], 14 \n\t" + "sra %[temp7], %[temp7], 14 \n\t" + "sra %[temp8], %[temp8], 14 \n\t" + "sh %[temp1], 0(%[ptr1]) \n\t" + "sh %[temp2], 2(%[ptr1]) \n\t" + "sh %[temp3], 4(%[ptr1]) \n\t" + "sh %[temp4], 6(%[ptr1]) \n\t" + "sh %[temp5], 8(%[ptr1]) \n\t" + "sh %[temp6], 10(%[ptr1]) \n\t" + "sh %[temp7], 12(%[ptr1]) \n\t" + "sh %[temp8], 14(%[ptr1]) \n\t" + "addiu %[ptr1], %[ptr1], 16 \n\t" + : [temp1] "=&r" (temp1), [temp2] "=&r" (temp2), [temp3] "=&r" (temp3), + [temp4] "=&r" (temp4), [temp5] "=&r" (temp5), [temp6] "=&r" (temp6), + [temp7] "=&r" (temp7), [temp8] "=&r" (temp8), [ptr1] "+r" (ptr1) + : + : "memory", "hi", "lo" + ); + } + for(i = 0; i < (PART_LEN1 & 7); i++) { + __asm __volatile ( + "lh %[temp1], 0(%[ptr1]) \n\t" + "mul %[temp1], %[temp1], %[temp1] \n\t" + "sra %[temp1], %[temp1], 14 \n\t" + "sh %[temp1], 0(%[ptr1]) \n\t" + "addiu %[ptr1], %[ptr1], 2 \n\t" + : [temp1] "=&r" (temp1), [ptr1] "+r" (ptr1) + : + : "memory", "hi", "lo" + ); + } + + for (i = kMinPrefBand; i <= kMaxPrefBand; i++) { + avgHnl32 += (int32_t)hnl[i]; + } + + RTC_DCHECK_GT(kMaxPrefBand - kMinPrefBand + 1, 0); + avgHnl32 /= (kMaxPrefBand - kMinPrefBand + 1); + + for (i = kMaxPrefBand; i < PART_LEN1; i++) { + if (hnl[i] > (int16_t)avgHnl32) { + hnl[i] = (int16_t)avgHnl32; + } + } + } + + // Calculate NLP gain, result is in Q14 + if (aecm->nlpFlag) { + if (numPosCoef < 3) { + for (i = 0; i < PART_LEN1; i++) { + efw[i].real = 0; + efw[i].imag = 0; + hnl[i] = 0; + } + } else { + for (i = 0; i < PART_LEN1; i++) { +#if defined(MIPS_DSP_R1_LE) + __asm 
__volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "lh %[temp1], 0(%[ptr]) \n\t" + "lh %[temp2], 0(%[dr_ptr]) \n\t" + "slti %[temp4], %[temp1], 0x4001 \n\t" + "beqz %[temp4], 3f \n\t" + " lh %[temp3], 2(%[dr_ptr]) \n\t" + "slti %[temp5], %[temp1], 3277 \n\t" + "bnez %[temp5], 2f \n\t" + " addiu %[dr_ptr], %[dr_ptr], 4 \n\t" + "mul %[temp2], %[temp2], %[temp1] \n\t" + "mul %[temp3], %[temp3], %[temp1] \n\t" + "shra_r.w %[temp2], %[temp2], 14 \n\t" + "shra_r.w %[temp3], %[temp3], 14 \n\t" + "b 4f \n\t" + " nop \n\t" + "2: \n\t" + "addu %[temp1], $zero, $zero \n\t" + "addu %[temp2], $zero, $zero \n\t" + "addu %[temp3], $zero, $zero \n\t" + "b 1f \n\t" + " nop \n\t" + "3: \n\t" + "addiu %[temp1], $0, 0x4000 \n\t" + "1: \n\t" + "sh %[temp1], 0(%[ptr]) \n\t" + "4: \n\t" + "sh %[temp2], 0(%[er_ptr]) \n\t" + "sh %[temp3], 2(%[er_ptr]) \n\t" + "addiu %[ptr], %[ptr], 2 \n\t" + "addiu %[er_ptr], %[er_ptr], 4 \n\t" + ".set pop \n\t" + : [temp1] "=&r" (temp1), [temp2] "=&r" (temp2), [temp3] "=&r" (temp3), + [temp4] "=&r" (temp4), [temp5] "=&r" (temp5), [ptr] "+r" (ptr), + [er_ptr] "+r" (er_ptr), [dr_ptr] "+r" (dr_ptr) + : + : "memory", "hi", "lo" + ); +#else + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "lh %[temp1], 0(%[ptr]) \n\t" + "lh %[temp2], 0(%[dr_ptr]) \n\t" + "slti %[temp4], %[temp1], 0x4001 \n\t" + "beqz %[temp4], 3f \n\t" + " lh %[temp3], 2(%[dr_ptr]) \n\t" + "slti %[temp5], %[temp1], 3277 \n\t" + "bnez %[temp5], 2f \n\t" + " addiu %[dr_ptr], %[dr_ptr], 4 \n\t" + "mul %[temp2], %[temp2], %[temp1] \n\t" + "mul %[temp3], %[temp3], %[temp1] \n\t" + "addiu %[temp2], %[temp2], 0x2000 \n\t" + "addiu %[temp3], %[temp3], 0x2000 \n\t" + "sra %[temp2], %[temp2], 14 \n\t" + "sra %[temp3], %[temp3], 14 \n\t" + "b 4f \n\t" + " nop \n\t" + "2: \n\t" + "addu %[temp1], $zero, $zero \n\t" + "addu %[temp2], $zero, $zero \n\t" + "addu %[temp3], $zero, $zero \n\t" + "b 1f \n\t" + " nop \n\t" + "3: \n\t" + "addiu %[temp1], $0, 0x4000 \n\t" + "1: \n\t" + "sh %[temp1], 0(%[ptr]) \n\t" + "4: \n\t" + "sh %[temp2], 0(%[er_ptr]) \n\t" + "sh %[temp3], 2(%[er_ptr]) \n\t" + "addiu %[ptr], %[ptr], 2 \n\t" + "addiu %[er_ptr], %[er_ptr], 4 \n\t" + ".set pop \n\t" + : [temp1] "=&r" (temp1), [temp2] "=&r" (temp2), [temp3] "=&r" (temp3), + [temp4] "=&r" (temp4), [temp5] "=&r" (temp5), [ptr] "+r" (ptr), + [er_ptr] "+r" (er_ptr), [dr_ptr] "+r" (dr_ptr) + : + : "memory", "hi", "lo" + ); +#endif + } + } + } + else { + // multiply with Wiener coefficients + for (i = 0; i < PART_LEN1; i++) { + efw[i].real = (int16_t) + (WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].real, + hnl[i], + 14)); + efw[i].imag = (int16_t) + (WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].imag, + hnl[i], + 14)); + } + } + + if (aecm->cngMode == AecmTrue) { + ComfortNoise(aecm, ptrDfaClean, efw, hnl); + } + + InverseFFTAndWindow(aecm, fft, efw, output, nearendClean); + + return 0; +} + +// Generate comfort noise and add to output signal. 
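+// |dfa| is the near-end magnitude spectrum and |lambda| holds the Wiener
+// gains in Q14 computed in ProcessBlock above. Each bin of |out| receives
+// noise scaled by (ONE_Q14 - lambda[i]) with a random phase drawn from the
+// Q13 sin/cos tables, so heavily suppressed bins get the most fill noise.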
+static void ComfortNoise(AecmCore* aecm, + const uint16_t* dfa, + ComplexInt16* out, + const int16_t* lambda) { + int16_t i; + int16_t tmp16, tmp161, tmp162, tmp163, nrsh1, nrsh2; + int32_t tmp32, tmp321, tnoise, tnoise1; + int32_t tmp322, tmp323, *tmp1; + int16_t* dfap; + int16_t* lambdap; + const int32_t c2049 = 2049; + const int32_t c359 = 359; + const int32_t c114 = ONE_Q14; + + int16_t randW16[PART_LEN]; + int16_t uReal[PART_LEN1]; + int16_t uImag[PART_LEN1]; + int32_t outLShift32; + + int16_t shiftFromNearToNoise = kNoiseEstQDomain - aecm->dfaCleanQDomain; + int16_t minTrackShift = 9; + + RTC_DCHECK_GE(shiftFromNearToNoise, 0); + RTC_DCHECK_LT(shiftFromNearToNoise, 16); + + if (aecm->noiseEstCtr < 100) { + // Track the minimum more quickly initially. + aecm->noiseEstCtr++; + minTrackShift = 6; + } + + // Generate a uniform random array on [0 2^15-1]. + WebRtcSpl_RandUArray(randW16, PART_LEN, &aecm->seed); + int16_t* randW16p = (int16_t*)randW16; +#if defined (MIPS_DSP_R1_LE) + int16_t* kCosTablep = (int16_t*)WebRtcAecm_kCosTable; + int16_t* kSinTablep = (int16_t*)WebRtcAecm_kSinTable; +#endif // #if defined(MIPS_DSP_R1_LE) + tmp1 = (int32_t*)aecm->noiseEst + 1; + dfap = (int16_t*)dfa + 1; + lambdap = (int16_t*)lambda + 1; + // Estimate noise power. + for (i = 1; i < PART_LEN1; i+=2) { + // Shift to the noise domain. + __asm __volatile ( + "lh %[tmp32], 0(%[dfap]) \n\t" + "lw %[tnoise], 0(%[tmp1]) \n\t" + "sllv %[outLShift32], %[tmp32], %[shiftFromNearToNoise] \n\t" + : [tmp32] "=&r" (tmp32), [outLShift32] "=r" (outLShift32), + [tnoise] "=&r" (tnoise) + : [tmp1] "r" (tmp1), [dfap] "r" (dfap), + [shiftFromNearToNoise] "r" (shiftFromNearToNoise) + : "memory" + ); + + if (outLShift32 < tnoise) { + // Reset "too low" counter + aecm->noiseEstTooLowCtr[i] = 0; + // Track the minimum. + if (tnoise < (1 << minTrackShift)) { + // For small values, decrease noiseEst[i] every + // |kNoiseEstIncCount| block. The regular approach below can not + // go further down due to truncation. + aecm->noiseEstTooHighCtr[i]++; + if (aecm->noiseEstTooHighCtr[i] >= kNoiseEstIncCount) { + tnoise--; + aecm->noiseEstTooHighCtr[i] = 0; // Reset the counter + } + } else { + __asm __volatile ( + "subu %[tmp32], %[tnoise], %[outLShift32] \n\t" + "srav %[tmp32], %[tmp32], %[minTrackShift] \n\t" + "subu %[tnoise], %[tnoise], %[tmp32] \n\t" + : [tmp32] "=&r" (tmp32), [tnoise] "+r" (tnoise) + : [outLShift32] "r" (outLShift32), [minTrackShift] "r" (minTrackShift) + ); + } + } else { + // Reset "too high" counter + aecm->noiseEstTooHighCtr[i] = 0; + // Ramp slowly upwards until we hit the minimum again. + if ((tnoise >> 19) <= 0) { + if ((tnoise >> 11) > 0) { + // Large enough for relative increase + __asm __volatile ( + "mul %[tnoise], %[tnoise], %[c2049] \n\t" + "sra %[tnoise], %[tnoise], 11 \n\t" + : [tnoise] "+r" (tnoise) + : [c2049] "r" (c2049) + : "hi", "lo" + ); + } else { + // Make incremental increases based on size every + // |kNoiseEstIncCount| block + aecm->noiseEstTooLowCtr[i]++; + if (aecm->noiseEstTooLowCtr[i] >= kNoiseEstIncCount) { + __asm __volatile ( + "sra %[tmp32], %[tnoise], 9 \n\t" + "addi %[tnoise], %[tnoise], 1 \n\t" + "addu %[tnoise], %[tnoise], %[tmp32] \n\t" + : [tnoise] "+r" (tnoise), [tmp32] "=&r" (tmp32) + : + ); + aecm->noiseEstTooLowCtr[i] = 0; // Reset counter + } + } + } else { + // Avoid overflow. + // Multiplication with 2049 will cause wrap around. 
Scale + // down first and then multiply + __asm __volatile ( + "sra %[tnoise], %[tnoise], 11 \n\t" + "mul %[tnoise], %[tnoise], %[c2049] \n\t" + : [tnoise] "+r" (tnoise) + : [c2049] "r" (c2049) + : "hi", "lo" + ); + } + } + + // Shift to the noise domain. + __asm __volatile ( + "lh %[tmp32], 2(%[dfap]) \n\t" + "lw %[tnoise1], 4(%[tmp1]) \n\t" + "addiu %[dfap], %[dfap], 4 \n\t" + "sllv %[outLShift32], %[tmp32], %[shiftFromNearToNoise] \n\t" + : [tmp32] "=&r" (tmp32), [dfap] "+r" (dfap), + [outLShift32] "=r" (outLShift32), [tnoise1] "=&r" (tnoise1) + : [tmp1] "r" (tmp1), [shiftFromNearToNoise] "r" (shiftFromNearToNoise) + : "memory" + ); + + if (outLShift32 < tnoise1) { + // Reset "too low" counter + aecm->noiseEstTooLowCtr[i + 1] = 0; + // Track the minimum. + if (tnoise1 < (1 << minTrackShift)) { + // For small values, decrease noiseEst[i] every + // |kNoiseEstIncCount| block. The regular approach below can not + // go further down due to truncation. + aecm->noiseEstTooHighCtr[i + 1]++; + if (aecm->noiseEstTooHighCtr[i + 1] >= kNoiseEstIncCount) { + tnoise1--; + aecm->noiseEstTooHighCtr[i + 1] = 0; // Reset the counter + } + } else { + __asm __volatile ( + "subu %[tmp32], %[tnoise1], %[outLShift32] \n\t" + "srav %[tmp32], %[tmp32], %[minTrackShift] \n\t" + "subu %[tnoise1], %[tnoise1], %[tmp32] \n\t" + : [tmp32] "=&r" (tmp32), [tnoise1] "+r" (tnoise1) + : [outLShift32] "r" (outLShift32), [minTrackShift] "r" (minTrackShift) + ); + } + } else { + // Reset "too high" counter + aecm->noiseEstTooHighCtr[i + 1] = 0; + // Ramp slowly upwards until we hit the minimum again. + if ((tnoise1 >> 19) <= 0) { + if ((tnoise1 >> 11) > 0) { + // Large enough for relative increase + __asm __volatile ( + "mul %[tnoise1], %[tnoise1], %[c2049] \n\t" + "sra %[tnoise1], %[tnoise1], 11 \n\t" + : [tnoise1] "+r" (tnoise1) + : [c2049] "r" (c2049) + : "hi", "lo" + ); + } else { + // Make incremental increases based on size every + // |kNoiseEstIncCount| block + aecm->noiseEstTooLowCtr[i + 1]++; + if (aecm->noiseEstTooLowCtr[i + 1] >= kNoiseEstIncCount) { + __asm __volatile ( + "sra %[tmp32], %[tnoise1], 9 \n\t" + "addi %[tnoise1], %[tnoise1], 1 \n\t" + "addu %[tnoise1], %[tnoise1], %[tmp32] \n\t" + : [tnoise1] "+r" (tnoise1), [tmp32] "=&r" (tmp32) + : + ); + aecm->noiseEstTooLowCtr[i + 1] = 0; // Reset counter + } + } + } else { + // Avoid overflow. + // Multiplication with 2049 will cause wrap around. 
Scale + // down first and then multiply + __asm __volatile ( + "sra %[tnoise1], %[tnoise1], 11 \n\t" + "mul %[tnoise1], %[tnoise1], %[c2049] \n\t" + : [tnoise1] "+r" (tnoise1) + : [c2049] "r" (c2049) + : "hi", "lo" + ); + } + } + + __asm __volatile ( + "lh %[tmp16], 0(%[lambdap]) \n\t" + "lh %[tmp161], 2(%[lambdap]) \n\t" + "sw %[tnoise], 0(%[tmp1]) \n\t" + "sw %[tnoise1], 4(%[tmp1]) \n\t" + "subu %[tmp16], %[c114], %[tmp16] \n\t" + "subu %[tmp161], %[c114], %[tmp161] \n\t" + "srav %[tmp32], %[tnoise], %[shiftFromNearToNoise] \n\t" + "srav %[tmp321], %[tnoise1], %[shiftFromNearToNoise] \n\t" + "addiu %[lambdap], %[lambdap], 4 \n\t" + "addiu %[tmp1], %[tmp1], 8 \n\t" + : [tmp16] "=&r" (tmp16), [tmp161] "=&r" (tmp161), [tmp1] "+r" (tmp1), + [tmp32] "=&r" (tmp32), [tmp321] "=&r" (tmp321), [lambdap] "+r" (lambdap) + : [tnoise] "r" (tnoise), [tnoise1] "r" (tnoise1), [c114] "r" (c114), + [shiftFromNearToNoise] "r" (shiftFromNearToNoise) + : "memory" + ); + + if (tmp32 > 32767) { + tmp32 = 32767; + aecm->noiseEst[i] = tmp32 << shiftFromNearToNoise; + } + if (tmp321 > 32767) { + tmp321 = 32767; + aecm->noiseEst[i+1] = tmp321 << shiftFromNearToNoise; + } + + __asm __volatile ( + "mul %[tmp32], %[tmp32], %[tmp16] \n\t" + "mul %[tmp321], %[tmp321], %[tmp161] \n\t" + "sra %[nrsh1], %[tmp32], 14 \n\t" + "sra %[nrsh2], %[tmp321], 14 \n\t" + : [nrsh1] "=&r" (nrsh1), [nrsh2] "=r" (nrsh2) + : [tmp16] "r" (tmp16), [tmp161] "r" (tmp161), [tmp32] "r" (tmp32), + [tmp321] "r" (tmp321) + : "memory", "hi", "lo" + ); + + __asm __volatile ( + "lh %[tmp32], 0(%[randW16p]) \n\t" + "lh %[tmp321], 2(%[randW16p]) \n\t" + "addiu %[randW16p], %[randW16p], 4 \n\t" + "mul %[tmp32], %[tmp32], %[c359] \n\t" + "mul %[tmp321], %[tmp321], %[c359] \n\t" + "sra %[tmp16], %[tmp32], 15 \n\t" + "sra %[tmp161], %[tmp321], 15 \n\t" + : [randW16p] "+r" (randW16p), [tmp32] "=&r" (tmp32), + [tmp16] "=r" (tmp16), [tmp161] "=r" (tmp161), [tmp321] "=&r" (tmp321) + : [c359] "r" (c359) + : "memory", "hi", "lo" + ); + +#if !defined(MIPS_DSP_R1_LE) + tmp32 = WebRtcAecm_kCosTable[tmp16]; + tmp321 = WebRtcAecm_kSinTable[tmp16]; + tmp322 = WebRtcAecm_kCosTable[tmp161]; + tmp323 = WebRtcAecm_kSinTable[tmp161]; +#else + __asm __volatile ( + "sll %[tmp16], %[tmp16], 1 \n\t" + "sll %[tmp161], %[tmp161], 1 \n\t" + "lhx %[tmp32], %[tmp16](%[kCosTablep]) \n\t" + "lhx %[tmp321], %[tmp16](%[kSinTablep]) \n\t" + "lhx %[tmp322], %[tmp161](%[kCosTablep]) \n\t" + "lhx %[tmp323], %[tmp161](%[kSinTablep]) \n\t" + : [tmp32] "=&r" (tmp32), [tmp321] "=&r" (tmp321), + [tmp322] "=&r" (tmp322), [tmp323] "=&r" (tmp323) + : [kCosTablep] "r" (kCosTablep), [tmp16] "r" (tmp16), + [tmp161] "r" (tmp161), [kSinTablep] "r" (kSinTablep) + : "memory" + ); +#endif + __asm __volatile ( + "mul %[tmp32], %[tmp32], %[nrsh1] \n\t" + "negu %[tmp162], %[nrsh1] \n\t" + "mul %[tmp322], %[tmp322], %[nrsh2] \n\t" + "negu %[tmp163], %[nrsh2] \n\t" + "sra %[tmp32], %[tmp32], 13 \n\t" + "mul %[tmp321], %[tmp321], %[tmp162] \n\t" + "sra %[tmp322], %[tmp322], 13 \n\t" + "mul %[tmp323], %[tmp323], %[tmp163] \n\t" + "sra %[tmp321], %[tmp321], 13 \n\t" + "sra %[tmp323], %[tmp323], 13 \n\t" + : [tmp32] "+r" (tmp32), [tmp321] "+r" (tmp321), [tmp162] "=&r" (tmp162), + [tmp322] "+r" (tmp322), [tmp323] "+r" (tmp323), [tmp163] "=&r" (tmp163) + : [nrsh1] "r" (nrsh1), [nrsh2] "r" (nrsh2) + : "hi", "lo" + ); + // Tables are in Q13. 
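+    // Reference C for the assembly above (a sketch; |a| and |b| stand for
+    // the random table indices held in tmp16 and tmp161):
+    //   tmp32  = ( WebRtcAecm_kCosTable[a] * nrsh1) >> 13;
+    //   tmp321 = (-WebRtcAecm_kSinTable[a] * nrsh1) >> 13;  // negated phase
+    //   tmp322 = ( WebRtcAecm_kCosTable[b] * nrsh2) >> 13;
+    //   tmp323 = (-WebRtcAecm_kSinTable[b] * nrsh2) >> 13;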
+ uReal[i] = (int16_t)tmp32; + uImag[i] = (int16_t)tmp321; + uReal[i + 1] = (int16_t)tmp322; + uImag[i + 1] = (int16_t)tmp323; + } + + int32_t tt, sgn; + tt = out[0].real; + sgn = ((int)tt) >> 31; + out[0].real = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn); + tt = out[0].imag; + sgn = ((int)tt) >> 31; + out[0].imag = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn); + for (i = 1; i < PART_LEN; i++) { + tt = out[i].real + uReal[i]; + sgn = ((int)tt) >> 31; + out[i].real = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn); + tt = out[i].imag + uImag[i]; + sgn = ((int)tt) >> 31; + out[i].imag = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn); + } + tt = out[PART_LEN].real + uReal[PART_LEN]; + sgn = ((int)tt) >> 31; + out[PART_LEN].real = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn); + tt = out[PART_LEN].imag; + sgn = ((int)tt) >> 31; + out[PART_LEN].imag = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn); +} diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aecm/aecm_core_neon.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aecm/aecm_core_neon.cc new file mode 100644 index 0000000000..ca7211f738 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aecm/aecm_core_neon.cc @@ -0,0 +1,199 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aecm/aecm_core.h" + +#include <arm_neon.h> + +#include "common_audio/signal_processing/include/real_fft.h" +#include "rtc_base/checks.h" + +// TODO(kma): Re-write the corresponding assembly file, the offset +// generating script and makefile, to replace these C functions. + +static inline void AddLanes(uint32_t* ptr, uint32x4_t v) { +#if defined(WEBRTC_ARCH_ARM64) + *(ptr) = vaddvq_u32(v); +#else + uint32x2_t tmp_v; + tmp_v = vadd_u32(vget_low_u32(v), vget_high_u32(v)); + tmp_v = vpadd_u32(tmp_v, tmp_v); + *(ptr) = vget_lane_u32(tmp_v, 0); +#endif +} + +void WebRtcAecm_CalcLinearEnergiesNeon(AecmCore* aecm, + const uint16_t* far_spectrum, + int32_t* echo_est, + uint32_t* far_energy, + uint32_t* echo_energy_adapt, + uint32_t* echo_energy_stored) { + int16_t* start_stored_p = aecm->channelStored; + int16_t* start_adapt_p = aecm->channelAdapt16; + int32_t* echo_est_p = echo_est; + const int16_t* end_stored_p = aecm->channelStored + PART_LEN; + const uint16_t* far_spectrum_p = far_spectrum; + int16x8_t store_v, adapt_v; + uint16x8_t spectrum_v; + uint32x4_t echo_est_v_low, echo_est_v_high; + uint32x4_t far_energy_v, echo_stored_v, echo_adapt_v; + + far_energy_v = vdupq_n_u32(0); + echo_adapt_v = vdupq_n_u32(0); + echo_stored_v = vdupq_n_u32(0); + + // Get energy for the delayed far end signal and estimated + // echo using both stored and adapted channels. 
+  // The C code:
+  //  for (i = 0; i < PART_LEN1; i++) {
+  //      echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i],
+  //                                         far_spectrum[i]);
+  //      (*far_energy) += (uint32_t)(far_spectrum[i]);
+  //      *echo_energy_adapt += aecm->channelAdapt16[i] * far_spectrum[i];
+  //      (*echo_energy_stored) += (uint32_t)echo_est[i];
+  //  }
+  while (start_stored_p < end_stored_p) {
+    spectrum_v = vld1q_u16(far_spectrum_p);
+    adapt_v = vld1q_s16(start_adapt_p);
+    store_v = vld1q_s16(start_stored_p);
+
+    far_energy_v = vaddw_u16(far_energy_v, vget_low_u16(spectrum_v));
+    far_energy_v = vaddw_u16(far_energy_v, vget_high_u16(spectrum_v));
+
+    echo_est_v_low = vmull_u16(vreinterpret_u16_s16(vget_low_s16(store_v)),
+                               vget_low_u16(spectrum_v));
+    echo_est_v_high = vmull_u16(vreinterpret_u16_s16(vget_high_s16(store_v)),
+                                vget_high_u16(spectrum_v));
+    vst1q_s32(echo_est_p, vreinterpretq_s32_u32(echo_est_v_low));
+    vst1q_s32(echo_est_p + 4, vreinterpretq_s32_u32(echo_est_v_high));
+
+    echo_stored_v = vaddq_u32(echo_est_v_low, echo_stored_v);
+    echo_stored_v = vaddq_u32(echo_est_v_high, echo_stored_v);
+
+    echo_adapt_v = vmlal_u16(echo_adapt_v,
+                             vreinterpret_u16_s16(vget_low_s16(adapt_v)),
+                             vget_low_u16(spectrum_v));
+    echo_adapt_v = vmlal_u16(echo_adapt_v,
+                             vreinterpret_u16_s16(vget_high_s16(adapt_v)),
+                             vget_high_u16(spectrum_v));
+
+    start_stored_p += 8;
+    start_adapt_p += 8;
+    far_spectrum_p += 8;
+    echo_est_p += 8;
+  }
+
+  AddLanes(far_energy, far_energy_v);
+  AddLanes(echo_energy_stored, echo_stored_v);
+  AddLanes(echo_energy_adapt, echo_adapt_v);
+
+  echo_est[PART_LEN] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[PART_LEN],
+                                             far_spectrum[PART_LEN]);
+  *echo_energy_stored += (uint32_t)echo_est[PART_LEN];
+  *far_energy += (uint32_t)far_spectrum[PART_LEN];
+  *echo_energy_adapt += aecm->channelAdapt16[PART_LEN] * far_spectrum[PART_LEN];
+}
+
+void WebRtcAecm_StoreAdaptiveChannelNeon(AecmCore* aecm,
+                                         const uint16_t* far_spectrum,
+                                         int32_t* echo_est) {
+  RTC_DCHECK_EQ(0, (uintptr_t)echo_est % 32);
+  RTC_DCHECK_EQ(0, (uintptr_t)aecm->channelStored % 16);
+  RTC_DCHECK_EQ(0, (uintptr_t)aecm->channelAdapt16 % 16);
+
+  // This is the C code for the following optimized code.
+  // During startup we store the channel every block.
+ // memcpy(aecm->channelStored, + // aecm->channelAdapt16, + // sizeof(int16_t) * PART_LEN1); + // Recalculate echo estimate + // for (i = 0; i < PART_LEN; i += 4) { + // echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], + // far_spectrum[i]); + // echo_est[i + 1] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i + 1], + // far_spectrum[i + 1]); + // echo_est[i + 2] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i + 2], + // far_spectrum[i + 2]); + // echo_est[i + 3] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i + 3], + // far_spectrum[i + 3]); + // } + // echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], + // far_spectrum[i]); + const uint16_t* far_spectrum_p = far_spectrum; + int16_t* start_adapt_p = aecm->channelAdapt16; + int16_t* start_stored_p = aecm->channelStored; + const int16_t* end_stored_p = aecm->channelStored + PART_LEN; + int32_t* echo_est_p = echo_est; + + uint16x8_t far_spectrum_v; + int16x8_t adapt_v; + uint32x4_t echo_est_v_low, echo_est_v_high; + + while (start_stored_p < end_stored_p) { + far_spectrum_v = vld1q_u16(far_spectrum_p); + adapt_v = vld1q_s16(start_adapt_p); + + vst1q_s16(start_stored_p, adapt_v); + + echo_est_v_low = vmull_u16(vget_low_u16(far_spectrum_v), + vget_low_u16(vreinterpretq_u16_s16(adapt_v))); + echo_est_v_high = vmull_u16(vget_high_u16(far_spectrum_v), + vget_high_u16(vreinterpretq_u16_s16(adapt_v))); + + vst1q_s32(echo_est_p, vreinterpretq_s32_u32(echo_est_v_low)); + vst1q_s32(echo_est_p + 4, vreinterpretq_s32_u32(echo_est_v_high)); + + far_spectrum_p += 8; + start_adapt_p += 8; + start_stored_p += 8; + echo_est_p += 8; + } + aecm->channelStored[PART_LEN] = aecm->channelAdapt16[PART_LEN]; + echo_est[PART_LEN] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[PART_LEN], + far_spectrum[PART_LEN]); +} + +void WebRtcAecm_ResetAdaptiveChannelNeon(AecmCore* aecm) { + RTC_DCHECK_EQ(0, (uintptr_t)aecm->channelStored % 16); + RTC_DCHECK_EQ(0, (uintptr_t)aecm->channelAdapt16 % 16); + RTC_DCHECK_EQ(0, (uintptr_t)aecm->channelAdapt32 % 32); + + // The C code of following optimized code. + // for (i = 0; i < PART_LEN1; i++) { + // aecm->channelAdapt16[i] = aecm->channelStored[i]; + // aecm->channelAdapt32[i] = WEBRTC_SPL_LSHIFT_W32( + // (int32_t)aecm->channelStored[i], 16); + // } + + int16_t* start_stored_p = aecm->channelStored; + int16_t* start_adapt16_p = aecm->channelAdapt16; + int32_t* start_adapt32_p = aecm->channelAdapt32; + const int16_t* end_stored_p = start_stored_p + PART_LEN; + + int16x8_t stored_v; + int32x4_t adapt32_v_low, adapt32_v_high; + + while (start_stored_p < end_stored_p) { + stored_v = vld1q_s16(start_stored_p); + vst1q_s16(start_adapt16_p, stored_v); + + adapt32_v_low = vshll_n_s16(vget_low_s16(stored_v), 16); + adapt32_v_high = vshll_n_s16(vget_high_s16(stored_v), 16); + + vst1q_s32(start_adapt32_p, adapt32_v_low); + vst1q_s32(start_adapt32_p + 4, adapt32_v_high); + + start_stored_p += 8; + start_adapt16_p += 8; + start_adapt32_p += 8; + } + aecm->channelAdapt16[PART_LEN] = aecm->channelStored[PART_LEN]; + aecm->channelAdapt32[PART_LEN] = (int32_t)aecm->channelStored[PART_LEN] << 16; +} diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aecm/aecm_defines.h b/third_party/libwebrtc/webrtc/modules/audio_processing/aecm/aecm_defines.h new file mode 100644 index 0000000000..ae2d2bc03a --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aecm/aecm_defines.h @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AECM_AECM_DEFINES_H_ +#define MODULES_AUDIO_PROCESSING_AECM_AECM_DEFINES_H_ + +#define AECM_DYNAMIC_Q /* Turn on/off dynamic Q-domain. */ + +/* Algorithm parameters */ +#define FRAME_LEN 80 /* Total frame length, 10 ms. */ + +#define PART_LEN 64 /* Length of partition. */ +#define PART_LEN_SHIFT 7 /* Length of (PART_LEN * 2) in base 2. */ + +#define PART_LEN1 (PART_LEN + 1) /* Unique fft coefficients. */ +#define PART_LEN2 (PART_LEN << 1) /* Length of partition * 2. */ +#define PART_LEN4 (PART_LEN << 2) /* Length of partition * 4. */ +#define FAR_BUF_LEN PART_LEN4 /* Length of buffers. */ +#define MAX_DELAY 100 + +/* Counter parameters */ +#define CONV_LEN 512 /* Convergence length used at startup. */ +#define CONV_LEN2 (CONV_LEN << 1) /* Used at startup. */ + +/* Energy parameters */ +#define MAX_BUF_LEN 64 /* History length of energy signals. */ +#define FAR_ENERGY_MIN 1025 /* Lowest Far energy level: At least 2 */ + /* in energy. */ +#define FAR_ENERGY_DIFF 929 /* Allowed difference between max */ + /* and min. */ +#define ENERGY_DEV_OFFSET 0 /* The energy error offset in Q8. */ +#define ENERGY_DEV_TOL 400 /* The energy estimation tolerance (Q8). */ +#define FAR_ENERGY_VAD_REGION 230 /* Far VAD tolerance region. */ + +/* Stepsize parameters */ +#define MU_MIN 10 /* Min stepsize 2^-MU_MIN (far end energy */ + /* dependent). */ +#define MU_MAX 1 /* Max stepsize 2^-MU_MAX (far end energy */ + /* dependent). */ +#define MU_DIFF 9 /* MU_MIN - MU_MAX */ + +/* Channel parameters */ +#define MIN_MSE_COUNT 20 /* Min number of consecutive blocks with enough */ + /* far end energy to compare channel estimates. */ +#define MIN_MSE_DIFF 29 /* The ratio between adapted and stored channel to */ + /* accept a new storage (0.8 in Q-MSE_RESOLUTION). */ +#define MSE_RESOLUTION 5 /* MSE parameter resolution. */ +#define RESOLUTION_CHANNEL16 12 /* W16 Channel in Q-RESOLUTION_CHANNEL16. */ +#define RESOLUTION_CHANNEL32 28 /* W32 Channel in Q-RESOLUTION_CHANNEL. */ +#define CHANNEL_VAD 16 /* Minimum energy in frequency band */ + /* to update channel. */ + +/* Suppression gain parameters: SUPGAIN parameters in Q-(RESOLUTION_SUPGAIN). */ +#define RESOLUTION_SUPGAIN 8 /* Channel in Q-(RESOLUTION_SUPGAIN). */ +#define SUPGAIN_DEFAULT (1 << RESOLUTION_SUPGAIN) /* Default. */ +#define SUPGAIN_ERROR_PARAM_A 3072 /* Estimation error parameter */ + /* (Maximum gain) (8 in Q8). */ +#define SUPGAIN_ERROR_PARAM_B 1536 /* Estimation error parameter */ + /* (Gain before going down). */ +#define SUPGAIN_ERROR_PARAM_D SUPGAIN_DEFAULT /* Estimation error parameter */ + /* (Should be the same as Default) (1 in Q8). */ +#define SUPGAIN_EPC_DT 200 /* SUPGAIN_ERROR_PARAM_C * ENERGY_DEV_TOL */ + +/* Defines for "check delay estimation" */ +#define CORR_WIDTH 31 /* Number of samples to correlate over. */ +#define CORR_MAX 16 /* Maximum correlation offset. */ +#define CORR_MAX_BUF 63 +#define CORR_DEV 4 +#define CORR_MAX_LEVEL 20 +#define CORR_MAX_LOW 4 +#define CORR_BUF_LEN (CORR_MAX << 1) + 1 +/* Note that CORR_WIDTH + 2*CORR_MAX <= MAX_BUF_LEN. 
*/ + +#define ONE_Q14 (1 << 14) + +/* NLP defines */ +#define NLP_COMP_LOW 3277 /* 0.2 in Q14 */ +#define NLP_COMP_HIGH ONE_Q14 /* 1 in Q14 */ + +#endif diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aecm/echo_control_mobile.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/aecm/echo_control_mobile.cc new file mode 100644 index 0000000000..880f0aa65a --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aecm/echo_control_mobile.cc @@ -0,0 +1,648 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aecm/echo_control_mobile.h" + +#ifdef AEC_DEBUG +#include <stdio.h> +#endif +#include <stdlib.h> + +extern "C" { +#include "common_audio/ring_buffer.h" +#include "common_audio/signal_processing/include/signal_processing_library.h" +} +#include "modules/audio_processing/aecm/aecm_core.h" + +#define BUF_SIZE_FRAMES 50 // buffer size (frames) +// Maximum length of resampled signal. Must be an integer multiple of frames +// (ceil(1/(1 + MIN_SKEW)*2) + 1)*FRAME_LEN +// The factor of 2 handles wb, and the + 1 is as a safety margin +#define MAX_RESAMP_LEN (5 * FRAME_LEN) + +static const size_t kBufSizeSamp = BUF_SIZE_FRAMES * FRAME_LEN; // buffer size (samples) +static const int kSampMsNb = 8; // samples per ms in nb +// Target suppression levels for nlp modes +// log{0.001, 0.00001, 0.00000001} +static const int kInitCheck = 42; + +typedef struct +{ + int sampFreq; + int scSampFreq; + short bufSizeStart; + int knownDelay; + + // Stores the last frame added to the farend buffer + short farendOld[2][FRAME_LEN]; + short initFlag; // indicates if AEC has been initialized + + // Variables used for averaging far end buffer size + short counter; + short sum; + short firstVal; + short checkBufSizeCtr; + + // Variables used for delay shifts + short msInSndCardBuf; + short filtDelay; + int timeForDelayChange; + int ECstartup; + int checkBuffSize; + int delayChange; + short lastDelayDiff; + + int16_t echoMode; + +#ifdef AEC_DEBUG + FILE *bufFile; + FILE *delayFile; + FILE *preCompFile; + FILE *postCompFile; +#endif // AEC_DEBUG + // Structures + RingBuffer *farendBuf; + + AecmCore* aecmCore; +} AecMobile; + +// Estimates delay to set the position of the farend buffer read pointer +// (controlled by knownDelay) +static int WebRtcAecm_EstBufDelay(AecMobile* aecm, short msInSndCardBuf); + +// Stuffs the farend buffer if the estimated delay is too large +static int WebRtcAecm_DelayComp(AecMobile* aecm); + +void* WebRtcAecm_Create() { + AecMobile* aecm = static_cast<AecMobile*>(malloc(sizeof(AecMobile))); + + WebRtcSpl_Init(); + + aecm->aecmCore = WebRtcAecm_CreateCore(); + if (!aecm->aecmCore) { + WebRtcAecm_Free(aecm); + return NULL; + } + + aecm->farendBuf = WebRtc_CreateBuffer(kBufSizeSamp, + sizeof(int16_t)); + if (!aecm->farendBuf) + { + WebRtcAecm_Free(aecm); + return NULL; + } + + aecm->initFlag = 0; + +#ifdef AEC_DEBUG + aecm->aecmCore->farFile = fopen("aecFar.pcm","wb"); + aecm->aecmCore->nearFile = fopen("aecNear.pcm","wb"); + aecm->aecmCore->outFile = fopen("aecOut.pcm","wb"); + //aecm->aecmCore->outLpFile = fopen("aecOutLp.pcm","wb"); + + aecm->bufFile = 
fopen("aecBuf.dat", "wb"); + aecm->delayFile = fopen("aecDelay.dat", "wb"); + aecm->preCompFile = fopen("preComp.pcm", "wb"); + aecm->postCompFile = fopen("postComp.pcm", "wb"); +#endif // AEC_DEBUG + return aecm; +} + +void WebRtcAecm_Free(void* aecmInst) { + AecMobile* aecm = static_cast<AecMobile*>(aecmInst); + + if (aecm == NULL) { + return; + } + +#ifdef AEC_DEBUG + fclose(aecm->aecmCore->farFile); + fclose(aecm->aecmCore->nearFile); + fclose(aecm->aecmCore->outFile); + //fclose(aecm->aecmCore->outLpFile); + + fclose(aecm->bufFile); + fclose(aecm->delayFile); + fclose(aecm->preCompFile); + fclose(aecm->postCompFile); +#endif // AEC_DEBUG + WebRtcAecm_FreeCore(aecm->aecmCore); + WebRtc_FreeBuffer(aecm->farendBuf); + free(aecm); +} + +int32_t WebRtcAecm_Init(void *aecmInst, int32_t sampFreq) +{ + AecMobile* aecm = static_cast<AecMobile*>(aecmInst); + AecmConfig aecConfig; + + if (aecm == NULL) + { + return -1; + } + + if (sampFreq != 8000 && sampFreq != 16000) + { + return AECM_BAD_PARAMETER_ERROR; + } + aecm->sampFreq = sampFreq; + + // Initialize AECM core + if (WebRtcAecm_InitCore(aecm->aecmCore, aecm->sampFreq) == -1) + { + return AECM_UNSPECIFIED_ERROR; + } + + // Initialize farend buffer + WebRtc_InitBuffer(aecm->farendBuf); + + aecm->initFlag = kInitCheck; // indicates that initialization has been done + + aecm->delayChange = 1; + + aecm->sum = 0; + aecm->counter = 0; + aecm->checkBuffSize = 1; + aecm->firstVal = 0; + + aecm->ECstartup = 1; + aecm->bufSizeStart = 0; + aecm->checkBufSizeCtr = 0; + aecm->filtDelay = 0; + aecm->timeForDelayChange = 0; + aecm->knownDelay = 0; + aecm->lastDelayDiff = 0; + + memset(&aecm->farendOld[0][0], 0, 160); + + // Default settings. + aecConfig.cngMode = AecmTrue; + aecConfig.echoMode = 3; + + if (WebRtcAecm_set_config(aecm, aecConfig) == -1) + { + return AECM_UNSPECIFIED_ERROR; + } + + return 0; +} + +// Returns any error that is caused when buffering the +// farend signal. +int32_t WebRtcAecm_GetBufferFarendError(void *aecmInst, const int16_t *farend, + size_t nrOfSamples) { + AecMobile* aecm = static_cast<AecMobile*>(aecmInst); + + if (aecm == NULL) + return -1; + + if (farend == NULL) + return AECM_NULL_POINTER_ERROR; + + if (aecm->initFlag != kInitCheck) + return AECM_UNINITIALIZED_ERROR; + + if (nrOfSamples != 80 && nrOfSamples != 160) + return AECM_BAD_PARAMETER_ERROR; + + return 0; +} + + +int32_t WebRtcAecm_BufferFarend(void *aecmInst, const int16_t *farend, + size_t nrOfSamples) { + AecMobile* aecm = static_cast<AecMobile*>(aecmInst); + + const int32_t err = + WebRtcAecm_GetBufferFarendError(aecmInst, farend, nrOfSamples); + + if (err != 0) + return err; + + // TODO(unknown): Is this really a good idea? 
+ if (!aecm->ECstartup) + { + WebRtcAecm_DelayComp(aecm); + } + + WebRtc_WriteBuffer(aecm->farendBuf, farend, nrOfSamples); + + return 0; +} + +int32_t WebRtcAecm_Process(void *aecmInst, const int16_t *nearendNoisy, + const int16_t *nearendClean, int16_t *out, + size_t nrOfSamples, int16_t msInSndCardBuf) +{ + AecMobile* aecm = static_cast<AecMobile*>(aecmInst); + int32_t retVal = 0; + size_t i; + short nmbrOfFilledBuffers; + size_t nBlocks10ms; + size_t nFrames; +#ifdef AEC_DEBUG + short msInAECBuf; +#endif + + if (aecm == NULL) + { + return -1; + } + + if (nearendNoisy == NULL) + { + return AECM_NULL_POINTER_ERROR; + } + + if (out == NULL) + { + return AECM_NULL_POINTER_ERROR; + } + + if (aecm->initFlag != kInitCheck) + { + return AECM_UNINITIALIZED_ERROR; + } + + if (nrOfSamples != 80 && nrOfSamples != 160) + { + return AECM_BAD_PARAMETER_ERROR; + } + + if (msInSndCardBuf < 0) + { + msInSndCardBuf = 0; + retVal = AECM_BAD_PARAMETER_WARNING; + } else if (msInSndCardBuf > 500) + { + msInSndCardBuf = 500; + retVal = AECM_BAD_PARAMETER_WARNING; + } + msInSndCardBuf += 10; + aecm->msInSndCardBuf = msInSndCardBuf; + + nFrames = nrOfSamples / FRAME_LEN; + nBlocks10ms = nFrames / aecm->aecmCore->mult; + + if (aecm->ECstartup) + { + if (nearendClean == NULL) + { + if (out != nearendNoisy) + { + memcpy(out, nearendNoisy, sizeof(short) * nrOfSamples); + } + } else if (out != nearendClean) + { + memcpy(out, nearendClean, sizeof(short) * nrOfSamples); + } + + nmbrOfFilledBuffers = + (short) WebRtc_available_read(aecm->farendBuf) / FRAME_LEN; + // The AECM is in the start up mode + // AECM is disabled until the soundcard buffer and farend buffers are OK + + // Mechanism to ensure that the soundcard buffer is reasonably stable. + if (aecm->checkBuffSize) + { + aecm->checkBufSizeCtr++; + // Before we fill up the far end buffer we require the amount of data on the + // sound card to be stable (+/-8 ms) compared to the first value. This + // comparison is made during the following 4 consecutive frames. If it seems + // to be stable then we start to fill up the far end buffer. + + if (aecm->counter == 0) + { + aecm->firstVal = aecm->msInSndCardBuf; + aecm->sum = 0; + } + + if (abs(aecm->firstVal - aecm->msInSndCardBuf) + < WEBRTC_SPL_MAX(0.2 * aecm->msInSndCardBuf, kSampMsNb)) + { + aecm->sum += aecm->msInSndCardBuf; + aecm->counter++; + } else + { + aecm->counter = 0; + } + + if (aecm->counter * nBlocks10ms >= 6) + { + // The farend buffer size is determined in blocks of 80 samples + // Use 75% of the average value of the soundcard buffer + aecm->bufSizeStart + = WEBRTC_SPL_MIN((3 * aecm->sum + * aecm->aecmCore->mult) / (aecm->counter * 40), BUF_SIZE_FRAMES); + // buffersize has now been determined + aecm->checkBuffSize = 0; + } + + if (aecm->checkBufSizeCtr * nBlocks10ms > 50) + { + // for really bad sound cards, don't disable echocanceller for more than 0.5 sec + aecm->bufSizeStart = WEBRTC_SPL_MIN((3 * aecm->msInSndCardBuf + * aecm->aecmCore->mult) / 40, BUF_SIZE_FRAMES); + aecm->checkBuffSize = 0; + } + } + + // if checkBuffSize changed in the if-statement above + if (!aecm->checkBuffSize) + { + // soundcard buffer is now reasonably stable + // When the far end buffer is filled with approximately the same amount of + // data as the amount on the sound card we end the start up phase and start + // to cancel echoes. 
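+            // (For reference, the sizing above works out as follows: with
+            // sum/counter being the average reported delay in ms, the
+            // expression 3 * avg * mult / 40 is 75% of that delay expressed
+            // in FRAME_LEN blocks, one block being 10 ms in nb (mult == 1)
+            // and 5 ms in wb (mult == 2). E.g. an average of 120 ms in nb
+            // gives 3 * 120 / 40 = 9 blocks.)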
+ + if (nmbrOfFilledBuffers == aecm->bufSizeStart) + { + aecm->ECstartup = 0; // Enable the AECM + } else if (nmbrOfFilledBuffers > aecm->bufSizeStart) + { + WebRtc_MoveReadPtr(aecm->farendBuf, + (int) WebRtc_available_read(aecm->farendBuf) + - (int) aecm->bufSizeStart * FRAME_LEN); + aecm->ECstartup = 0; + } + } + + } else + { + // AECM is enabled + + // Note only 1 block supported for nb and 2 blocks for wb + for (i = 0; i < nFrames; i++) + { + int16_t farend[FRAME_LEN]; + const int16_t* farend_ptr = NULL; + + nmbrOfFilledBuffers = + (short) WebRtc_available_read(aecm->farendBuf) / FRAME_LEN; + + // Check that there is data in the far end buffer + if (nmbrOfFilledBuffers > 0) + { + // Get the next 80 samples from the farend buffer + WebRtc_ReadBuffer(aecm->farendBuf, (void**) &farend_ptr, farend, + FRAME_LEN); + + // Always store the last frame for use when we run out of data + memcpy(&(aecm->farendOld[i][0]), farend_ptr, + FRAME_LEN * sizeof(short)); + } else + { + // We have no data so we use the last played frame + memcpy(farend, &(aecm->farendOld[i][0]), FRAME_LEN * sizeof(short)); + farend_ptr = farend; + } + + // Call buffer delay estimator when all data is extracted, + // i,e. i = 0 for NB and i = 1 for WB + if ((i == 0 && aecm->sampFreq == 8000) || (i == 1 && aecm->sampFreq == 16000)) + { + WebRtcAecm_EstBufDelay(aecm, aecm->msInSndCardBuf); + } + + // Call the AECM + /*WebRtcAecm_ProcessFrame(aecm->aecmCore, farend, &nearend[FRAME_LEN * i], + &out[FRAME_LEN * i], aecm->knownDelay);*/ + if (WebRtcAecm_ProcessFrame(aecm->aecmCore, + farend_ptr, + &nearendNoisy[FRAME_LEN * i], + (nearendClean + ? &nearendClean[FRAME_LEN * i] + : NULL), + &out[FRAME_LEN * i]) == -1) + return -1; + } + } + +#ifdef AEC_DEBUG + msInAECBuf = (short) WebRtc_available_read(aecm->farendBuf) / + (kSampMsNb * aecm->aecmCore->mult); + fwrite(&msInAECBuf, 2, 1, aecm->bufFile); + fwrite(&(aecm->knownDelay), sizeof(aecm->knownDelay), 1, aecm->delayFile); +#endif + + return retVal; +} + +int32_t WebRtcAecm_set_config(void *aecmInst, AecmConfig config) +{ + AecMobile* aecm = static_cast<AecMobile*>(aecmInst); + + if (aecm == NULL) + { + return -1; + } + + if (aecm->initFlag != kInitCheck) + { + return AECM_UNINITIALIZED_ERROR; + } + + if (config.cngMode != AecmFalse && config.cngMode != AecmTrue) + { + return AECM_BAD_PARAMETER_ERROR; + } + aecm->aecmCore->cngMode = config.cngMode; + + if (config.echoMode < 0 || config.echoMode > 4) + { + return AECM_BAD_PARAMETER_ERROR; + } + aecm->echoMode = config.echoMode; + + if (aecm->echoMode == 0) + { + aecm->aecmCore->supGain = SUPGAIN_DEFAULT >> 3; + aecm->aecmCore->supGainOld = SUPGAIN_DEFAULT >> 3; + aecm->aecmCore->supGainErrParamA = SUPGAIN_ERROR_PARAM_A >> 3; + aecm->aecmCore->supGainErrParamD = SUPGAIN_ERROR_PARAM_D >> 3; + aecm->aecmCore->supGainErrParamDiffAB = (SUPGAIN_ERROR_PARAM_A >> 3) + - (SUPGAIN_ERROR_PARAM_B >> 3); + aecm->aecmCore->supGainErrParamDiffBD = (SUPGAIN_ERROR_PARAM_B >> 3) + - (SUPGAIN_ERROR_PARAM_D >> 3); + } else if (aecm->echoMode == 1) + { + aecm->aecmCore->supGain = SUPGAIN_DEFAULT >> 2; + aecm->aecmCore->supGainOld = SUPGAIN_DEFAULT >> 2; + aecm->aecmCore->supGainErrParamA = SUPGAIN_ERROR_PARAM_A >> 2; + aecm->aecmCore->supGainErrParamD = SUPGAIN_ERROR_PARAM_D >> 2; + aecm->aecmCore->supGainErrParamDiffAB = (SUPGAIN_ERROR_PARAM_A >> 2) + - (SUPGAIN_ERROR_PARAM_B >> 2); + aecm->aecmCore->supGainErrParamDiffBD = (SUPGAIN_ERROR_PARAM_B >> 2) + - (SUPGAIN_ERROR_PARAM_D >> 2); + } else if (aecm->echoMode == 2) + { + aecm->aecmCore->supGain = 
SUPGAIN_DEFAULT >> 1; + aecm->aecmCore->supGainOld = SUPGAIN_DEFAULT >> 1; + aecm->aecmCore->supGainErrParamA = SUPGAIN_ERROR_PARAM_A >> 1; + aecm->aecmCore->supGainErrParamD = SUPGAIN_ERROR_PARAM_D >> 1; + aecm->aecmCore->supGainErrParamDiffAB = (SUPGAIN_ERROR_PARAM_A >> 1) + - (SUPGAIN_ERROR_PARAM_B >> 1); + aecm->aecmCore->supGainErrParamDiffBD = (SUPGAIN_ERROR_PARAM_B >> 1) + - (SUPGAIN_ERROR_PARAM_D >> 1); + } else if (aecm->echoMode == 3) + { + aecm->aecmCore->supGain = SUPGAIN_DEFAULT; + aecm->aecmCore->supGainOld = SUPGAIN_DEFAULT; + aecm->aecmCore->supGainErrParamA = SUPGAIN_ERROR_PARAM_A; + aecm->aecmCore->supGainErrParamD = SUPGAIN_ERROR_PARAM_D; + aecm->aecmCore->supGainErrParamDiffAB = SUPGAIN_ERROR_PARAM_A - SUPGAIN_ERROR_PARAM_B; + aecm->aecmCore->supGainErrParamDiffBD = SUPGAIN_ERROR_PARAM_B - SUPGAIN_ERROR_PARAM_D; + } else if (aecm->echoMode == 4) + { + aecm->aecmCore->supGain = SUPGAIN_DEFAULT << 1; + aecm->aecmCore->supGainOld = SUPGAIN_DEFAULT << 1; + aecm->aecmCore->supGainErrParamA = SUPGAIN_ERROR_PARAM_A << 1; + aecm->aecmCore->supGainErrParamD = SUPGAIN_ERROR_PARAM_D << 1; + aecm->aecmCore->supGainErrParamDiffAB = (SUPGAIN_ERROR_PARAM_A << 1) + - (SUPGAIN_ERROR_PARAM_B << 1); + aecm->aecmCore->supGainErrParamDiffBD = (SUPGAIN_ERROR_PARAM_B << 1) + - (SUPGAIN_ERROR_PARAM_D << 1); + } + + return 0; +} + +int32_t WebRtcAecm_InitEchoPath(void* aecmInst, + const void* echo_path, + size_t size_bytes) +{ + AecMobile* aecm = static_cast<AecMobile*>(aecmInst); + const int16_t* echo_path_ptr = static_cast<const int16_t*>(echo_path); + + if (aecmInst == NULL) { + return -1; + } + if (echo_path == NULL) { + return AECM_NULL_POINTER_ERROR; + } + if (size_bytes != WebRtcAecm_echo_path_size_bytes()) + { + // Input channel size does not match the size of AECM + return AECM_BAD_PARAMETER_ERROR; + } + if (aecm->initFlag != kInitCheck) + { + return AECM_UNINITIALIZED_ERROR; + } + + WebRtcAecm_InitEchoPathCore(aecm->aecmCore, echo_path_ptr); + + return 0; +} + +int32_t WebRtcAecm_GetEchoPath(void* aecmInst, + void* echo_path, + size_t size_bytes) +{ + AecMobile* aecm = static_cast<AecMobile*>(aecmInst); + int16_t* echo_path_ptr = static_cast<int16_t*>(echo_path); + + if (aecmInst == NULL) { + return -1; + } + if (echo_path == NULL) { + return AECM_NULL_POINTER_ERROR; + } + if (size_bytes != WebRtcAecm_echo_path_size_bytes()) + { + // Input channel size does not match the size of AECM + return AECM_BAD_PARAMETER_ERROR; + } + if (aecm->initFlag != kInitCheck) + { + return AECM_UNINITIALIZED_ERROR; + } + + memcpy(echo_path_ptr, aecm->aecmCore->channelStored, size_bytes); + return 0; +} + +size_t WebRtcAecm_echo_path_size_bytes() +{ + return (PART_LEN1 * sizeof(int16_t)); +} + + +static int WebRtcAecm_EstBufDelay(AecMobile* aecm, short msInSndCardBuf) { + short delayNew, nSampSndCard; + short nSampFar = (short) WebRtc_available_read(aecm->farendBuf); + short diff; + + nSampSndCard = msInSndCardBuf * kSampMsNb * aecm->aecmCore->mult; + + delayNew = nSampSndCard - nSampFar; + + if (delayNew < FRAME_LEN) + { + WebRtc_MoveReadPtr(aecm->farendBuf, FRAME_LEN); + delayNew += FRAME_LEN; + } + + aecm->filtDelay = WEBRTC_SPL_MAX(0, (8 * aecm->filtDelay + 2 * delayNew) / 10); + + diff = aecm->filtDelay - aecm->knownDelay; + if (diff > 224) + { + if (aecm->lastDelayDiff < 96) + { + aecm->timeForDelayChange = 0; + } else + { + aecm->timeForDelayChange++; + } + } else if (diff < 96 && aecm->knownDelay > 0) + { + if (aecm->lastDelayDiff > 224) + { + aecm->timeForDelayChange = 0; + } else + { + 
aecm->timeForDelayChange++; + } + } else + { + aecm->timeForDelayChange = 0; + } + aecm->lastDelayDiff = diff; + + if (aecm->timeForDelayChange > 25) + { + aecm->knownDelay = WEBRTC_SPL_MAX((int)aecm->filtDelay - 160, 0); + } + return 0; +} + +static int WebRtcAecm_DelayComp(AecMobile* aecm) { + int nSampFar = (int) WebRtc_available_read(aecm->farendBuf); + int nSampSndCard, delayNew, nSampAdd; + const int maxStuffSamp = 10 * FRAME_LEN; + + nSampSndCard = aecm->msInSndCardBuf * kSampMsNb * aecm->aecmCore->mult; + delayNew = nSampSndCard - nSampFar; + + if (delayNew > FAR_BUF_LEN - FRAME_LEN * aecm->aecmCore->mult) + { + // The difference of the buffer sizes is larger than the maximum + // allowed known delay. Compensate by stuffing the buffer. + nSampAdd = (int)(WEBRTC_SPL_MAX(((nSampSndCard >> 1) - nSampFar), + FRAME_LEN)); + nSampAdd = WEBRTC_SPL_MIN(nSampAdd, maxStuffSamp); + + WebRtc_MoveReadPtr(aecm->farendBuf, -nSampAdd); + aecm->delayChange = 1; // the delay needs to be updated + } + + return 0; +} diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/aecm/echo_control_mobile.h b/third_party/libwebrtc/webrtc/modules/audio_processing/aecm/echo_control_mobile.h new file mode 100644 index 0000000000..e0091c3e57 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/aecm/echo_control_mobile.h @@ -0,0 +1,209 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AECM_ECHO_CONTROL_MOBILE_H_ +#define MODULES_AUDIO_PROCESSING_AECM_ECHO_CONTROL_MOBILE_H_ + +#include <stdlib.h> + +#include "typedefs.h" // NOLINT(build/include) + +enum { + AecmFalse = 0, + AecmTrue +}; + +// Errors +#define AECM_UNSPECIFIED_ERROR 12000 +#define AECM_UNSUPPORTED_FUNCTION_ERROR 12001 +#define AECM_UNINITIALIZED_ERROR 12002 +#define AECM_NULL_POINTER_ERROR 12003 +#define AECM_BAD_PARAMETER_ERROR 12004 + +// Warnings +#define AECM_BAD_PARAMETER_WARNING 12100 + +typedef struct { + int16_t cngMode; // AECM_FALSE, AECM_TRUE (default) + int16_t echoMode; // 0, 1, 2, 3 (default), 4 +} AecmConfig; + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Allocates the memory needed by the AECM. The memory needs to be + * initialized separately using the WebRtcAecm_Init() function. + * Returns a pointer to the instance and a nullptr at failure. + */ +void* WebRtcAecm_Create(); + +/* + * This function releases the memory allocated by WebRtcAecm_Create() + * + * Inputs Description + * ------------------------------------------------------------------- + * void* aecmInst Pointer to the AECM instance + */ +void WebRtcAecm_Free(void* aecmInst); + +/* + * Initializes an AECM instance. + * + * Inputs Description + * ------------------------------------------------------------------- + * void* aecmInst Pointer to the AECM instance + * int32_t sampFreq Sampling frequency of data + * + * Outputs Description + * ------------------------------------------------------------------- + * int32_t return 0: OK + * 1200-12004,12100: error/warning + */ +int32_t WebRtcAecm_Init(void* aecmInst, int32_t sampFreq); + +/* + * Inserts an 80 or 160 sample block of data into the farend buffer. 
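+ * Far-end frames are queued in an internal ring buffer and consumed in
+ * FRAME_LEN (80 sample) blocks by WebRtcAecm_Process(), so each far-end
+ * frame should be buffered before the matching near-end frame is processed.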
+ * + * Inputs Description + * ------------------------------------------------------------------- + * void* aecmInst Pointer to the AECM instance + * int16_t* farend In buffer containing one frame of + * farend signal + * int16_t nrOfSamples Number of samples in farend buffer + * + * Outputs Description + * ------------------------------------------------------------------- + * int32_t return 0: OK + * 1200-12004,12100: error/warning + */ +int32_t WebRtcAecm_BufferFarend(void* aecmInst, + const int16_t* farend, + size_t nrOfSamples); + +/* + * Reports any errors that would arise when buffering a farend buffer. + * + * Inputs Description + * ------------------------------------------------------------------- + * void* aecmInst Pointer to the AECM instance + * int16_t* farend In buffer containing one frame of + * farend signal + * int16_t nrOfSamples Number of samples in farend buffer + * + * Outputs Description + * ------------------------------------------------------------------- + * int32_t return 0: OK + * 1200-12004,12100: error/warning + */ +int32_t WebRtcAecm_GetBufferFarendError(void* aecmInst, + const int16_t* farend, + size_t nrOfSamples); + +/* + * Runs the AECM on an 80 or 160 sample blocks of data. + * + * Inputs Description + * ------------------------------------------------------------------- + * void* aecmInst Pointer to the AECM instance + * int16_t* nearendNoisy In buffer containing one frame of + * reference nearend+echo signal. If + * noise reduction is active, provide + * the noisy signal here. + * int16_t* nearendClean In buffer containing one frame of + * nearend+echo signal. If noise + * reduction is active, provide the + * clean signal here. Otherwise pass a + * NULL pointer. + * int16_t nrOfSamples Number of samples in nearend buffer + * int16_t msInSndCardBuf Delay estimate for sound card and + * system buffers + * + * Outputs Description + * ------------------------------------------------------------------- + * int16_t* out Out buffer, one frame of processed nearend + * int32_t return 0: OK + * 1200-12004,12100: error/warning + */ +int32_t WebRtcAecm_Process(void* aecmInst, + const int16_t* nearendNoisy, + const int16_t* nearendClean, + int16_t* out, + size_t nrOfSamples, + int16_t msInSndCardBuf); + +/* + * This function enables the user to set certain parameters on-the-fly + * + * Inputs Description + * ------------------------------------------------------------------- + * void* aecmInst Pointer to the AECM instance + * AecmConfig config Config instance that contains all + * properties to be set + * + * Outputs Description + * ------------------------------------------------------------------- + * int32_t return 0: OK + * 1200-12004,12100: error/warning + */ +int32_t WebRtcAecm_set_config(void* aecmInst, AecmConfig config); + +/* + * This function enables the user to set the echo path on-the-fly. 
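+ * A typical use is restoring a channel estimate previously saved with
+ * WebRtcAecm_GetEchoPath(), so that the filter starts from a converged
+ * state instead of adapting from scratch.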
+ * + * Inputs Description + * ------------------------------------------------------------------- + * void* aecmInst Pointer to the AECM instance + * void* echo_path Pointer to the echo path to be set + * size_t size_bytes Size in bytes of the echo path + * + * Outputs Description + * ------------------------------------------------------------------- + * int32_t return 0: OK + * 1200-12004,12100: error/warning + */ +int32_t WebRtcAecm_InitEchoPath(void* aecmInst, + const void* echo_path, + size_t size_bytes); + +/* + * This function enables the user to get the currently used echo path + * on-the-fly + * + * Inputs Description + * ------------------------------------------------------------------- + * void* aecmInst Pointer to the AECM instance + * void* echo_path Pointer to echo path + * size_t size_bytes Size in bytes of the echo path + * + * Outputs Description + * ------------------------------------------------------------------- + * int32_t return 0: OK + * 1200-12004,12100: error/warning + */ +int32_t WebRtcAecm_GetEchoPath(void* aecmInst, + void* echo_path, + size_t size_bytes); + +/* + * This function enables the user to get the echo path size in bytes + * + * Outputs Description + * ------------------------------------------------------------------- + * size_t return Size in bytes + */ +size_t WebRtcAecm_echo_path_size_bytes(); + + +#ifdef __cplusplus +} +#endif +#endif // MODULES_AUDIO_PROCESSING_AECM_ECHO_CONTROL_MOBILE_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/agc/agc.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/agc/agc.cc new file mode 100644 index 0000000000..0c6478e803 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/agc/agc.cc @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/agc/agc.h" + +#include <cmath> +#include <cstdlib> + +#include <algorithm> +#include <vector> + +#include "modules/audio_processing/agc/loudness_histogram.h" +#include "modules/audio_processing/agc/utility.h" +#include "modules/include/module_common_types.h" +#include "rtc_base/checks.h" + +namespace webrtc { +namespace { + +const int kDefaultLevelDbfs = -18; +const int kNumAnalysisFrames = 100; +const double kActivityThreshold = 0.3; + +} // namespace + +Agc::Agc() + : target_level_loudness_(Dbfs2Loudness(kDefaultLevelDbfs)), + target_level_dbfs_(kDefaultLevelDbfs), + histogram_(LoudnessHistogram::Create(kNumAnalysisFrames)), + inactive_histogram_(LoudnessHistogram::Create()) {} + +Agc::~Agc() {} + +float Agc::AnalyzePreproc(const int16_t* audio, size_t length) { + RTC_DCHECK_GT(length, 0); + size_t num_clipped = 0; + for (size_t i = 0; i < length; ++i) { + if (audio[i] == 32767 || audio[i] == -32768) + ++num_clipped; + } + return 1.0f * num_clipped / length; +} + +int Agc::Process(const int16_t* audio, size_t length, int sample_rate_hz) { + vad_.ProcessChunk(audio, length, sample_rate_hz); + const std::vector<double>& rms = vad_.chunkwise_rms(); + const std::vector<double>& probabilities = + vad_.chunkwise_voice_probabilities(); + RTC_DCHECK_EQ(rms.size(), probabilities.size()); + for (size_t i = 0; i < rms.size(); ++i) { + histogram_->Update(rms[i], probabilities[i]); + } + return 0; +} + +bool Agc::GetRmsErrorDb(int* error) { + if (!error) { + RTC_NOTREACHED(); + return false; + } + + if (histogram_->num_updates() < kNumAnalysisFrames) { + // We haven't yet received enough frames. + return false; + } + + if (histogram_->AudioContent() < kNumAnalysisFrames * kActivityThreshold) { + // We are likely in an inactive segment. + return false; + } + + double loudness = Linear2Loudness(histogram_->CurrentRms()); + *error = std::floor(Loudness2Db(target_level_loudness_ - loudness) + 0.5); + histogram_->Reset(); + return true; +} + +void Agc::Reset() { + histogram_->Reset(); +} + +int Agc::set_target_level_dbfs(int level) { + // TODO(turajs): just some arbitrary sanity check. We can come up with better + // limits. The upper limit should be chosen such that the risk of clipping is + // low. The lower limit should not result in a too quiet signal. + if (level >= 0 || level <= -100) + return -1; + target_level_dbfs_ = level; + target_level_loudness_ = Dbfs2Loudness(level); + return 0; +} + +int Agc::target_level_dbfs() const { + return target_level_dbfs_; +} + +float Agc::voice_probability() const { + return vad_.last_voice_probability(); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/agc/agc.h b/third_party/libwebrtc/webrtc/modules/audio_processing/agc/agc.h new file mode 100644 index 0000000000..3a8d5c8122 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/agc/agc.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC_AGC_H_ +#define MODULES_AUDIO_PROCESSING_AGC_AGC_H_ + +#include <memory> + +#include "modules/audio_processing/vad/voice_activity_detector.h" +#include "typedefs.h" // NOLINT(build/include) + +namespace webrtc { + +class AudioFrame; +class LoudnessHistogram; + +class Agc { + public: + Agc(); + virtual ~Agc(); + + // Returns the proportion of samples in the buffer which are at full-scale + // (and presumably clipped). + virtual float AnalyzePreproc(const int16_t* audio, size_t length); + // |audio| must be mono; in a multi-channel stream, provide the first (usually + // left) channel. + virtual int Process(const int16_t* audio, size_t length, int sample_rate_hz); + + // Retrieves the difference between the target RMS level and the current + // signal RMS level in dB. Returns true if an update is available and false + // otherwise, in which case |error| should be ignored and no action taken. + virtual bool GetRmsErrorDb(int* error); + virtual void Reset(); + + virtual int set_target_level_dbfs(int level); + virtual int target_level_dbfs() const; + virtual float voice_probability() const; + + private: + double target_level_loudness_; + int target_level_dbfs_; + std::unique_ptr<LoudnessHistogram> histogram_; + std::unique_ptr<LoudnessHistogram> inactive_histogram_; + VoiceActivityDetector vad_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC_AGC_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/agc/agc_manager_direct.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/agc/agc_manager_direct.cc new file mode 100644 index 0000000000..706d4ab037 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/agc/agc_manager_direct.cc @@ -0,0 +1,461 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc/agc_manager_direct.h" + +#include <cmath> + +#ifdef WEBRTC_AGC_DEBUG_DUMP +#include <cstdio> +#endif + +#include "modules/audio_processing/agc/gain_map_internal.h" +#include "modules/audio_processing/gain_control_impl.h" +#include "modules/include/module_common_types.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" +#include "rtc_base/numerics/safe_minmax.h" +#include "system_wrappers/include/metrics.h" + +namespace webrtc { + +namespace { + +// Amount the microphone level is lowered with every clipping event. +const int kClippedLevelStep = 15; +// Proportion of clipped samples required to declare a clipping event. +const float kClippedRatioThreshold = 0.1f; +// Time in frames to wait after a clipping event before checking again. +const int kClippedWaitFrames = 300; + +// Amount of error we tolerate in the microphone level (presumably due to OS +// quantization) before we assume the user has manually adjusted the microphone. +const int kLevelQuantizationSlack = 25; + +const int kDefaultCompressionGain = 7; +const int kMaxCompressionGain = 12; +const int kMinCompressionGain = 2; +// Controls the rate of compression changes towards the target. 
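+// At the nominal 10 ms frame rate, 0.05 dB per frame corresponds to roughly
+// 5 dB per second, i.e. about 20 Process() calls per 1 dB of gain change.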
+const float kCompressionGainStep = 0.05f; + +const int kMaxMicLevel = 255; +static_assert(kGainMapSize > kMaxMicLevel, "gain map too small"); +const int kMinMicLevel = 12; + +// Prevent very large microphone level changes. +const int kMaxResidualGainChange = 15; + +// Maximum additional gain allowed to compensate for microphone level +// restrictions from clipping events. +const int kSurplusCompressionGain = 6; + +int ClampLevel(int mic_level) { + return rtc::SafeClamp(mic_level, kMinMicLevel, kMaxMicLevel); +} + +int LevelFromGainError(int gain_error, int level) { + RTC_DCHECK_GE(level, 0); + RTC_DCHECK_LE(level, kMaxMicLevel); + if (gain_error == 0) { + return level; + } + // TODO(ajm): Could be made more efficient with a binary search. + int new_level = level; + if (gain_error > 0) { + while (kGainMap[new_level] - kGainMap[level] < gain_error && + new_level < kMaxMicLevel) { + ++new_level; + } + } else { + while (kGainMap[new_level] - kGainMap[level] > gain_error && + new_level > kMinMicLevel) { + --new_level; + } + } + return new_level; +} + +} // namespace + +// Facility for dumping debug audio files. All methods are no-ops in the +// default case where WEBRTC_AGC_DEBUG_DUMP is undefined. +class DebugFile { +#ifdef WEBRTC_AGC_DEBUG_DUMP + public: + explicit DebugFile(const char* filename) + : file_(fopen(filename, "wb")) { + RTC_DCHECK(file_); + } + ~DebugFile() { + fclose(file_); + } + void Write(const int16_t* data, size_t length_samples) { + fwrite(data, 1, length_samples * sizeof(int16_t), file_); + } + private: + FILE* file_; +#else + public: + explicit DebugFile(const char* filename) { + } + ~DebugFile() { + } + void Write(const int16_t* data, size_t length_samples) { + } +#endif // WEBRTC_AGC_DEBUG_DUMP +}; + +AgcManagerDirect::AgcManagerDirect(GainControl* gctrl, + VolumeCallbacks* volume_callbacks, + int startup_min_level, + int clipped_level_min) + : agc_(new Agc()), + gctrl_(gctrl), + volume_callbacks_(volume_callbacks), + frames_since_clipped_(kClippedWaitFrames), + level_(0), + max_level_(kMaxMicLevel), + max_compression_gain_(kMaxCompressionGain), + target_compression_(kDefaultCompressionGain), + compression_(target_compression_), + compression_accumulator_(compression_), + capture_muted_(false), + check_volume_on_next_process_(true), // Check at startup. + startup_(true), + startup_min_level_(ClampLevel(startup_min_level)), + clipped_level_min_(clipped_level_min), + file_preproc_(new DebugFile("agc_preproc.pcm")), + file_postproc_(new DebugFile("agc_postproc.pcm")) {} + +AgcManagerDirect::AgcManagerDirect(Agc* agc, + GainControl* gctrl, + VolumeCallbacks* volume_callbacks, + int startup_min_level, + int clipped_level_min) + : agc_(agc), + gctrl_(gctrl), + volume_callbacks_(volume_callbacks), + frames_since_clipped_(kClippedWaitFrames), + level_(0), + max_level_(kMaxMicLevel), + max_compression_gain_(kMaxCompressionGain), + target_compression_(kDefaultCompressionGain), + compression_(target_compression_), + compression_accumulator_(compression_), + capture_muted_(false), + check_volume_on_next_process_(true), // Check at startup. 
+ startup_(true), + startup_min_level_(ClampLevel(startup_min_level)), + clipped_level_min_(clipped_level_min), + file_preproc_(new DebugFile("agc_preproc.pcm")), + file_postproc_(new DebugFile("agc_postproc.pcm")) {} + +AgcManagerDirect::~AgcManagerDirect() {} + +int AgcManagerDirect::Initialize() { + max_level_ = kMaxMicLevel; + max_compression_gain_ = kMaxCompressionGain; + target_compression_ = kDefaultCompressionGain; + compression_ = target_compression_; + compression_accumulator_ = compression_; + capture_muted_ = false; + check_volume_on_next_process_ = true; + // TODO(bjornv): Investigate if we need to reset |startup_| as well. For + // example, what happens when we change devices. + + if (gctrl_->set_mode(GainControl::kFixedDigital) != 0) { + RTC_LOG(LS_ERROR) << "set_mode(GainControl::kFixedDigital) failed."; + return -1; + } + if (gctrl_->set_target_level_dbfs(2) != 0) { + RTC_LOG(LS_ERROR) << "set_target_level_dbfs(2) failed."; + return -1; + } + if (gctrl_->set_compression_gain_db(kDefaultCompressionGain) != 0) { + RTC_LOG(LS_ERROR) + << "set_compression_gain_db(kDefaultCompressionGain) failed."; + return -1; + } + if (gctrl_->enable_limiter(true) != 0) { + RTC_LOG(LS_ERROR) << "enable_limiter(true) failed."; + return -1; + } + return 0; +} + +void AgcManagerDirect::AnalyzePreProcess(int16_t* audio, + int num_channels, + size_t samples_per_channel) { + size_t length = num_channels * samples_per_channel; + if (capture_muted_) { + return; + } + + file_preproc_->Write(audio, length); + + if (frames_since_clipped_ < kClippedWaitFrames) { + ++frames_since_clipped_; + return; + } + + // Check for clipped samples, as the AGC has difficulty detecting pitch + // under clipping distortion. We do this in the preprocessing phase in order + // to catch clipped echo as well. + // + // If we find a sufficiently clipped frame, drop the current microphone level + // and enforce a new maximum level, dropped the same amount from the current + // maximum. This harsh treatment is an effort to avoid repeated clipped echo + // events. As compensation for this restriction, the maximum compression + // gain is increased, through SetMaxLevel(). + float clipped_ratio = agc_->AnalyzePreproc(audio, length); + if (clipped_ratio > kClippedRatioThreshold) { + RTC_LOG(LS_INFO) << "[agc] Clipping detected. clipped_ratio=" + << clipped_ratio; + // Always decrease the maximum level, even if the current level is below + // threshold. + SetMaxLevel(std::max(clipped_level_min_, max_level_ - kClippedLevelStep)); + RTC_HISTOGRAM_BOOLEAN("WebRTC.Audio.AgcClippingAdjustmentAllowed", + level_ - kClippedLevelStep >= clipped_level_min_); + if (level_ > clipped_level_min_) { + // Don't try to adjust the level if we're already below the limit. As + // a consequence, if the user has brought the level above the limit, we + // will still not react until the postproc updates the level. + SetLevel(std::max(clipped_level_min_, level_ - kClippedLevelStep)); + // Reset the AGC since the level has changed. + agc_->Reset(); + } + frames_since_clipped_ = 0; + } +} + +void AgcManagerDirect::Process(const int16_t* audio, + size_t length, + int sample_rate_hz) { + if (capture_muted_) { + return; + } + + if (check_volume_on_next_process_) { + check_volume_on_next_process_ = false; + // We have to wait until the first process call to check the volume, + // because Chromium doesn't guarantee it to be valid any earlier. 
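+ // CheckVolumeAndReset() seeds |level_| from the system volume and resets
+ // the AGC statistics.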
+ CheckVolumeAndReset(); + } + + if (agc_->Process(audio, length, sample_rate_hz) != 0) { + RTC_LOG(LS_ERROR) << "Agc::Process failed"; + RTC_NOTREACHED(); + } + + UpdateGain(); + UpdateCompressor(); + + file_postproc_->Write(audio, length); +} + +void AgcManagerDirect::SetLevel(int new_level) { + int voe_level = volume_callbacks_->GetMicVolume(); + if (voe_level < 0) { + return; + } + if (voe_level == 0) { + RTC_LOG(LS_INFO) + << "[agc] VolumeCallbacks returned level=0, taking no action."; + return; + } + if (voe_level > kMaxMicLevel) { + RTC_LOG(LS_ERROR) << "VolumeCallbacks returned an invalid level=" + << voe_level; + return; + } + + if (voe_level > level_ + kLevelQuantizationSlack || + voe_level < level_ - kLevelQuantizationSlack) { + RTC_LOG(LS_INFO) << "[agc] Mic volume was manually adjusted. Updating " + << "stored level from " << level_ << " to " << voe_level; + level_ = voe_level; + // Always allow the user to increase the volume. + if (level_ > max_level_) { + SetMaxLevel(level_); + } + // Take no action in this case, since we can't be sure when the volume + // was manually adjusted. The compressor will still provide some of the + // desired gain change. + agc_->Reset(); + return; + } + + new_level = std::min(new_level, max_level_); + if (new_level == level_) { + return; + } + + volume_callbacks_->SetMicVolume(new_level); + RTC_LOG(LS_INFO) << "[agc] voe_level=" << voe_level << ", " + << "level_=" << level_ << ", " + << "new_level=" << new_level; + level_ = new_level; +} + +void AgcManagerDirect::SetMaxLevel(int level) { + RTC_DCHECK_GE(level, clipped_level_min_); + max_level_ = level; + // Scale the |kSurplusCompressionGain| linearly across the restricted + // level range. + max_compression_gain_ = + kMaxCompressionGain + std::floor((1.f * kMaxMicLevel - max_level_) / + (kMaxMicLevel - clipped_level_min_) * + kSurplusCompressionGain + + 0.5f); + RTC_LOG(LS_INFO) << "[agc] max_level_=" << max_level_ + << ", max_compression_gain_=" << max_compression_gain_; +} + +void AgcManagerDirect::SetCaptureMuted(bool muted) { + if (capture_muted_ == muted) { + return; + } + capture_muted_ = muted; + + if (!muted) { + // When we unmute, we should reset things to be safe. + check_volume_on_next_process_ = true; + } +} + +float AgcManagerDirect::voice_probability() { + return agc_->voice_probability(); +} + +int AgcManagerDirect::CheckVolumeAndReset() { + int level = volume_callbacks_->GetMicVolume(); + if (level < 0) { + return -1; + } + // Reasons for taking action at startup: + // 1) A person starting a call is expected to be heard. + // 2) Independent of interpretation of |level| == 0 we should raise it so the + // AGC can do its job properly. + if (level == 0 && !startup_) { + RTC_LOG(LS_INFO) + << "[agc] VolumeCallbacks returned level=0, taking no action."; + return 0; + } + if (level > kMaxMicLevel) { + RTC_LOG(LS_ERROR) << "VolumeCallbacks returned an invalid level=" << level; + return -1; + } + RTC_LOG(LS_INFO) << "[agc] Initial GetMicVolume()=" << level; + + int minLevel = startup_ ? startup_min_level_ : kMinMicLevel; + if (level < minLevel) { + level = minLevel; + RTC_LOG(LS_INFO) << "[agc] Initial volume too low, raising to " << level; + volume_callbacks_->SetMicVolume(level); + } + agc_->Reset(); + level_ = level; + startup_ = false; + return 0; +} + +// Requests the RMS error from AGC and distributes the required gain change +// between the digital compression stage and volume slider. 
We use the +// compressor first, providing a slack region around the current slider +// position to reduce movement. +// +// If the slider needs to be moved, we check first if the user has adjusted +// it, in which case we take no action and cache the updated level. +void AgcManagerDirect::UpdateGain() { + int rms_error = 0; + if (!agc_->GetRmsErrorDb(&rms_error)) { + // No error update ready. + return; + } + // The compressor will always add at least kMinCompressionGain. In effect, + // this adjusts our target gain upward by the same amount and rms_error + // needs to reflect that. + rms_error += kMinCompressionGain; + + // Handle as much error as possible with the compressor first. + int raw_compression = + rtc::SafeClamp(rms_error, kMinCompressionGain, max_compression_gain_); + + // Deemphasize the compression gain error. Move halfway between the current + // target and the newly received target. This serves to soften perceptible + // intra-talkspurt adjustments, at the cost of some adaptation speed. + if ((raw_compression == max_compression_gain_ && + target_compression_ == max_compression_gain_ - 1) || + (raw_compression == kMinCompressionGain && + target_compression_ == kMinCompressionGain + 1)) { + // Special case to allow the target to reach the endpoints of the + // compression range. The deemphasis would otherwise halt it at 1 dB shy. + target_compression_ = raw_compression; + } else { + target_compression_ = (raw_compression - target_compression_) / 2 + + target_compression_; + } + + // Residual error will be handled by adjusting the volume slider. Use the + // raw rather than deemphasized compression here as we would otherwise + // shrink the amount of slack the compressor provides. + const int residual_gain = + rtc::SafeClamp(rms_error - raw_compression, -kMaxResidualGainChange, + kMaxResidualGainChange); + RTC_LOG(LS_INFO) << "[agc] rms_error=" << rms_error << ", " + << "target_compression=" << target_compression_ << ", " + << "residual_gain=" << residual_gain; + if (residual_gain == 0) + return; + + int old_level = level_; + SetLevel(LevelFromGainError(residual_gain, level_)); + if (old_level != level_) { + // level_ was updated by SetLevel; log the new value. + RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.AgcSetLevel", level_, 1, + kMaxMicLevel, 50); + } +} + +void AgcManagerDirect::UpdateCompressor() { + if (compression_ == target_compression_) { + return; + } + + // Adapt the compression gain slowly towards the target, in order to avoid + // highly perceptible changes. + if (target_compression_ > compression_) { + compression_accumulator_ += kCompressionGainStep; + } else { + compression_accumulator_ -= kCompressionGainStep; + } + + // The compressor accepts integer gains in dB. Adjust the gain when + // we've come within half a stepsize of the nearest integer. (We don't + // check for equality due to potential floating point imprecision). + int new_compression = compression_; + int nearest_neighbor = std::floor(compression_accumulator_ + 0.5); + if (std::fabs(compression_accumulator_ - nearest_neighbor) < + kCompressionGainStep / 2) { + new_compression = nearest_neighbor; + } + + // Set the new compression gain. 
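+ // Worked example: stepping from 7 dB toward a target of 8 dB, the
+ // accumulator reaches 8.0 after 20 frames (1 / kCompressionGainStep), the
+ // nearest-integer check passes, and set_compression_gain_db(8) is issued.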
+ if (new_compression != compression_) { + compression_ = new_compression; + compression_accumulator_ = new_compression; + if (gctrl_->set_compression_gain_db(compression_) != 0) { + RTC_LOG(LS_ERROR) << "set_compression_gain_db(" << compression_ + << ") failed."; + } + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/agc/agc_manager_direct.h b/third_party/libwebrtc/webrtc/modules/audio_processing/agc/agc_manager_direct.h new file mode 100644 index 0000000000..03d2607f85 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/agc/agc_manager_direct.h @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC_AGC_MANAGER_DIRECT_H_ +#define MODULES_AUDIO_PROCESSING_AGC_AGC_MANAGER_DIRECT_H_ + +#include <memory> + +#include "modules/audio_processing/agc/agc.h" +#include "rtc_base/constructormagic.h" + +namespace webrtc { + +class AudioFrame; +class DebugFile; +class GainControl; + +// Callbacks that need to be injected into AgcManagerDirect to read and control +// the volume values. This is done to remove the VoiceEngine dependency in +// AgcManagerDirect. +// TODO(aluebs): Remove VolumeCallbacks. +class VolumeCallbacks { + public: + virtual ~VolumeCallbacks() {} + virtual void SetMicVolume(int volume) = 0; + virtual int GetMicVolume() = 0; +}; + +// Direct interface to use AGC to set volume and compression values. +// AudioProcessing uses this interface directly to integrate the callback-less +// AGC. +// +// This class is not thread-safe. +class AgcManagerDirect final { + public: + // AgcManagerDirect will configure GainControl internally. The user is + // responsible for processing the audio using it after the call to Process. + // The operating range of startup_min_level is [12, 255] and any input value + // outside that range will be clamped. + AgcManagerDirect(GainControl* gctrl, + VolumeCallbacks* volume_callbacks, + int startup_min_level, + int clipped_level_min); + // Dependency injection for testing. Don't delete |agc| as the memory is owned + // by the manager. + AgcManagerDirect(Agc* agc, + GainControl* gctrl, + VolumeCallbacks* volume_callbacks, + int startup_min_level, + int clipped_level_min); + ~AgcManagerDirect(); + + int Initialize(); + void AnalyzePreProcess(int16_t* audio, + int num_channels, + size_t samples_per_channel); + void Process(const int16_t* audio, size_t length, int sample_rate_hz); + + // Call when the capture stream has been muted/unmuted. This causes the + // manager to disregard all incoming audio; chances are good it's background + // noise to which we'd like to avoid adapting. + void SetCaptureMuted(bool muted); + bool capture_muted() { return capture_muted_; } + + float voice_probability(); + + private: + // Sets a new microphone level, after first checking that it hasn't been + // updated by the user, in which case no action is taken. + void SetLevel(int new_level); + + // Set the maximum level the AGC is allowed to apply. Also updates the + // maximum compression gain to compensate. The level must be at least + // |kClippedLevelMin|. 
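+ // (Here |kClippedLevelMin| refers to the |clipped_level_min| constructor
+ // argument; SetMaxLevel() DCHECKs the level against |clipped_level_min_|.)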
+ void SetMaxLevel(int level); + + int CheckVolumeAndReset(); + void UpdateGain(); + void UpdateCompressor(); + + std::unique_ptr<Agc> agc_; + GainControl* gctrl_; + VolumeCallbacks* volume_callbacks_; + + int frames_since_clipped_; + int level_; + int max_level_; + int max_compression_gain_; + int target_compression_; + int compression_; + float compression_accumulator_; + bool capture_muted_; + bool check_volume_on_next_process_; + bool startup_; + int startup_min_level_; + const int clipped_level_min_; + + std::unique_ptr<DebugFile> file_preproc_; + std::unique_ptr<DebugFile> file_postproc_; + + RTC_DISALLOW_COPY_AND_ASSIGN(AgcManagerDirect); +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC_AGC_MANAGER_DIRECT_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/agc/agc_manager_direct_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/agc/agc_manager_direct_unittest.cc new file mode 100644 index 0000000000..6a8af6aa71 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/agc/agc_manager_direct_unittest.cc @@ -0,0 +1,682 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc/agc_manager_direct.h" + +#include "common_types.h" // NOLINT(build/include) +#include "modules/audio_processing/agc/mock_agc.h" +#include "modules/audio_processing/include/mock_audio_processing.h" +#include "test/gmock.h" +#include "test/gtest.h" + +using ::testing::_; +using ::testing::DoAll; +using ::testing::Return; +using ::testing::SetArgPointee; + +namespace webrtc { +namespace { + +const int kSampleRateHz = 32000; +const int kNumChannels = 1; +const int kSamplesPerChannel = kSampleRateHz / 100; +const int kInitialVolume = 128; +constexpr int kClippedMin = 165; // Arbitrary, but different from the default. 
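+// Chosen to exceed kClippedRatioThreshold (0.1f) in agc_manager_direct.cc, so
+// returning it from AnalyzePreproc() reliably triggers the clipping handling.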
+const float kAboveClippedThreshold = 0.2f; + +class TestVolumeCallbacks : public VolumeCallbacks { + public: + TestVolumeCallbacks() : volume_(0) {} + void SetMicVolume(int volume) override { volume_ = volume; } + int GetMicVolume() override { return volume_; } + + private: + int volume_; +}; + +} // namespace + +class AgcManagerDirectTest : public ::testing::Test { + protected: + AgcManagerDirectTest() + : agc_(new MockAgc), + manager_(agc_, &gctrl_, &volume_, kInitialVolume, kClippedMin) { + ExpectInitialize(); + manager_.Initialize(); + } + + void FirstProcess() { + EXPECT_CALL(*agc_, Reset()); + EXPECT_CALL(*agc_, GetRmsErrorDb(_)).WillOnce(Return(false)); + CallProcess(1); + } + + void SetVolumeAndProcess(int volume) { + volume_.SetMicVolume(volume); + FirstProcess(); + } + + void ExpectCheckVolumeAndReset(int volume) { + volume_.SetMicVolume(volume); + EXPECT_CALL(*agc_, Reset()); + } + + void ExpectInitialize() { + EXPECT_CALL(gctrl_, set_mode(GainControl::kFixedDigital)); + EXPECT_CALL(gctrl_, set_target_level_dbfs(2)); + EXPECT_CALL(gctrl_, set_compression_gain_db(7)); + EXPECT_CALL(gctrl_, enable_limiter(true)); + } + + void CallProcess(int num_calls) { + for (int i = 0; i < num_calls; ++i) { + EXPECT_CALL(*agc_, Process(_, _, _)).WillOnce(Return(0)); + manager_.Process(nullptr, kSamplesPerChannel, kSampleRateHz); + } + } + + void CallPreProc(int num_calls) { + for (int i = 0; i < num_calls; ++i) { + manager_.AnalyzePreProcess(nullptr, kNumChannels, kSamplesPerChannel); + } + } + + MockAgc* agc_; + test::MockGainControl gctrl_; + TestVolumeCallbacks volume_; + AgcManagerDirect manager_; +}; + +TEST_F(AgcManagerDirectTest, StartupMinVolumeConfigurationIsRespected) { + FirstProcess(); + EXPECT_EQ(kInitialVolume, volume_.GetMicVolume()); +} + +TEST_F(AgcManagerDirectTest, MicVolumeResponseToRmsError) { + FirstProcess(); + + // Compressor default; no residual error. + EXPECT_CALL(*agc_, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(5), Return(true))); + CallProcess(1); + + // Inside the compressor's window; no change of volume. + EXPECT_CALL(*agc_, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(10), Return(true))); + CallProcess(1); + + // Above the compressor's window; volume should be increased. + EXPECT_CALL(*agc_, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(11), Return(true))); + CallProcess(1); + EXPECT_EQ(130, volume_.GetMicVolume()); + + EXPECT_CALL(*agc_, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(20), Return(true))); + CallProcess(1); + EXPECT_EQ(168, volume_.GetMicVolume()); + + // Inside the compressor's window; no change of volume. + EXPECT_CALL(*agc_, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(5), Return(true))); + CallProcess(1); + EXPECT_CALL(*agc_, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(0), Return(true))); + CallProcess(1); + + // Below the compressor's window; volume should be decreased. + EXPECT_CALL(*agc_, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(-1), Return(true))); + CallProcess(1); + EXPECT_EQ(167, volume_.GetMicVolume()); + + EXPECT_CALL(*agc_, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(-1), Return(true))); + CallProcess(1); + EXPECT_EQ(163, volume_.GetMicVolume()); + + EXPECT_CALL(*agc_, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(-9), Return(true))); + CallProcess(1); + EXPECT_EQ(129, volume_.GetMicVolume()); +} + +TEST_F(AgcManagerDirectTest, MicVolumeIsLimited) { + FirstProcess(); + + // Maximum upwards change is limited. 
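+ // Each update applies at most kMaxResidualGainChange (15 dB) of residual
+ // gain, so the level climbs in bounded steps (128 -> 183 -> 243) before
+ // saturating at 255.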
+ EXPECT_CALL(*agc_, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(30), Return(true))); + CallProcess(1); + EXPECT_EQ(183, volume_.GetMicVolume()); + + EXPECT_CALL(*agc_, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(30), Return(true))); + CallProcess(1); + EXPECT_EQ(243, volume_.GetMicVolume()); + + // Won't go higher than the maximum. + EXPECT_CALL(*agc_, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(30), Return(true))); + CallProcess(1); + EXPECT_EQ(255, volume_.GetMicVolume()); + + EXPECT_CALL(*agc_, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(-1), Return(true))); + CallProcess(1); + EXPECT_EQ(254, volume_.GetMicVolume()); + + // Maximum downwards change is limited. + EXPECT_CALL(*agc_, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(-40), Return(true))); + CallProcess(1); + EXPECT_EQ(194, volume_.GetMicVolume()); + + EXPECT_CALL(*agc_, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(-40), Return(true))); + CallProcess(1); + EXPECT_EQ(137, volume_.GetMicVolume()); + + EXPECT_CALL(*agc_, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(-40), Return(true))); + CallProcess(1); + EXPECT_EQ(88, volume_.GetMicVolume()); + + EXPECT_CALL(*agc_, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(-40), Return(true))); + CallProcess(1); + EXPECT_EQ(54, volume_.GetMicVolume()); + + EXPECT_CALL(*agc_, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(-40), Return(true))); + CallProcess(1); + EXPECT_EQ(33, volume_.GetMicVolume()); + + EXPECT_CALL(*agc_, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(-40), Return(true))); + CallProcess(1); + EXPECT_EQ(18, volume_.GetMicVolume()); + + // Won't go lower than the minimum. + EXPECT_CALL(*agc_, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(-40), Return(true))); + CallProcess(1); + EXPECT_EQ(12, volume_.GetMicVolume()); +} + +TEST_F(AgcManagerDirectTest, CompressorStepsTowardsTarget) { + FirstProcess(); + + // Compressor default; no call to set_compression_gain_db. + EXPECT_CALL(*agc_, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(5), Return(true))) + .WillRepeatedly(Return(false)); + EXPECT_CALL(gctrl_, set_compression_gain_db(_)).Times(0); + CallProcess(20); + + // Moves slowly upwards. + EXPECT_CALL(*agc_, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(9), Return(true))) + .WillRepeatedly(Return(false)); + EXPECT_CALL(gctrl_, set_compression_gain_db(_)).Times(0); + CallProcess(19); + EXPECT_CALL(gctrl_, set_compression_gain_db(8)).WillOnce(Return(0)); + CallProcess(1); + + EXPECT_CALL(gctrl_, set_compression_gain_db(_)).Times(0); + CallProcess(19); + EXPECT_CALL(gctrl_, set_compression_gain_db(9)).WillOnce(Return(0)); + CallProcess(1); + + EXPECT_CALL(gctrl_, set_compression_gain_db(_)).Times(0); + CallProcess(20); + + // Moves slowly downward, then reverses before reaching the original target. 
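+ // The deemphasis moves the target halfway toward each raw value:
+ // (7 - 9) / 2 + 9 = 8 on this update, then (11 - 8) / 2 + 8 = 9 on the next.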
+ EXPECT_CALL(*agc_, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(5), Return(true))) + .WillRepeatedly(Return(false)); + EXPECT_CALL(gctrl_, set_compression_gain_db(_)).Times(0); + CallProcess(19); + EXPECT_CALL(gctrl_, set_compression_gain_db(8)).WillOnce(Return(0)); + CallProcess(1); + + EXPECT_CALL(*agc_, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(9), Return(true))) + .WillRepeatedly(Return(false)); + EXPECT_CALL(gctrl_, set_compression_gain_db(_)).Times(0); + CallProcess(19); + EXPECT_CALL(gctrl_, set_compression_gain_db(9)).WillOnce(Return(0)); + CallProcess(1); + + EXPECT_CALL(gctrl_, set_compression_gain_db(_)).Times(0); + CallProcess(20); +} + +TEST_F(AgcManagerDirectTest, CompressorErrorIsDeemphasized) { + FirstProcess(); + + EXPECT_CALL(*agc_, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(10), Return(true))) + .WillRepeatedly(Return(false)); + CallProcess(19); + EXPECT_CALL(gctrl_, set_compression_gain_db(8)).WillOnce(Return(0)); + CallProcess(20); + EXPECT_CALL(gctrl_, set_compression_gain_db(9)).WillOnce(Return(0)); + CallProcess(1); + EXPECT_CALL(gctrl_, set_compression_gain_db(_)).Times(0); + CallProcess(20); + + EXPECT_CALL(*agc_, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(0), Return(true))) + .WillRepeatedly(Return(false)); + CallProcess(19); + EXPECT_CALL(gctrl_, set_compression_gain_db(8)).WillOnce(Return(0)); + CallProcess(20); + EXPECT_CALL(gctrl_, set_compression_gain_db(7)).WillOnce(Return(0)); + CallProcess(20); + EXPECT_CALL(gctrl_, set_compression_gain_db(6)).WillOnce(Return(0)); + CallProcess(1); + EXPECT_CALL(gctrl_, set_compression_gain_db(_)).Times(0); + CallProcess(20); +} + +TEST_F(AgcManagerDirectTest, CompressorReachesMaximum) { + FirstProcess(); + + EXPECT_CALL(*agc_, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(10), Return(true))) + .WillOnce(DoAll(SetArgPointee<0>(10), Return(true))) + .WillOnce(DoAll(SetArgPointee<0>(10), Return(true))) + .WillOnce(DoAll(SetArgPointee<0>(10), Return(true))) + .WillRepeatedly(Return(false)); + CallProcess(19); + EXPECT_CALL(gctrl_, set_compression_gain_db(8)).WillOnce(Return(0)); + CallProcess(20); + EXPECT_CALL(gctrl_, set_compression_gain_db(9)).WillOnce(Return(0)); + CallProcess(20); + EXPECT_CALL(gctrl_, set_compression_gain_db(10)).WillOnce(Return(0)); + CallProcess(20); + EXPECT_CALL(gctrl_, set_compression_gain_db(11)).WillOnce(Return(0)); + CallProcess(20); + EXPECT_CALL(gctrl_, set_compression_gain_db(12)).WillOnce(Return(0)); + CallProcess(1); +} + +TEST_F(AgcManagerDirectTest, CompressorReachesMinimum) { + FirstProcess(); + + EXPECT_CALL(*agc_, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(0), Return(true))) + .WillOnce(DoAll(SetArgPointee<0>(0), Return(true))) + .WillOnce(DoAll(SetArgPointee<0>(0), Return(true))) + .WillOnce(DoAll(SetArgPointee<0>(0), Return(true))) + .WillRepeatedly(Return(false)); + CallProcess(19); + EXPECT_CALL(gctrl_, set_compression_gain_db(6)).WillOnce(Return(0)); + CallProcess(20); + EXPECT_CALL(gctrl_, set_compression_gain_db(5)).WillOnce(Return(0)); + CallProcess(20); + EXPECT_CALL(gctrl_, set_compression_gain_db(4)).WillOnce(Return(0)); + CallProcess(20); + EXPECT_CALL(gctrl_, set_compression_gain_db(3)).WillOnce(Return(0)); + CallProcess(20); + EXPECT_CALL(gctrl_, set_compression_gain_db(2)).WillOnce(Return(0)); + CallProcess(1); +} + +TEST_F(AgcManagerDirectTest, NoActionWhileMuted) { + manager_.SetCaptureMuted(true); + manager_.Process(nullptr, kSamplesPerChannel, kSampleRateHz); +} + +TEST_F(AgcManagerDirectTest, 
UnmutingChecksVolumeWithoutRaising) { + FirstProcess(); + + manager_.SetCaptureMuted(true); + manager_.SetCaptureMuted(false); + ExpectCheckVolumeAndReset(127); + // SetMicVolume should not be called. + EXPECT_CALL(*agc_, GetRmsErrorDb(_)).WillOnce(Return(false)); + CallProcess(1); + EXPECT_EQ(127, volume_.GetMicVolume()); +} + +TEST_F(AgcManagerDirectTest, UnmutingRaisesTooLowVolume) { + FirstProcess(); + + manager_.SetCaptureMuted(true); + manager_.SetCaptureMuted(false); + ExpectCheckVolumeAndReset(11); + EXPECT_CALL(*agc_, GetRmsErrorDb(_)).WillOnce(Return(false)); + CallProcess(1); + EXPECT_EQ(12, volume_.GetMicVolume()); +} + +TEST_F(AgcManagerDirectTest, ManualLevelChangeResultsInNoSetMicCall) { + FirstProcess(); + + // Change outside of compressor's range, which would normally trigger a call + // to SetMicVolume. + EXPECT_CALL(*agc_, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(11), Return(true))); + // GetMicVolume returns a value outside of the quantization slack, indicating + // a manual volume change. + volume_.SetMicVolume(154); + // SetMicVolume should not be called. + EXPECT_CALL(*agc_, Reset()).Times(1); + CallProcess(1); + EXPECT_EQ(154, volume_.GetMicVolume()); + + // Do the same thing, except downwards now. + EXPECT_CALL(*agc_, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(-1), Return(true))); + volume_.SetMicVolume(100); + EXPECT_CALL(*agc_, Reset()).Times(1); + CallProcess(1); + EXPECT_EQ(100, volume_.GetMicVolume()); + + // And finally verify the AGC continues working without a manual change. + EXPECT_CALL(*agc_, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(-1), Return(true))); + CallProcess(1); + EXPECT_EQ(99, volume_.GetMicVolume()); +} + +TEST_F(AgcManagerDirectTest, RecoveryAfterManualLevelChangeFromMax) { + FirstProcess(); + + // Force the mic up to max volume. Takes a few steps due to the residual + // gain limitation. + EXPECT_CALL(*agc_, GetRmsErrorDb(_)) + .WillRepeatedly(DoAll(SetArgPointee<0>(30), Return(true))); + CallProcess(1); + EXPECT_EQ(183, volume_.GetMicVolume()); + CallProcess(1); + EXPECT_EQ(243, volume_.GetMicVolume()); + CallProcess(1); + EXPECT_EQ(255, volume_.GetMicVolume()); + + // Manual change does not result in SetMicVolume call. + EXPECT_CALL(*agc_, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(-1), Return(true))); + volume_.SetMicVolume(50); + EXPECT_CALL(*agc_, Reset()).Times(1); + CallProcess(1); + EXPECT_EQ(50, volume_.GetMicVolume()); + + // Continues working as usual afterwards. + EXPECT_CALL(*agc_, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(20), Return(true))); + CallProcess(1); + EXPECT_EQ(69, volume_.GetMicVolume()); +} + +TEST_F(AgcManagerDirectTest, RecoveryAfterManualLevelChangeBelowMin) { + FirstProcess(); + + // Manual change below min. + EXPECT_CALL(*agc_, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(-1), Return(true))); + // Don't set to zero, which will cause AGC to take no action. + volume_.SetMicVolume(1); + EXPECT_CALL(*agc_, Reset()).Times(1); + CallProcess(1); + EXPECT_EQ(1, volume_.GetMicVolume()); + + // Continues working as usual afterwards. 
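+ // Recovery is slow in slider units: near the bottom of kGainMap each level
+ // step is worth about 2 dB, so even the capped 15 dB residual correction
+ // moves the level only a few steps at a time (1 -> 2 -> 11 -> 18).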
+ EXPECT_CALL(*agc_, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(11), Return(true))); + CallProcess(1); + EXPECT_EQ(2, volume_.GetMicVolume()); + + EXPECT_CALL(*agc_, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(30), Return(true))); + CallProcess(1); + EXPECT_EQ(11, volume_.GetMicVolume()); + + EXPECT_CALL(*agc_, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(20), Return(true))); + CallProcess(1); + EXPECT_EQ(18, volume_.GetMicVolume()); +} + +TEST_F(AgcManagerDirectTest, NoClippingHasNoImpact) { + FirstProcess(); + + EXPECT_CALL(*agc_, AnalyzePreproc(_, _)).WillRepeatedly(Return(0)); + CallPreProc(100); + EXPECT_EQ(128, volume_.GetMicVolume()); +} + +TEST_F(AgcManagerDirectTest, ClippingUnderThresholdHasNoImpact) { + FirstProcess(); + + EXPECT_CALL(*agc_, AnalyzePreproc(_, _)).WillOnce(Return(0.099)); + CallPreProc(1); + EXPECT_EQ(128, volume_.GetMicVolume()); +} + +TEST_F(AgcManagerDirectTest, ClippingLowersVolume) { + SetVolumeAndProcess(255); + + EXPECT_CALL(*agc_, AnalyzePreproc(_, _)).WillOnce(Return(0.101)); + EXPECT_CALL(*agc_, Reset()).Times(1); + CallPreProc(1); + EXPECT_EQ(240, volume_.GetMicVolume()); +} + +TEST_F(AgcManagerDirectTest, WaitingPeriodBetweenClippingChecks) { + SetVolumeAndProcess(255); + + EXPECT_CALL(*agc_, AnalyzePreproc(_, _)) + .WillOnce(Return(kAboveClippedThreshold)); + EXPECT_CALL(*agc_, Reset()).Times(1); + CallPreProc(1); + EXPECT_EQ(240, volume_.GetMicVolume()); + + EXPECT_CALL(*agc_, AnalyzePreproc(_, _)) + .WillRepeatedly(Return(kAboveClippedThreshold)); + EXPECT_CALL(*agc_, Reset()).Times(0); + CallPreProc(300); + EXPECT_EQ(240, volume_.GetMicVolume()); + + EXPECT_CALL(*agc_, AnalyzePreproc(_, _)) + .WillOnce(Return(kAboveClippedThreshold)); + EXPECT_CALL(*agc_, Reset()).Times(1); + CallPreProc(1); + EXPECT_EQ(225, volume_.GetMicVolume()); +} + +TEST_F(AgcManagerDirectTest, ClippingLoweringIsLimited) { + SetVolumeAndProcess(180); + + EXPECT_CALL(*agc_, AnalyzePreproc(_, _)) + .WillOnce(Return(kAboveClippedThreshold)); + EXPECT_CALL(*agc_, Reset()).Times(1); + CallPreProc(1); + EXPECT_EQ(kClippedMin, volume_.GetMicVolume()); + + EXPECT_CALL(*agc_, AnalyzePreproc(_, _)) + .WillRepeatedly(Return(kAboveClippedThreshold)); + EXPECT_CALL(*agc_, Reset()).Times(0); + CallPreProc(1000); + EXPECT_EQ(kClippedMin, volume_.GetMicVolume()); +} + +TEST_F(AgcManagerDirectTest, ClippingMaxIsRespectedWhenEqualToLevel) { + SetVolumeAndProcess(255); + + EXPECT_CALL(*agc_, AnalyzePreproc(_, _)) + .WillOnce(Return(kAboveClippedThreshold)); + EXPECT_CALL(*agc_, Reset()).Times(1); + CallPreProc(1); + EXPECT_EQ(240, volume_.GetMicVolume()); + + EXPECT_CALL(*agc_, GetRmsErrorDb(_)) + .WillRepeatedly(DoAll(SetArgPointee<0>(30), Return(true))); + CallProcess(10); + EXPECT_EQ(240, volume_.GetMicVolume()); +} + +TEST_F(AgcManagerDirectTest, ClippingMaxIsRespectedWhenHigherThanLevel) { + SetVolumeAndProcess(200); + + EXPECT_CALL(*agc_, AnalyzePreproc(_, _)) + .WillOnce(Return(kAboveClippedThreshold)); + EXPECT_CALL(*agc_, Reset()).Times(1); + CallPreProc(1); + EXPECT_EQ(185, volume_.GetMicVolume()); + + EXPECT_CALL(*agc_, GetRmsErrorDb(_)) + .WillRepeatedly(DoAll(SetArgPointee<0>(40), Return(true))); + CallProcess(1); + EXPECT_EQ(240, volume_.GetMicVolume()); + CallProcess(10); + EXPECT_EQ(240, volume_.GetMicVolume()); +} + +TEST_F(AgcManagerDirectTest, MaxCompressionIsIncreasedAfterClipping) { + SetVolumeAndProcess(210); + + EXPECT_CALL(*agc_, AnalyzePreproc(_, _)) + .WillOnce(Return(kAboveClippedThreshold)); + EXPECT_CALL(*agc_, Reset()).Times(1); + 
CallPreProc(1); + EXPECT_EQ(195, volume_.GetMicVolume()); + + EXPECT_CALL(*agc_, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(11), Return(true))) + .WillOnce(DoAll(SetArgPointee<0>(11), Return(true))) + .WillOnce(DoAll(SetArgPointee<0>(11), Return(true))) + .WillOnce(DoAll(SetArgPointee<0>(11), Return(true))) + .WillOnce(DoAll(SetArgPointee<0>(11), Return(true))) + .WillRepeatedly(Return(false)); + CallProcess(19); + EXPECT_CALL(gctrl_, set_compression_gain_db(8)).WillOnce(Return(0)); + CallProcess(20); + EXPECT_CALL(gctrl_, set_compression_gain_db(9)).WillOnce(Return(0)); + CallProcess(20); + EXPECT_CALL(gctrl_, set_compression_gain_db(10)).WillOnce(Return(0)); + CallProcess(20); + EXPECT_CALL(gctrl_, set_compression_gain_db(11)).WillOnce(Return(0)); + CallProcess(20); + EXPECT_CALL(gctrl_, set_compression_gain_db(12)).WillOnce(Return(0)); + CallProcess(20); + EXPECT_CALL(gctrl_, set_compression_gain_db(13)).WillOnce(Return(0)); + CallProcess(1); + + // Continue clipping until we hit the maximum surplus compression. + CallPreProc(300); + EXPECT_CALL(*agc_, AnalyzePreproc(_, _)) + .WillOnce(Return(kAboveClippedThreshold)); + EXPECT_CALL(*agc_, Reset()).Times(1); + CallPreProc(1); + EXPECT_EQ(180, volume_.GetMicVolume()); + + CallPreProc(300); + EXPECT_CALL(*agc_, AnalyzePreproc(_, _)) + .WillOnce(Return(kAboveClippedThreshold)); + EXPECT_CALL(*agc_, Reset()).Times(1); + CallPreProc(1); + EXPECT_EQ(kClippedMin, volume_.GetMicVolume()); + + // Current level is now at the minimum, but the maximum allowed level still + // has more to decrease. + CallPreProc(300); + EXPECT_CALL(*agc_, AnalyzePreproc(_, _)) + .WillOnce(Return(kAboveClippedThreshold)); + CallPreProc(1); + + CallPreProc(300); + EXPECT_CALL(*agc_, AnalyzePreproc(_, _)) + .WillOnce(Return(kAboveClippedThreshold)); + CallPreProc(1); + + CallPreProc(300); + EXPECT_CALL(*agc_, AnalyzePreproc(_, _)) + .WillOnce(Return(kAboveClippedThreshold)); + CallPreProc(1); + + EXPECT_CALL(*agc_, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(16), Return(true))) + .WillOnce(DoAll(SetArgPointee<0>(16), Return(true))) + .WillOnce(DoAll(SetArgPointee<0>(16), Return(true))) + .WillOnce(DoAll(SetArgPointee<0>(16), Return(true))) + .WillRepeatedly(Return(false)); + CallProcess(19); + EXPECT_CALL(gctrl_, set_compression_gain_db(14)).WillOnce(Return(0)); + CallProcess(20); + EXPECT_CALL(gctrl_, set_compression_gain_db(15)).WillOnce(Return(0)); + CallProcess(20); + EXPECT_CALL(gctrl_, set_compression_gain_db(16)).WillOnce(Return(0)); + CallProcess(20); + EXPECT_CALL(gctrl_, set_compression_gain_db(17)).WillOnce(Return(0)); + CallProcess(20); + EXPECT_CALL(gctrl_, set_compression_gain_db(18)).WillOnce(Return(0)); + CallProcess(1); +} + +TEST_F(AgcManagerDirectTest, UserCanRaiseVolumeAfterClipping) { + SetVolumeAndProcess(225); + + EXPECT_CALL(*agc_, AnalyzePreproc(_, _)) + .WillOnce(Return(kAboveClippedThreshold)); + EXPECT_CALL(*agc_, Reset()).Times(1); + CallPreProc(1); + EXPECT_EQ(210, volume_.GetMicVolume()); + + // High enough error to trigger a volume check. + EXPECT_CALL(*agc_, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(14), Return(true))); + // User changed the volume. + volume_.SetMicVolume(250); + EXPECT_CALL(*agc_, Reset()).Times(1); + CallProcess(1); + EXPECT_EQ(250, volume_.GetMicVolume()); + + // Move down... + EXPECT_CALL(*agc_, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(-10), Return(true))); + CallProcess(1); + EXPECT_EQ(210, volume_.GetMicVolume()); + // And back up to the new max established by the user. 
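+ // The manual change to 250 also raised max_level_ via SetMaxLevel(), so
+ // upward adaptation now saturates at 250 instead of the post-clipping 240.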
+ EXPECT_CALL(*agc_, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(40), Return(true))); + CallProcess(1); + EXPECT_EQ(250, volume_.GetMicVolume()); + // Will not move above new maximum. + EXPECT_CALL(*agc_, GetRmsErrorDb(_)) + .WillOnce(DoAll(SetArgPointee<0>(30), Return(true))); + CallProcess(1); + EXPECT_EQ(250, volume_.GetMicVolume()); +} + +TEST_F(AgcManagerDirectTest, ClippingDoesNotPullLowVolumeBackUp) { + SetVolumeAndProcess(80); + + EXPECT_CALL(*agc_, AnalyzePreproc(_, _)) + .WillOnce(Return(kAboveClippedThreshold)); + EXPECT_CALL(*agc_, Reset()).Times(0); + int initial_volume = volume_.GetMicVolume(); + CallPreProc(1); + EXPECT_EQ(initial_volume, volume_.GetMicVolume()); +} + +TEST_F(AgcManagerDirectTest, TakesNoActionOnZeroMicVolume) { + FirstProcess(); + + EXPECT_CALL(*agc_, GetRmsErrorDb(_)) + .WillRepeatedly(DoAll(SetArgPointee<0>(30), Return(true))); + volume_.SetMicVolume(0); + CallProcess(10); + EXPECT_EQ(0, volume_.GetMicVolume()); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/agc/gain_map_internal.h b/third_party/libwebrtc/webrtc/modules/audio_processing/agc/gain_map_internal.h new file mode 100644 index 0000000000..f09c748b59 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/agc/gain_map_internal.h @@ -0,0 +1,275 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC_GAIN_MAP_INTERNAL_H_ +#define MODULES_AUDIO_PROCESSING_AGC_GAIN_MAP_INTERNAL_H_ + +static const int kGainMapSize = 256; +// Uses parameters: si = 2, sf = 0.25, D = 8/256 +static const int kGainMap[kGainMapSize] = { + -56, + -54, + -52, + -50, + -48, + -47, + -45, + -43, + -42, + -40, + -38, + -37, + -35, + -34, + -33, + -31, + -30, + -29, + -27, + -26, + -25, + -24, + -23, + -22, + -20, + -19, + -18, + -17, + -16, + -15, + -14, + -14, + -13, + -12, + -11, + -10, + -9, + -8, + -8, + -7, + -6, + -5, + -5, + -4, + -3, + -2, + -2, + -1, + 0, + 0, + 1, + 1, + 2, + 3, + 3, + 4, + 4, + 5, + 5, + 6, + 6, + 7, + 7, + 8, + 8, + 9, + 9, + 10, + 10, + 11, + 11, + 12, + 12, + 13, + 13, + 13, + 14, + 14, + 15, + 15, + 15, + 16, + 16, + 17, + 17, + 17, + 18, + 18, + 18, + 19, + 19, + 19, + 20, + 20, + 21, + 21, + 21, + 22, + 22, + 22, + 23, + 23, + 23, + 24, + 24, + 24, + 24, + 25, + 25, + 25, + 26, + 26, + 26, + 27, + 27, + 27, + 28, + 28, + 28, + 28, + 29, + 29, + 29, + 30, + 30, + 30, + 30, + 31, + 31, + 31, + 32, + 32, + 32, + 32, + 33, + 33, + 33, + 33, + 34, + 34, + 34, + 35, + 35, + 35, + 35, + 36, + 36, + 36, + 36, + 37, + 37, + 37, + 38, + 38, + 38, + 38, + 39, + 39, + 39, + 39, + 40, + 40, + 40, + 40, + 41, + 41, + 41, + 41, + 42, + 42, + 42, + 42, + 43, + 43, + 43, + 44, + 44, + 44, + 44, + 45, + 45, + 45, + 45, + 46, + 46, + 46, + 46, + 47, + 47, + 47, + 47, + 48, + 48, + 48, + 48, + 49, + 49, + 49, + 49, + 50, + 50, + 50, + 50, + 51, + 51, + 51, + 51, + 52, + 52, + 52, + 52, + 53, + 53, + 53, + 53, + 54, + 54, + 54, + 54, + 55, + 55, + 55, + 55, + 56, + 56, + 56, + 56, + 57, + 57, + 57, + 57, + 58, + 58, + 58, + 58, + 59, + 59, + 59, + 59, + 60, + 60, + 60, + 60, + 61, + 61, + 61, + 61, + 62, + 62, + 62, + 62, + 63, + 63, + 63, + 63, + 
64
+};
+
+#endif // MODULES_AUDIO_PROCESSING_AGC_GAIN_MAP_INTERNAL_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/agc/legacy/analog_agc.c b/third_party/libwebrtc/webrtc/modules/audio_processing/agc/legacy/analog_agc.c new file mode 100644 index 0000000000..662e88b6e7 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/agc/legacy/analog_agc.c @@ -0,0 +1,1390 @@ +/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/* analog_agc.c
+ *
+ * Using a feedback system, determines an appropriate analog volume level
+ * given an input signal and current volume level. Targets a conservative
+ * signal level and is intended for use with a digital AGC to apply
+ * additional gain.
+ *
+ */
+
+#include "modules/audio_processing/agc/legacy/analog_agc.h"
+
+#include <stdlib.h>
+#ifdef WEBRTC_AGC_DEBUG_DUMP
+#include <stdio.h>
+#endif
+
+#include "rtc_base/checks.h"
+
+/* The slope in Q13 */
+static const int16_t kSlope1[8] = {21793, 12517, 7189, 4129,
+ 2372, 1362, 472, 78};
+
+/* The offset in Q14 */
+static const int16_t kOffset1[8] = {25395, 23911, 22206, 20737,
+ 19612, 18805, 17951, 17367};
+
+/* The slope in Q13 */
+static const int16_t kSlope2[8] = {2063, 1731, 1452, 1218, 1021, 857, 597, 337};
+
+/* The offset in Q14 */
+static const int16_t kOffset2[8] = {18432, 18379, 18290, 18177,
+ 18052, 17920, 17670, 17286};
+
+static const int16_t kMuteGuardTimeMs = 8000;
+static const int16_t kInitCheck = 42;
+static const size_t kNumSubframes = 10;
+
+/* Default settings if config is not used */
+#define AGC_DEFAULT_TARGET_LEVEL 3
+#define AGC_DEFAULT_COMP_GAIN 9
+/* This is the target level for the analog part in ENV scale. To convert to
+ * RMS scale, add OFFSET_ENV_TO_RMS.
+ */
+#define ANALOG_TARGET_LEVEL 11
+#define ANALOG_TARGET_LEVEL_2 5 // ANALOG_TARGET_LEVEL / 2
+/* Offset between RMS scale (analog part) and ENV scale (digital part). This
+ * value actually varies with the FIXED_ANALOG_TARGET_LEVEL, so it should
+ * eventually be replaced with a table.
+ */
+#define OFFSET_ENV_TO_RMS 9
+/* The reference input level at which the digital part gives an output of
+ * targetLevelDbfs (the desired level) if there is no compression gain. This
+ * level should be set high enough not to compress the peaks due to the
+ * dynamics.
+ */
+#define DIGITAL_REF_AT_0_COMP_GAIN 4
+/* Speed of reference level decrease.
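+ * DIFF_REF_TO_ANALOG scales how strongly the analog target computed in
+ * WebRtcAgc_UpdateAgcThresholds() tracks the compression gain (roughly
+ * DIFF_REF_TO_ANALOG / ANALOG_TARGET_LEVEL dB per dB of gain).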
+ */ +#define DIFF_REF_TO_ANALOG 5 + +#ifdef MIC_LEVEL_FEEDBACK +#define NUM_BLOCKS_IN_SAT_BEFORE_CHANGE_TARGET 7 +#endif +/* Size of analog gain table */ +#define GAIN_TBL_LEN 32 +/* Matlab code: + * fprintf(1, '\t%i, %i, %i, %i,\n', round(10.^(linspace(0,10,32)/20) * 2^12)); + */ +/* Q12 */ +static const uint16_t kGainTableAnalog[GAIN_TBL_LEN] = { + 4096, 4251, 4412, 4579, 4752, 4932, 5118, 5312, 5513, 5722, 5938, + 6163, 6396, 6638, 6889, 7150, 7420, 7701, 7992, 8295, 8609, 8934, + 9273, 9623, 9987, 10365, 10758, 11165, 11587, 12025, 12480, 12953}; + +/* Gain/Suppression tables for virtual Mic (in Q10) */ +static const uint16_t kGainTableVirtualMic[128] = { + 1052, 1081, 1110, 1141, 1172, 1204, 1237, 1271, 1305, 1341, 1378, + 1416, 1454, 1494, 1535, 1577, 1620, 1664, 1710, 1757, 1805, 1854, + 1905, 1957, 2010, 2065, 2122, 2180, 2239, 2301, 2364, 2428, 2495, + 2563, 2633, 2705, 2779, 2855, 2933, 3013, 3096, 3180, 3267, 3357, + 3449, 3543, 3640, 3739, 3842, 3947, 4055, 4166, 4280, 4397, 4517, + 4640, 4767, 4898, 5032, 5169, 5311, 5456, 5605, 5758, 5916, 6078, + 6244, 6415, 6590, 6770, 6956, 7146, 7341, 7542, 7748, 7960, 8178, + 8402, 8631, 8867, 9110, 9359, 9615, 9878, 10148, 10426, 10711, 11004, + 11305, 11614, 11932, 12258, 12593, 12938, 13292, 13655, 14029, 14412, 14807, + 15212, 15628, 16055, 16494, 16945, 17409, 17885, 18374, 18877, 19393, 19923, + 20468, 21028, 21603, 22194, 22801, 23425, 24065, 24724, 25400, 26095, 26808, + 27541, 28295, 29069, 29864, 30681, 31520, 32382}; +static const uint16_t kSuppressionTableVirtualMic[128] = { + 1024, 1006, 988, 970, 952, 935, 918, 902, 886, 870, 854, 839, 824, 809, 794, + 780, 766, 752, 739, 726, 713, 700, 687, 675, 663, 651, 639, 628, 616, 605, + 594, 584, 573, 563, 553, 543, 533, 524, 514, 505, 496, 487, 478, 470, 461, + 453, 445, 437, 429, 421, 414, 406, 399, 392, 385, 378, 371, 364, 358, 351, + 345, 339, 333, 327, 321, 315, 309, 304, 298, 293, 288, 283, 278, 273, 268, + 263, 258, 254, 249, 244, 240, 236, 232, 227, 223, 219, 215, 211, 208, 204, + 200, 197, 193, 190, 186, 183, 180, 176, 173, 170, 167, 164, 161, 158, 155, + 153, 150, 147, 145, 142, 139, 137, 134, 132, 130, 127, 125, 123, 121, 118, + 116, 114, 112, 110, 108, 106, 104, 102}; + +/* Table for target energy levels. 
Values in Q(-7) + * Matlab code + * targetLevelTable = fprintf('%d,\t%d,\t%d,\t%d,\n', + * round((32767*10.^(-(0:63)'/20)).^2*16/2^7) */ + +static const int32_t kTargetLevelTable[64] = { + 134209536, 106606424, 84680493, 67264106, 53429779, 42440782, 33711911, + 26778323, 21270778, 16895980, 13420954, 10660642, 8468049, 6726411, + 5342978, 4244078, 3371191, 2677832, 2127078, 1689598, 1342095, + 1066064, 846805, 672641, 534298, 424408, 337119, 267783, + 212708, 168960, 134210, 106606, 84680, 67264, 53430, + 42441, 33712, 26778, 21271, 16896, 13421, 10661, + 8468, 6726, 5343, 4244, 3371, 2678, 2127, + 1690, 1342, 1066, 847, 673, 534, 424, + 337, 268, 213, 169, 134, 107, 85, + 67}; + +int WebRtcAgc_AddMic(void* state, + int16_t* const* in_mic, + size_t num_bands, + size_t samples) { + int32_t nrg, max_nrg, sample, tmp32; + int32_t* ptr; + uint16_t targetGainIdx, gain; + size_t i; + int16_t n, L, tmp16, tmp_speech[16]; + LegacyAgc* stt; + stt = (LegacyAgc*)state; + + if (stt->fs == 8000) { + L = 8; + if (samples != 80) { + return -1; + } + } else { + L = 16; + if (samples != 160) { + return -1; + } + } + + /* apply slowly varying digital gain */ + if (stt->micVol > stt->maxAnalog) { + /* |maxLevel| is strictly >= |micVol|, so this condition should be + * satisfied here, ensuring there is no divide-by-zero. */ + RTC_DCHECK_GT(stt->maxLevel, stt->maxAnalog); + + /* Q1 */ + tmp16 = (int16_t)(stt->micVol - stt->maxAnalog); + tmp32 = (GAIN_TBL_LEN - 1) * tmp16; + tmp16 = (int16_t)(stt->maxLevel - stt->maxAnalog); + targetGainIdx = tmp32 / tmp16; + RTC_DCHECK_LT(targetGainIdx, GAIN_TBL_LEN); + + /* Increment through the table towards the target gain. + * If micVol drops below maxAnalog, we allow the gain + * to be dropped immediately. */ + if (stt->gainTableIdx < targetGainIdx) { + stt->gainTableIdx++; + } else if (stt->gainTableIdx > targetGainIdx) { + stt->gainTableIdx--; + } + + /* Q12 */ + gain = kGainTableAnalog[stt->gainTableIdx]; + + for (i = 0; i < samples; i++) { + size_t j; + for (j = 0; j < num_bands; ++j) { + sample = (in_mic[j][i] * gain) >> 12; + if (sample > 32767) { + in_mic[j][i] = 32767; + } else if (sample < -32768) { + in_mic[j][i] = -32768; + } else { + in_mic[j][i] = (int16_t)sample; + } + } + } + } else { + stt->gainTableIdx = 0; + } + + /* compute envelope */ + if (stt->inQueue > 0) { + ptr = stt->env[1]; + } else { + ptr = stt->env[0]; + } + + for (i = 0; i < kNumSubframes; i++) { + /* iterate over samples */ + max_nrg = 0; + for (n = 0; n < L; n++) { + nrg = in_mic[0][i * L + n] * in_mic[0][i * L + n]; + if (nrg > max_nrg) { + max_nrg = nrg; + } + } + ptr[i] = max_nrg; + } + + /* compute energy */ + if (stt->inQueue > 0) { + ptr = stt->Rxx16w32_array[1]; + } else { + ptr = stt->Rxx16w32_array[0]; + } + + for (i = 0; i < kNumSubframes / 2; i++) { + if (stt->fs == 16000) { + WebRtcSpl_DownsampleBy2(&in_mic[0][i * 32], 32, tmp_speech, + stt->filterState); + } else { + memcpy(tmp_speech, &in_mic[0][i * 16], 16 * sizeof(short)); + } + /* Compute energy in blocks of 16 samples */ + ptr[i] = WebRtcSpl_DotProductWithScale(tmp_speech, tmp_speech, 16, 4); + } + + /* update queue information */ + if (stt->inQueue == 0) { + stt->inQueue = 1; + } else { + stt->inQueue = 2; + } + + /* call VAD (use low band only) */ + WebRtcAgc_ProcessVad(&stt->vadMic, in_mic[0], samples); + + return 0; +} + +int WebRtcAgc_AddFarend(void* state, const int16_t* in_far, size_t samples) { + LegacyAgc* stt = (LegacyAgc*)state; + + int err = WebRtcAgc_GetAddFarendError(state, samples); + + if (err != 0) + 
return err; + + return WebRtcAgc_AddFarendToDigital(&stt->digitalAgc, in_far, samples); +} + +int WebRtcAgc_GetAddFarendError(void* state, size_t samples) { + LegacyAgc* stt; + stt = (LegacyAgc*)state; + + if (stt == NULL) + return -1; + + if (stt->fs == 8000) { + if (samples != 80) + return -1; + } else if (stt->fs == 16000 || stt->fs == 32000 || stt->fs == 48000) { + if (samples != 160) + return -1; + } else { + return -1; + } + + return 0; +} + +int WebRtcAgc_VirtualMic(void* agcInst, + int16_t* const* in_near, + size_t num_bands, + size_t samples, + int32_t micLevelIn, + int32_t* micLevelOut) { + int32_t tmpFlt, micLevelTmp, gainIdx; + uint16_t gain; + size_t ii, j; + LegacyAgc* stt; + + uint32_t nrg; + size_t sampleCntr; + uint32_t frameNrg = 0; + uint32_t frameNrgLimit = 5500; + int16_t numZeroCrossing = 0; + const int16_t kZeroCrossingLowLim = 15; + const int16_t kZeroCrossingHighLim = 20; + + stt = (LegacyAgc*)agcInst; + + /* + * Before applying gain decide if this is a low-level signal. + * The idea is that digital AGC will not adapt to low-level + * signals. + */ + if (stt->fs != 8000) { + frameNrgLimit = frameNrgLimit << 1; + } + + frameNrg = (uint32_t)(in_near[0][0] * in_near[0][0]); + for (sampleCntr = 1; sampleCntr < samples; sampleCntr++) { + // increment frame energy if it is less than the limit + // the correct value of the energy is not important + if (frameNrg < frameNrgLimit) { + nrg = (uint32_t)(in_near[0][sampleCntr] * in_near[0][sampleCntr]); + frameNrg += nrg; + } + + // Count the zero crossings + numZeroCrossing += + ((in_near[0][sampleCntr] ^ in_near[0][sampleCntr - 1]) < 0); + } + + if ((frameNrg < 500) || (numZeroCrossing <= 5)) { + stt->lowLevelSignal = 1; + } else if (numZeroCrossing <= kZeroCrossingLowLim) { + stt->lowLevelSignal = 0; + } else if (frameNrg <= frameNrgLimit) { + stt->lowLevelSignal = 1; + } else if (numZeroCrossing >= kZeroCrossingHighLim) { + stt->lowLevelSignal = 1; + } else { + stt->lowLevelSignal = 0; + } + + micLevelTmp = micLevelIn << stt->scale; + /* Set desired level */ + gainIdx = stt->micVol; + if (stt->micVol > stt->maxAnalog) { + gainIdx = stt->maxAnalog; + } + if (micLevelTmp != stt->micRef) { + /* Something has happened with the physical level, restart. */ + stt->micRef = micLevelTmp; + stt->micVol = 127; + *micLevelOut = 127; + stt->micGainIdx = 127; + gainIdx = 127; + } + /* Pre-process the signal to emulate the microphone level. */ + /* Take one step at a time in the gain table. 
*/ + if (gainIdx > 127) { + gain = kGainTableVirtualMic[gainIdx - 128]; + } else { + gain = kSuppressionTableVirtualMic[127 - gainIdx]; + } + for (ii = 0; ii < samples; ii++) { + tmpFlt = (in_near[0][ii] * gain) >> 10; + if (tmpFlt > 32767) { + tmpFlt = 32767; + gainIdx--; + if (gainIdx >= 127) { + gain = kGainTableVirtualMic[gainIdx - 127]; + } else { + gain = kSuppressionTableVirtualMic[127 - gainIdx]; + } + } + if (tmpFlt < -32768) { + tmpFlt = -32768; + gainIdx--; + if (gainIdx >= 127) { + gain = kGainTableVirtualMic[gainIdx - 127]; + } else { + gain = kSuppressionTableVirtualMic[127 - gainIdx]; + } + } + in_near[0][ii] = (int16_t)tmpFlt; + for (j = 1; j < num_bands; ++j) { + tmpFlt = (in_near[j][ii] * gain) >> 10; + if (tmpFlt > 32767) { + tmpFlt = 32767; + } + if (tmpFlt < -32768) { + tmpFlt = -32768; + } + in_near[j][ii] = (int16_t)tmpFlt; + } + } + /* Set the level we (finally) used */ + stt->micGainIdx = gainIdx; + // *micLevelOut = stt->micGainIdx; + *micLevelOut = stt->micGainIdx >> stt->scale; + /* Add to Mic as if it was the output from a true microphone */ + if (WebRtcAgc_AddMic(agcInst, in_near, num_bands, samples) != 0) { + return -1; + } + return 0; +} + +void WebRtcAgc_UpdateAgcThresholds(LegacyAgc* stt) { + int16_t tmp16; +#ifdef MIC_LEVEL_FEEDBACK + int zeros; + + if (stt->micLvlSat) { + /* Lower the analog target level since we have reached its maximum */ + zeros = WebRtcSpl_NormW32(stt->Rxx160_LPw32); + stt->targetIdxOffset = (3 * zeros - stt->targetIdx - 2) / 4; + } +#endif + + /* Set analog target level in envelope dBOv scale */ + tmp16 = (DIFF_REF_TO_ANALOG * stt->compressionGaindB) + ANALOG_TARGET_LEVEL_2; + tmp16 = WebRtcSpl_DivW32W16ResW16((int32_t)tmp16, ANALOG_TARGET_LEVEL); + stt->analogTarget = DIGITAL_REF_AT_0_COMP_GAIN + tmp16; + if (stt->analogTarget < DIGITAL_REF_AT_0_COMP_GAIN) { + stt->analogTarget = DIGITAL_REF_AT_0_COMP_GAIN; + } + if (stt->agcMode == kAgcModeFixedDigital) { + /* Adjust for different parameter interpretation in FixedDigital mode */ + stt->analogTarget = stt->compressionGaindB; + } +#ifdef MIC_LEVEL_FEEDBACK + stt->analogTarget += stt->targetIdxOffset; +#endif + /* Since the offset between RMS and ENV is not constant, we should make this + * into a + * table, but for now, we'll stick with a constant, tuned for the chosen + * analog + * target level. + */ + stt->targetIdx = ANALOG_TARGET_LEVEL + OFFSET_ENV_TO_RMS; +#ifdef MIC_LEVEL_FEEDBACK + stt->targetIdx += stt->targetIdxOffset; +#endif + /* Analog adaptation limits */ + /* analogTargetLevel = round((32767*10^(-targetIdx/20))^2*16/2^7) */ + stt->analogTargetLevel = + RXX_BUFFER_LEN * kTargetLevelTable[stt->targetIdx]; /* ex. 
-20 dBov */ + stt->startUpperLimit = + RXX_BUFFER_LEN * kTargetLevelTable[stt->targetIdx - 1]; /* -19 dBov */ + stt->startLowerLimit = + RXX_BUFFER_LEN * kTargetLevelTable[stt->targetIdx + 1]; /* -21 dBov */ + stt->upperPrimaryLimit = + RXX_BUFFER_LEN * kTargetLevelTable[stt->targetIdx - 2]; /* -18 dBov */ + stt->lowerPrimaryLimit = + RXX_BUFFER_LEN * kTargetLevelTable[stt->targetIdx + 2]; /* -22 dBov */ + stt->upperSecondaryLimit = + RXX_BUFFER_LEN * kTargetLevelTable[stt->targetIdx - 5]; /* -15 dBov */ + stt->lowerSecondaryLimit = + RXX_BUFFER_LEN * kTargetLevelTable[stt->targetIdx + 5]; /* -25 dBov */ + stt->upperLimit = stt->startUpperLimit; + stt->lowerLimit = stt->startLowerLimit; +} + +void WebRtcAgc_SaturationCtrl(LegacyAgc* stt, + uint8_t* saturated, + int32_t* env) { + int16_t i, tmpW16; + + /* Check if the signal is saturated */ + for (i = 0; i < 10; i++) { + tmpW16 = (int16_t)(env[i] >> 20); + if (tmpW16 > 875) { + stt->envSum += tmpW16; + } + } + + if (stt->envSum > 25000) { + *saturated = 1; + stt->envSum = 0; + } + + /* stt->envSum *= 0.99; */ + stt->envSum = (int16_t)((stt->envSum * 32440) >> 15); +} + +void WebRtcAgc_ZeroCtrl(LegacyAgc* stt, int32_t* inMicLevel, int32_t* env) { + int16_t i; + int64_t tmp = 0; + int32_t midVal; + + /* Is the input signal zero? */ + for (i = 0; i < 10; i++) { + tmp += env[i]; + } + + /* Each block is allowed to have a few non-zero + * samples. + */ + if (tmp < 500) { + stt->msZero += 10; + } else { + stt->msZero = 0; + } + + if (stt->muteGuardMs > 0) { + stt->muteGuardMs -= 10; + } + + if (stt->msZero > 500) { + stt->msZero = 0; + + /* Increase microphone level only if it's less than 50% */ + midVal = (stt->maxAnalog + stt->minLevel + 1) / 2; + if (*inMicLevel < midVal) { + /* *inMicLevel *= 1.1; */ + *inMicLevel = (1126 * *inMicLevel) >> 10; + /* Reduces risk of a muted mic repeatedly triggering excessive levels due + * to zero signal detection. */ + *inMicLevel = WEBRTC_SPL_MIN(*inMicLevel, stt->zeroCtrlMax); + stt->micVol = *inMicLevel; + } + +#ifdef WEBRTC_AGC_DEBUG_DUMP + fprintf(stt->fpt, + "\t\tAGC->zeroCntrl, frame %d: 500 ms under threshold," + " micVol: %d\n", + stt->fcount, stt->micVol); +#endif + + stt->activeSpeech = 0; + stt->Rxx16_LPw32Max = 0; + + /* The AGC has a tendency (due to problems with the VAD parameters), to + * vastly increase the volume after a muting event. This timer prevents + * upwards adaptation for a short period. */ + stt->muteGuardMs = kMuteGuardTimeMs; + } +} + +void WebRtcAgc_SpeakerInactiveCtrl(LegacyAgc* stt) { + /* Check if the near end speaker is inactive. + * If that is the case the VAD threshold is + * increased since the VAD speech model gets + * more sensitive to any sound after a long + * silence. 
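+   * The thresholds are in the same Q10 domain as the log-likelihood ratio
+   * from WebRtcAgc_ProcessVad: the raised threshold of 1500 corresponds to
+   * roughly 1.46 and kNormalVadThreshold (400) to roughly 0.39. The update
+   * below is a simple IIR smoother,
+   * vadThreshold <- (31 * vadThreshold + vadThresh) / 32.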
+ */ + + int32_t tmp32; + int16_t vadThresh; + + if (stt->vadMic.stdLongTerm < 2500) { + stt->vadThreshold = 1500; + } else { + vadThresh = kNormalVadThreshold; + if (stt->vadMic.stdLongTerm < 4500) { + /* Scale between min and max threshold */ + vadThresh += (4500 - stt->vadMic.stdLongTerm) / 2; + } + + /* stt->vadThreshold = (31 * stt->vadThreshold + vadThresh) / 32; */ + tmp32 = vadThresh + 31 * stt->vadThreshold; + stt->vadThreshold = (int16_t)(tmp32 >> 5); + } +} + +void WebRtcAgc_ExpCurve(int16_t volume, int16_t* index) { + // volume in Q14 + // index in [0-7] + /* 8 different curves */ + if (volume > 5243) { + if (volume > 7864) { + if (volume > 12124) { + *index = 7; + } else { + *index = 6; + } + } else { + if (volume > 6554) { + *index = 5; + } else { + *index = 4; + } + } + } else { + if (volume > 2621) { + if (volume > 3932) { + *index = 3; + } else { + *index = 2; + } + } else { + if (volume > 1311) { + *index = 1; + } else { + *index = 0; + } + } + } +} + +int32_t WebRtcAgc_ProcessAnalog(void* state, + int32_t inMicLevel, + int32_t* outMicLevel, + int16_t vadLogRatio, + int16_t echo, + uint8_t* saturationWarning) { + uint32_t tmpU32; + int32_t Rxx16w32, tmp32; + int32_t inMicLevelTmp, lastMicVol; + int16_t i; + uint8_t saturated = 0; + LegacyAgc* stt; + + stt = (LegacyAgc*)state; + inMicLevelTmp = inMicLevel << stt->scale; + + if (inMicLevelTmp > stt->maxAnalog) { +#ifdef WEBRTC_AGC_DEBUG_DUMP + fprintf(stt->fpt, "\tAGC->ProcessAnalog, frame %d: micLvl > maxAnalog\n", + stt->fcount); +#endif + return -1; + } else if (inMicLevelTmp < stt->minLevel) { +#ifdef WEBRTC_AGC_DEBUG_DUMP + fprintf(stt->fpt, "\tAGC->ProcessAnalog, frame %d: micLvl < minLevel\n", + stt->fcount); +#endif + return -1; + } + + if (stt->firstCall == 0) { + int32_t tmpVol; + stt->firstCall = 1; + tmp32 = ((stt->maxLevel - stt->minLevel) * 51) >> 9; + tmpVol = (stt->minLevel + tmp32); + + /* If the mic level is very low at start, increase it! */ + if ((inMicLevelTmp < tmpVol) && (stt->agcMode == kAgcModeAdaptiveAnalog)) { + inMicLevelTmp = tmpVol; + } + stt->micVol = inMicLevelTmp; + } + + /* Set the mic level to the previous output value if there is digital input + * gain */ + if ((inMicLevelTmp == stt->maxAnalog) && (stt->micVol > stt->maxAnalog)) { + inMicLevelTmp = stt->micVol; + } + + /* If the mic level was manually changed to a very low value raise it! */ + if ((inMicLevelTmp != stt->micVol) && (inMicLevelTmp < stt->minOutput)) { + tmp32 = ((stt->maxLevel - stt->minLevel) * 51) >> 9; + inMicLevelTmp = (stt->minLevel + tmp32); + stt->micVol = inMicLevelTmp; +#ifdef MIC_LEVEL_FEEDBACK +// stt->numBlocksMicLvlSat = 0; +#endif +#ifdef WEBRTC_AGC_DEBUG_DUMP + fprintf(stt->fpt, + "\tAGC->ProcessAnalog, frame %d: micLvl < minLevel by manual" + " decrease, raise vol\n", + stt->fcount); +#endif + } + + if (inMicLevelTmp != stt->micVol) { + if (inMicLevel == stt->lastInMicLevel) { + // We requested a volume adjustment, but it didn't occur. This is + // probably due to a coarse quantization of the volume slider. + // Restore the requested value to prevent getting stuck. + inMicLevelTmp = stt->micVol; + } else { + // As long as the value changed, update to match. + stt->micVol = inMicLevelTmp; + } + } + + if (inMicLevelTmp > stt->maxLevel) { + // Always allow the user to raise the volume above the maxLevel. + stt->maxLevel = inMicLevelTmp; + } + + // Store last value here, after we've taken care of manual updates etc. 
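+  /* The remainder of this function runs four loosely coupled controls,
+   * sketched here for orientation: (1) saturation control, which may force
+   * micVol down; (2) zero-input control, which may push a near-muted level
+   * up; (3) speaker-inactivity control, which adapts the VAD threshold; and
+   * (4) a five-subframe loop that compares the slow energy measure
+   * Rxx160_LPw32 against the limits derived in
+   * WebRtcAgc_UpdateAgcThresholds and nudges micVol one step at a time. */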
+ stt->lastInMicLevel = inMicLevel; + lastMicVol = stt->micVol; + + /* Checks if the signal is saturated. Also a check if individual samples + * are larger than 12000 is done. If they are the counter for increasing + * the volume level is set to -100ms + */ + WebRtcAgc_SaturationCtrl(stt, &saturated, stt->env[0]); + + /* The AGC is always allowed to lower the level if the signal is saturated */ + if (saturated == 1) { + /* Lower the recording level + * Rxx160_LP is adjusted down because it is so slow it could + * cause the AGC to make wrong decisions. */ + /* stt->Rxx160_LPw32 *= 0.875; */ + stt->Rxx160_LPw32 = (stt->Rxx160_LPw32 / 8) * 7; + + stt->zeroCtrlMax = stt->micVol; + + /* stt->micVol *= 0.903; */ + tmp32 = inMicLevelTmp - stt->minLevel; + tmpU32 = WEBRTC_SPL_UMUL(29591, (uint32_t)(tmp32)); + stt->micVol = (tmpU32 >> 15) + stt->minLevel; + if (stt->micVol > lastMicVol - 2) { + stt->micVol = lastMicVol - 2; + } + inMicLevelTmp = stt->micVol; + +#ifdef WEBRTC_AGC_DEBUG_DUMP + fprintf(stt->fpt, + "\tAGC->ProcessAnalog, frame %d: saturated, micVol = %d\n", + stt->fcount, stt->micVol); +#endif + + if (stt->micVol < stt->minOutput) { + *saturationWarning = 1; + } + + /* Reset counter for decrease of volume level to avoid + * decreasing too much. The saturation control can still + * lower the level if needed. */ + stt->msTooHigh = -100; + + /* Enable the control mechanism to ensure that our measure, + * Rxx160_LP, is in the correct range. This must be done since + * the measure is very slow. */ + stt->activeSpeech = 0; + stt->Rxx16_LPw32Max = 0; + + /* Reset to initial values */ + stt->msecSpeechInnerChange = kMsecSpeechInner; + stt->msecSpeechOuterChange = kMsecSpeechOuter; + stt->changeToSlowMode = 0; + + stt->muteGuardMs = 0; + + stt->upperLimit = stt->startUpperLimit; + stt->lowerLimit = stt->startLowerLimit; +#ifdef MIC_LEVEL_FEEDBACK +// stt->numBlocksMicLvlSat = 0; +#endif + } + + /* Check if the input speech is zero. If so the mic volume + * is increased. On some computers the input is zero up as high + * level as 17% */ + WebRtcAgc_ZeroCtrl(stt, &inMicLevelTmp, stt->env[0]); + + /* Check if the near end speaker is inactive. + * If that is the case the VAD threshold is + * increased since the VAD speech model gets + * more sensitive to any sound after a long + * silence. + */ + WebRtcAgc_SpeakerInactiveCtrl(stt); + + for (i = 0; i < 5; i++) { + /* Computed on blocks of 16 samples */ + + Rxx16w32 = stt->Rxx16w32_array[0][i]; + + /* Rxx160w32 in Q(-7) */ + tmp32 = (Rxx16w32 - stt->Rxx16_vectorw32[stt->Rxx16pos]) >> 3; + stt->Rxx160w32 = stt->Rxx160w32 + tmp32; + stt->Rxx16_vectorw32[stt->Rxx16pos] = Rxx16w32; + + /* Circular buffer */ + stt->Rxx16pos++; + if (stt->Rxx16pos == RXX_BUFFER_LEN) { + stt->Rxx16pos = 0; + } + + /* Rxx16_LPw32 in Q(-4) */ + tmp32 = (Rxx16w32 - stt->Rxx16_LPw32) >> kAlphaShortTerm; + stt->Rxx16_LPw32 = (stt->Rxx16_LPw32) + tmp32; + + if (vadLogRatio > stt->vadThreshold) { + /* Speech detected! */ + + /* Check if Rxx160_LP is in the correct range. If + * it is too high/low then we set it to the maximum of + * Rxx16_LPw32 during the first 200ms of speech. 
+ */ + if (stt->activeSpeech < 250) { + stt->activeSpeech += 2; + + if (stt->Rxx16_LPw32 > stt->Rxx16_LPw32Max) { + stt->Rxx16_LPw32Max = stt->Rxx16_LPw32; + } + } else if (stt->activeSpeech == 250) { + stt->activeSpeech += 2; + tmp32 = stt->Rxx16_LPw32Max >> 3; + stt->Rxx160_LPw32 = tmp32 * RXX_BUFFER_LEN; + } + + tmp32 = (stt->Rxx160w32 - stt->Rxx160_LPw32) >> kAlphaLongTerm; + stt->Rxx160_LPw32 = stt->Rxx160_LPw32 + tmp32; + + if (stt->Rxx160_LPw32 > stt->upperSecondaryLimit) { + stt->msTooHigh += 2; + stt->msTooLow = 0; + stt->changeToSlowMode = 0; + + if (stt->msTooHigh > stt->msecSpeechOuterChange) { + stt->msTooHigh = 0; + + /* Lower the recording level */ + /* Multiply by 0.828125 which corresponds to decreasing ~0.8dB */ + tmp32 = stt->Rxx160_LPw32 >> 6; + stt->Rxx160_LPw32 = tmp32 * 53; + + /* Reduce the max gain to avoid excessive oscillation + * (but never drop below the maximum analog level). + */ + stt->maxLevel = (15 * stt->maxLevel + stt->micVol) / 16; + stt->maxLevel = WEBRTC_SPL_MAX(stt->maxLevel, stt->maxAnalog); + + stt->zeroCtrlMax = stt->micVol; + + /* 0.95 in Q15 */ + tmp32 = inMicLevelTmp - stt->minLevel; + tmpU32 = WEBRTC_SPL_UMUL(31130, (uint32_t)(tmp32)); + stt->micVol = (tmpU32 >> 15) + stt->minLevel; + if (stt->micVol > lastMicVol - 1) { + stt->micVol = lastMicVol - 1; + } + inMicLevelTmp = stt->micVol; + + /* Enable the control mechanism to ensure that our measure, + * Rxx160_LP, is in the correct range. + */ + stt->activeSpeech = 0; + stt->Rxx16_LPw32Max = 0; +#ifdef MIC_LEVEL_FEEDBACK +// stt->numBlocksMicLvlSat = 0; +#endif +#ifdef WEBRTC_AGC_DEBUG_DUMP + fprintf(stt->fpt, + "\tAGC->ProcessAnalog, frame %d: measure >" + " 2ndUpperLim, micVol = %d, maxLevel = %d\n", + stt->fcount, stt->micVol, stt->maxLevel); +#endif + } + } else if (stt->Rxx160_LPw32 > stt->upperLimit) { + stt->msTooHigh += 2; + stt->msTooLow = 0; + stt->changeToSlowMode = 0; + + if (stt->msTooHigh > stt->msecSpeechInnerChange) { + /* Lower the recording level */ + stt->msTooHigh = 0; + /* Multiply by 0.828125 which corresponds to decreasing ~0.8dB */ + stt->Rxx160_LPw32 = (stt->Rxx160_LPw32 / 64) * 53; + + /* Reduce the max gain to avoid excessive oscillation + * (but never drop below the maximum analog level). + */ + stt->maxLevel = (15 * stt->maxLevel + stt->micVol) / 16; + stt->maxLevel = WEBRTC_SPL_MAX(stt->maxLevel, stt->maxAnalog); + + stt->zeroCtrlMax = stt->micVol; + + /* 0.965 in Q15 */ + tmp32 = inMicLevelTmp - stt->minLevel; + tmpU32 = + WEBRTC_SPL_UMUL(31621, (uint32_t)(inMicLevelTmp - stt->minLevel)); + stt->micVol = (tmpU32 >> 15) + stt->minLevel; + if (stt->micVol > lastMicVol - 1) { + stt->micVol = lastMicVol - 1; + } + inMicLevelTmp = stt->micVol; + +#ifdef MIC_LEVEL_FEEDBACK +// stt->numBlocksMicLvlSat = 0; +#endif +#ifdef WEBRTC_AGC_DEBUG_DUMP + fprintf(stt->fpt, + "\tAGC->ProcessAnalog, frame %d: measure >" + " UpperLim, micVol = %d, maxLevel = %d\n", + stt->fcount, stt->micVol, stt->maxLevel); +#endif + } + } else if (stt->Rxx160_LPw32 < stt->lowerSecondaryLimit) { + stt->msTooHigh = 0; + stt->changeToSlowMode = 0; + stt->msTooLow += 2; + + if (stt->msTooLow > stt->msecSpeechOuterChange) { + /* Raise the recording level */ + int16_t index, weightFIX; + int16_t volNormFIX = 16384; // =1 in Q14. 
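+        /* The raise step is weighted by how far up the range the level
+         * already sits: volNormFIX normalizes it into [0, 1) in Q14 and
+         * WebRtcAgc_ExpCurve picks one of eight slope/offset pairs. As a
+         * worked example, a level one quarter of the way up gives
+         * volNormFIX = 4096 (0.25 in Q14), i.e. curve index 3; the weight
+         * 32^(-2*X)/2 + 1.05 shrinks as X grows, so steps are larger low
+         * in the range and smaller near the top. */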
+ + stt->msTooLow = 0; + + /* Normalize the volume level */ + tmp32 = (inMicLevelTmp - stt->minLevel) << 14; + if (stt->maxInit != stt->minLevel) { + volNormFIX = tmp32 / (stt->maxInit - stt->minLevel); + } + + /* Find correct curve */ + WebRtcAgc_ExpCurve(volNormFIX, &index); + + /* Compute weighting factor for the volume increase, 32^(-2*X)/2+1.05 + */ + weightFIX = + kOffset1[index] - (int16_t)((kSlope1[index] * volNormFIX) >> 13); + + /* stt->Rxx160_LPw32 *= 1.047 [~0.2 dB]; */ + stt->Rxx160_LPw32 = (stt->Rxx160_LPw32 / 64) * 67; + + tmp32 = inMicLevelTmp - stt->minLevel; + tmpU32 = + ((uint32_t)weightFIX * (uint32_t)(inMicLevelTmp - stt->minLevel)); + stt->micVol = (tmpU32 >> 14) + stt->minLevel; + if (stt->micVol < lastMicVol + 2) { + stt->micVol = lastMicVol + 2; + } + + inMicLevelTmp = stt->micVol; + +#ifdef MIC_LEVEL_FEEDBACK + /* Count ms in level saturation */ + // if (stt->micVol > stt->maxAnalog) { + if (stt->micVol > 150) { + /* mic level is saturated */ + stt->numBlocksMicLvlSat++; + fprintf(stderr, "Sat mic Level: %d\n", stt->numBlocksMicLvlSat); + } +#endif +#ifdef WEBRTC_AGC_DEBUG_DUMP + fprintf(stt->fpt, + "\tAGC->ProcessAnalog, frame %d: measure <" + " 2ndLowerLim, micVol = %d\n", + stt->fcount, stt->micVol); +#endif + } + } else if (stt->Rxx160_LPw32 < stt->lowerLimit) { + stt->msTooHigh = 0; + stt->changeToSlowMode = 0; + stt->msTooLow += 2; + + if (stt->msTooLow > stt->msecSpeechInnerChange) { + /* Raise the recording level */ + int16_t index, weightFIX; + int16_t volNormFIX = 16384; // =1 in Q14. + + stt->msTooLow = 0; + + /* Normalize the volume level */ + tmp32 = (inMicLevelTmp - stt->minLevel) << 14; + if (stt->maxInit != stt->minLevel) { + volNormFIX = tmp32 / (stt->maxInit - stt->minLevel); + } + + /* Find correct curve */ + WebRtcAgc_ExpCurve(volNormFIX, &index); + + /* Compute weighting factor for the volume increase, (3.^(-2.*X))/8+1 + */ + weightFIX = + kOffset2[index] - (int16_t)((kSlope2[index] * volNormFIX) >> 13); + + /* stt->Rxx160_LPw32 *= 1.047 [~0.2 dB]; */ + stt->Rxx160_LPw32 = (stt->Rxx160_LPw32 / 64) * 67; + + tmp32 = inMicLevelTmp - stt->minLevel; + tmpU32 = + ((uint32_t)weightFIX * (uint32_t)(inMicLevelTmp - stt->minLevel)); + stt->micVol = (tmpU32 >> 14) + stt->minLevel; + if (stt->micVol < lastMicVol + 1) { + stt->micVol = lastMicVol + 1; + } + + inMicLevelTmp = stt->micVol; + +#ifdef MIC_LEVEL_FEEDBACK + /* Count ms in level saturation */ + // if (stt->micVol > stt->maxAnalog) { + if (stt->micVol > 150) { + /* mic level is saturated */ + stt->numBlocksMicLvlSat++; + fprintf(stderr, "Sat mic Level: %d\n", stt->numBlocksMicLvlSat); + } +#endif +#ifdef WEBRTC_AGC_DEBUG_DUMP + fprintf(stt->fpt, + "\tAGC->ProcessAnalog, frame %d: measure < LowerLim, micVol " + "= %d\n", + stt->fcount, stt->micVol); +#endif + } + } else { + /* The signal is inside the desired range which is: + * lowerLimit < Rxx160_LP/640 < upperLimit + */ + if (stt->changeToSlowMode > 4000) { + stt->msecSpeechInnerChange = 1000; + stt->msecSpeechOuterChange = 500; + stt->upperLimit = stt->upperPrimaryLimit; + stt->lowerLimit = stt->lowerPrimaryLimit; + } else { + stt->changeToSlowMode += 2; // in milliseconds + } + stt->msTooLow = 0; + stt->msTooHigh = 0; + + stt->micVol = inMicLevelTmp; + } +#ifdef MIC_LEVEL_FEEDBACK + if (stt->numBlocksMicLvlSat > NUM_BLOCKS_IN_SAT_BEFORE_CHANGE_TARGET) { + stt->micLvlSat = 1; + fprintf(stderr, "target before = %d (%d)\n", stt->analogTargetLevel, + stt->targetIdx); + WebRtcAgc_UpdateAgcThresholds(stt); + WebRtcAgc_CalculateGainTable( + 
&(stt->digitalAgc.gainTable[0]), stt->compressionGaindB, + stt->targetLevelDbfs, stt->limiterEnable, stt->analogTarget); + stt->numBlocksMicLvlSat = 0; + stt->micLvlSat = 0; + fprintf(stderr, "target offset = %d\n", stt->targetIdxOffset); + fprintf(stderr, "target after = %d (%d)\n", stt->analogTargetLevel, + stt->targetIdx); + } +#endif + } + } + + /* Ensure gain is not increased in presence of echo or after a mute event + * (but allow the zeroCtrl() increase on the frame of a mute detection). + */ + if (echo == 1 || + (stt->muteGuardMs > 0 && stt->muteGuardMs < kMuteGuardTimeMs)) { + if (stt->micVol > lastMicVol) { + stt->micVol = lastMicVol; + } + } + + /* limit the gain */ + if (stt->micVol > stt->maxLevel) { + stt->micVol = stt->maxLevel; + } else if (stt->micVol < stt->minOutput) { + stt->micVol = stt->minOutput; + } + + *outMicLevel = WEBRTC_SPL_MIN(stt->micVol, stt->maxAnalog) >> stt->scale; + + return 0; +} + +int WebRtcAgc_Process(void* agcInst, + const int16_t* const* in_near, + size_t num_bands, + size_t samples, + int16_t* const* out, + int32_t inMicLevel, + int32_t* outMicLevel, + int16_t echo, + uint8_t* saturationWarning) { + LegacyAgc* stt; + + stt = (LegacyAgc*)agcInst; + + // + if (stt == NULL) { + return -1; + } + // + + if (stt->fs == 8000) { + if (samples != 80) { + return -1; + } + } else if (stt->fs == 16000 || stt->fs == 32000 || stt->fs == 48000) { + if (samples != 160) { + return -1; + } + } else { + return -1; + } + + *saturationWarning = 0; + // TODO(minyue): PUT IN RANGE CHECKING FOR INPUT LEVELS + *outMicLevel = inMicLevel; + +#ifdef WEBRTC_AGC_DEBUG_DUMP + stt->fcount++; +#endif + + if (WebRtcAgc_ProcessDigital(&stt->digitalAgc, in_near, num_bands, out, + stt->fs, stt->lowLevelSignal) == -1) { +#ifdef WEBRTC_AGC_DEBUG_DUMP + fprintf(stt->fpt, "AGC->Process, frame %d: Error from DigAGC\n\n", + stt->fcount); +#endif + return -1; + } + if (stt->agcMode < kAgcModeFixedDigital && + (stt->lowLevelSignal == 0 || stt->agcMode != kAgcModeAdaptiveDigital)) { + if (WebRtcAgc_ProcessAnalog(agcInst, inMicLevel, outMicLevel, + stt->vadMic.logRatio, echo, + saturationWarning) == -1) { + return -1; + } + } +#ifdef WEBRTC_AGC_DEBUG_DUMP + fprintf(stt->agcLog, "%5d\t%d\t%d\t%d\t%d\n", stt->fcount, inMicLevel, + *outMicLevel, stt->maxLevel, stt->micVol); +#endif + + /* update queue */ + if (stt->inQueue > 1) { + memcpy(stt->env[0], stt->env[1], 10 * sizeof(int32_t)); + memcpy(stt->Rxx16w32_array[0], stt->Rxx16w32_array[1], 5 * sizeof(int32_t)); + } + + if (stt->inQueue > 0) { + stt->inQueue--; + } + + return 0; +} + +int WebRtcAgc_set_config(void* agcInst, WebRtcAgcConfig agcConfig) { + LegacyAgc* stt; + stt = (LegacyAgc*)agcInst; + + if (stt == NULL) { + return -1; + } + + if (stt->initFlag != kInitCheck) { + stt->lastError = AGC_UNINITIALIZED_ERROR; + return -1; + } + + if (agcConfig.limiterEnable != kAgcFalse && + agcConfig.limiterEnable != kAgcTrue) { + stt->lastError = AGC_BAD_PARAMETER_ERROR; + return -1; + } + stt->limiterEnable = agcConfig.limiterEnable; + stt->compressionGaindB = agcConfig.compressionGaindB; + if ((agcConfig.targetLevelDbfs < 0) || (agcConfig.targetLevelDbfs > 31)) { + stt->lastError = AGC_BAD_PARAMETER_ERROR; + return -1; + } + stt->targetLevelDbfs = agcConfig.targetLevelDbfs; + + if (stt->agcMode == kAgcModeFixedDigital) { + /* Adjust for different parameter interpretation in FixedDigital mode */ + stt->compressionGaindB += agcConfig.targetLevelDbfs; + } + + /* Update threshold levels for analog adaptation */ + WebRtcAgc_UpdateAgcThresholds(stt); + 
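+  /* WebRtcAgc_UpdateAgcThresholds re-derives the Rxx160_LP comparison
+   * window from the new target: start limits sit one kTargetLevelTable step
+   * on either side of the target, the primary limits two steps away and the
+   * secondary limits five steps away, where one table step corresponds to
+   * 1 dB. */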
+ /* Recalculate gain table */ + if (WebRtcAgc_CalculateGainTable( + &(stt->digitalAgc.gainTable[0]), stt->compressionGaindB, + stt->targetLevelDbfs, stt->limiterEnable, stt->analogTarget) == -1) { +#ifdef WEBRTC_AGC_DEBUG_DUMP + fprintf(stt->fpt, "AGC->set_config, frame %d: Error from calcGainTable\n\n", + stt->fcount); +#endif + return -1; + } + /* Store the config in a WebRtcAgcConfig */ + stt->usedConfig.compressionGaindB = agcConfig.compressionGaindB; + stt->usedConfig.limiterEnable = agcConfig.limiterEnable; + stt->usedConfig.targetLevelDbfs = agcConfig.targetLevelDbfs; + + return 0; +} + +int WebRtcAgc_get_config(void* agcInst, WebRtcAgcConfig* config) { + LegacyAgc* stt; + stt = (LegacyAgc*)agcInst; + + if (stt == NULL) { + return -1; + } + + if (config == NULL) { + stt->lastError = AGC_NULL_POINTER_ERROR; + return -1; + } + + if (stt->initFlag != kInitCheck) { + stt->lastError = AGC_UNINITIALIZED_ERROR; + return -1; + } + + config->limiterEnable = stt->usedConfig.limiterEnable; + config->targetLevelDbfs = stt->usedConfig.targetLevelDbfs; + config->compressionGaindB = stt->usedConfig.compressionGaindB; + + return 0; +} + +void* WebRtcAgc_Create() { + LegacyAgc* stt = malloc(sizeof(LegacyAgc)); + +#ifdef WEBRTC_AGC_DEBUG_DUMP + stt->fpt = fopen("./agc_test_log.txt", "wt"); + stt->agcLog = fopen("./agc_debug_log.txt", "wt"); + stt->digitalAgc.logFile = fopen("./agc_log.txt", "wt"); +#endif + + stt->initFlag = 0; + stt->lastError = 0; + + return stt; +} + +void WebRtcAgc_Free(void* state) { + LegacyAgc* stt; + + stt = (LegacyAgc*)state; +#ifdef WEBRTC_AGC_DEBUG_DUMP + fclose(stt->fpt); + fclose(stt->agcLog); + fclose(stt->digitalAgc.logFile); +#endif + free(stt); +} + +/* minLevel - Minimum volume level + * maxLevel - Maximum volume level + */ +int WebRtcAgc_Init(void* agcInst, + int32_t minLevel, + int32_t maxLevel, + int16_t agcMode, + uint32_t fs) { + int32_t max_add, tmp32; + int16_t i; + int tmpNorm; + LegacyAgc* stt; + + /* typecast state pointer */ + stt = (LegacyAgc*)agcInst; + + if (WebRtcAgc_InitDigital(&stt->digitalAgc, agcMode) != 0) { + stt->lastError = AGC_UNINITIALIZED_ERROR; + return -1; + } + + /* Analog AGC variables */ + stt->envSum = 0; + +/* mode = 0 - Only saturation protection + * 1 - Analog Automatic Gain Control [-targetLevelDbfs (default -3 + * dBOv)] + * 2 - Digital Automatic Gain Control [-targetLevelDbfs (default -3 + * dBOv)] + * 3 - Fixed Digital Gain [compressionGaindB (default 8 dB)] + */ +#ifdef WEBRTC_AGC_DEBUG_DUMP + stt->fcount = 0; + fprintf(stt->fpt, "AGC->Init\n"); +#endif + if (agcMode < kAgcModeUnchanged || agcMode > kAgcModeFixedDigital) { +#ifdef WEBRTC_AGC_DEBUG_DUMP + fprintf(stt->fpt, "AGC->Init: error, incorrect mode\n\n"); +#endif + return -1; + } + stt->agcMode = agcMode; + stt->fs = fs; + + /* initialize input VAD */ + WebRtcAgc_InitVad(&stt->vadMic); + + /* If the volume range is smaller than 0-256 then + * the levels are shifted up to Q8-domain */ + tmpNorm = WebRtcSpl_NormU32((uint32_t)maxLevel); + stt->scale = tmpNorm - 23; + if (stt->scale < 0) { + stt->scale = 0; + } + // TODO(bjornv): Investigate if we really need to scale up a small range now + // when we have + // a guard against zero-increments. For now, we do not support scale up (scale + // = 0). 
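+  /* As a worked example of the disabled scale-up path: a platform reporting
+   * a volume range of 0..100 gives WebRtcSpl_NormU32(100) = 25 leading
+   * zeros, hence scale = 2, and levels would be tracked internally at four
+   * times the native resolution. The assignment below overrides this to 0,
+   * keeping the native resolution. */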
+ stt->scale = 0; + maxLevel <<= stt->scale; + minLevel <<= stt->scale; + + /* Make minLevel and maxLevel static in AdaptiveDigital */ + if (stt->agcMode == kAgcModeAdaptiveDigital) { + minLevel = 0; + maxLevel = 255; + stt->scale = 0; + } + /* The maximum supplemental volume range is based on a vague idea + * of how much lower the gain will be than the real analog gain. */ + max_add = (maxLevel - minLevel) / 4; + + /* Minimum/maximum volume level that can be set */ + stt->minLevel = minLevel; + stt->maxAnalog = maxLevel; + stt->maxLevel = maxLevel + max_add; + stt->maxInit = stt->maxLevel; + + stt->zeroCtrlMax = stt->maxAnalog; + stt->lastInMicLevel = 0; + + /* Initialize micVol parameter */ + stt->micVol = stt->maxAnalog; + if (stt->agcMode == kAgcModeAdaptiveDigital) { + stt->micVol = 127; /* Mid-point of mic level */ + } + stt->micRef = stt->micVol; + stt->micGainIdx = 127; +#ifdef MIC_LEVEL_FEEDBACK + stt->numBlocksMicLvlSat = 0; + stt->micLvlSat = 0; +#endif +#ifdef WEBRTC_AGC_DEBUG_DUMP + fprintf(stt->fpt, "AGC->Init: minLevel = %d, maxAnalog = %d, maxLevel = %d\n", + stt->minLevel, stt->maxAnalog, stt->maxLevel); +#endif + + /* Minimum output volume is 4% higher than the available lowest volume level + */ + tmp32 = ((stt->maxLevel - stt->minLevel) * 10) >> 8; + stt->minOutput = (stt->minLevel + tmp32); + + stt->msTooLow = 0; + stt->msTooHigh = 0; + stt->changeToSlowMode = 0; + stt->firstCall = 0; + stt->msZero = 0; + stt->muteGuardMs = 0; + stt->gainTableIdx = 0; + + stt->msecSpeechInnerChange = kMsecSpeechInner; + stt->msecSpeechOuterChange = kMsecSpeechOuter; + + stt->activeSpeech = 0; + stt->Rxx16_LPw32Max = 0; + + stt->vadThreshold = kNormalVadThreshold; + stt->inActive = 0; + + for (i = 0; i < RXX_BUFFER_LEN; i++) { + stt->Rxx16_vectorw32[i] = (int32_t)1000; /* -54dBm0 */ + } + stt->Rxx160w32 = + 125 * RXX_BUFFER_LEN; /* (stt->Rxx16_vectorw32[0]>>3) = 125 */ + + stt->Rxx16pos = 0; + stt->Rxx16_LPw32 = (int32_t)16284; /* Q(-4) */ + + for (i = 0; i < 5; i++) { + stt->Rxx16w32_array[0][i] = 0; + } + for (i = 0; i < 10; i++) { + stt->env[0][i] = 0; + stt->env[1][i] = 0; + } + stt->inQueue = 0; + +#ifdef MIC_LEVEL_FEEDBACK + stt->targetIdxOffset = 0; +#endif + + WebRtcSpl_MemSetW32(stt->filterState, 0, 8); + + stt->initFlag = kInitCheck; + // Default config settings. + stt->defaultConfig.limiterEnable = kAgcTrue; + stt->defaultConfig.targetLevelDbfs = AGC_DEFAULT_TARGET_LEVEL; + stt->defaultConfig.compressionGaindB = AGC_DEFAULT_COMP_GAIN; + + if (WebRtcAgc_set_config(stt, stt->defaultConfig) == -1) { + stt->lastError = AGC_UNSPECIFIED_ERROR; + return -1; + } + stt->Rxx160_LPw32 = stt->analogTargetLevel; // Initialize rms value + + stt->lowLevelSignal = 0; + + /* Only positive values are allowed that are not too large */ + if ((minLevel >= maxLevel) || (maxLevel & 0xFC000000)) { +#ifdef WEBRTC_AGC_DEBUG_DUMP + fprintf(stt->fpt, "minLevel, maxLevel value(s) are invalid\n\n"); +#endif + return -1; + } else { +#ifdef WEBRTC_AGC_DEBUG_DUMP + fprintf(stt->fpt, "\n"); +#endif + return 0; + } +} diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/agc/legacy/analog_agc.h b/third_party/libwebrtc/webrtc/modules/audio_processing/agc/legacy/analog_agc.h new file mode 100644 index 0000000000..1fed3779ae --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/agc/legacy/analog_agc.h @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC_LEGACY_ANALOG_AGC_H_ +#define MODULES_AUDIO_PROCESSING_AGC_LEGACY_ANALOG_AGC_H_ + +//#define MIC_LEVEL_FEEDBACK +#ifdef WEBRTC_AGC_DEBUG_DUMP +#include <stdio.h> +#endif + +#include "modules/audio_processing/agc/legacy/digital_agc.h" +#include "modules/audio_processing/agc/legacy/gain_control.h" +#include "typedefs.h" // NOLINT(build/include) + +/* Analog Automatic Gain Control variables: + * Constant declarations (inner limits inside which no changes are done) + * In the beginning the range is narrower to widen as soon as the measure + * 'Rxx160_LP' is inside it. Currently the starting limits are -22.2+/-1dBm0 + * and the final limits -22.2+/-2.5dBm0. These levels makes the speech signal + * go towards -25.4dBm0 (-31.4dBov). Tuned with wbfile-31.4dBov.pcm + * The limits are created by running the AGC with a file having the desired + * signal level and thereafter plotting Rxx160_LP in the dBm0-domain defined + * by out=10*log10(in/260537279.7); Set the target level to the average level + * of our measure Rxx160_LP. Remember that the levels are in blocks of 16 in + * Q(-7). (Example matlab code: round(db2pow(-21.2)*16/2^7) ) + */ +#define RXX_BUFFER_LEN 10 + +static const int16_t kMsecSpeechInner = 520; +static const int16_t kMsecSpeechOuter = 340; + +static const int16_t kNormalVadThreshold = 400; + +static const int16_t kAlphaShortTerm = 6; // 1 >> 6 = 0.0156 +static const int16_t kAlphaLongTerm = 10; // 1 >> 10 = 0.000977 + +typedef struct { + // Configurable parameters/variables + uint32_t fs; // Sampling frequency + int16_t compressionGaindB; // Fixed gain level in dB + int16_t targetLevelDbfs; // Target level in -dBfs of envelope (default -3) + int16_t agcMode; // Hard coded mode (adaptAna/adaptDig/fixedDig) + uint8_t limiterEnable; // Enabling limiter (on/off (default off)) + WebRtcAgcConfig defaultConfig; + WebRtcAgcConfig usedConfig; + + // General variables + int16_t initFlag; + int16_t lastError; + + // Target level parameters + // Based on the above: analogTargetLevel = round((32767*10^(-22/20))^2*16/2^7) + int32_t analogTargetLevel; // = RXX_BUFFER_LEN * 846805; -22 dBfs + int32_t startUpperLimit; // = RXX_BUFFER_LEN * 1066064; -21 dBfs + int32_t startLowerLimit; // = RXX_BUFFER_LEN * 672641; -23 dBfs + int32_t upperPrimaryLimit; // = RXX_BUFFER_LEN * 1342095; -20 dBfs + int32_t lowerPrimaryLimit; // = RXX_BUFFER_LEN * 534298; -24 dBfs + int32_t upperSecondaryLimit; // = RXX_BUFFER_LEN * 2677832; -17 dBfs + int32_t lowerSecondaryLimit; // = RXX_BUFFER_LEN * 267783; -27 dBfs + uint16_t targetIdx; // Table index for corresponding target level +#ifdef MIC_LEVEL_FEEDBACK + uint16_t targetIdxOffset; // Table index offset for level compensation +#endif + int16_t analogTarget; // Digital reference level in ENV scale + + // Analog AGC specific variables + int32_t filterState[8]; // For downsampling wb to nb + int32_t upperLimit; // Upper limit for mic energy + int32_t lowerLimit; // Lower limit for mic energy + int32_t Rxx160w32; // Average energy for one frame + int32_t Rxx16_LPw32; // Low pass filtered subframe energies + int32_t Rxx160_LPw32; // Low pass filtered frame energies + int32_t Rxx16_LPw32Max; // 
Keeps track of largest energy subframe + int32_t Rxx16_vectorw32[RXX_BUFFER_LEN]; // Array with subframe energies + int32_t Rxx16w32_array[2][5]; // Energy values of microphone signal + int32_t env[2][10]; // Envelope values of subframes + + int16_t Rxx16pos; // Current position in the Rxx16_vectorw32 + int16_t envSum; // Filtered scaled envelope in subframes + int16_t vadThreshold; // Threshold for VAD decision + int16_t inActive; // Inactive time in milliseconds + int16_t msTooLow; // Milliseconds of speech at a too low level + int16_t msTooHigh; // Milliseconds of speech at a too high level + int16_t changeToSlowMode; // Change to slow mode after some time at target + int16_t firstCall; // First call to the process-function + int16_t msZero; // Milliseconds of zero input + int16_t msecSpeechOuterChange; // Min ms of speech between volume changes + int16_t msecSpeechInnerChange; // Min ms of speech between volume changes + int16_t activeSpeech; // Milliseconds of active speech + int16_t muteGuardMs; // Counter to prevent mute action + int16_t inQueue; // 10 ms batch indicator + + // Microphone level variables + int32_t micRef; // Remember ref. mic level for virtual mic + uint16_t gainTableIdx; // Current position in virtual gain table + int32_t micGainIdx; // Gain index of mic level to increase slowly + int32_t micVol; // Remember volume between frames + int32_t maxLevel; // Max possible vol level, incl dig gain + int32_t maxAnalog; // Maximum possible analog volume level + int32_t maxInit; // Initial value of "max" + int32_t minLevel; // Minimum possible volume level + int32_t minOutput; // Minimum output volume level + int32_t zeroCtrlMax; // Remember max gain => don't amp low input + int32_t lastInMicLevel; + + int16_t scale; // Scale factor for internal volume levels +#ifdef MIC_LEVEL_FEEDBACK + int16_t numBlocksMicLvlSat; + uint8_t micLvlSat; +#endif + // Structs for VAD and digital_agc + AgcVad vadMic; + DigitalAgc digitalAgc; + +#ifdef WEBRTC_AGC_DEBUG_DUMP + FILE* fpt; + FILE* agcLog; + int32_t fcount; +#endif + + int16_t lowLevelSignal; +} LegacyAgc; + +#endif // MODULES_AUDIO_PROCESSING_AGC_LEGACY_ANALOG_AGC_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/agc/legacy/digital_agc.c b/third_party/libwebrtc/webrtc/modules/audio_processing/agc/legacy/digital_agc.c new file mode 100644 index 0000000000..3269a17ce4 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/agc/legacy/digital_agc.c @@ -0,0 +1,703 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +/* digital_agc.c + * + */ + +#include "modules/audio_processing/agc/legacy/digital_agc.h" + +#include <string.h> +#ifdef WEBRTC_AGC_DEBUG_DUMP +#include <stdio.h> +#endif + +#include "rtc_base/checks.h" +#include "modules/audio_processing/agc/legacy/gain_control.h" + +// To generate the gaintable, copy&paste the following lines to a Matlab window: +// MaxGain = 6; MinGain = 0; CompRatio = 3; Knee = 1; +// zeros = 0:31; lvl = 2.^(1-zeros); +// A = -10*log10(lvl) * (CompRatio - 1) / CompRatio; +// B = MaxGain - MinGain; +// gains = round(2^16*10.^(0.05 * (MinGain + B * ( +// log(exp(-Knee*A)+exp(-Knee*B)) - log(1+exp(-Knee*B)) ) / +// log(1/(1+exp(Knee*B)))))); +// fprintf(1, '\t%i, %i, %i, %i,\n', gains); +// % Matlab code for plotting the gain and input/output level characteristic +// (copy/paste the following 3 lines): +// in = 10*log10(lvl); out = 20*log10(gains/65536); +// subplot(121); plot(in, out); axis([-30, 0, -5, 20]); grid on; xlabel('Input +// (dB)'); ylabel('Gain (dB)'); +// subplot(122); plot(in, in+out); axis([-30, 0, -30, 5]); grid on; +// xlabel('Input (dB)'); ylabel('Output (dB)'); +// zoom on; + +// Generator table for y=log2(1+e^x) in Q8. +enum { kGenFuncTableSize = 128 }; +static const uint16_t kGenFuncTable[kGenFuncTableSize] = { + 256, 485, 786, 1126, 1484, 1849, 2217, 2586, 2955, 3324, 3693, + 4063, 4432, 4801, 5171, 5540, 5909, 6279, 6648, 7017, 7387, 7756, + 8125, 8495, 8864, 9233, 9603, 9972, 10341, 10711, 11080, 11449, 11819, + 12188, 12557, 12927, 13296, 13665, 14035, 14404, 14773, 15143, 15512, 15881, + 16251, 16620, 16989, 17359, 17728, 18097, 18466, 18836, 19205, 19574, 19944, + 20313, 20682, 21052, 21421, 21790, 22160, 22529, 22898, 23268, 23637, 24006, + 24376, 24745, 25114, 25484, 25853, 26222, 26592, 26961, 27330, 27700, 28069, + 28438, 28808, 29177, 29546, 29916, 30285, 30654, 31024, 31393, 31762, 32132, + 32501, 32870, 33240, 33609, 33978, 34348, 34717, 35086, 35456, 35825, 36194, + 36564, 36933, 37302, 37672, 38041, 38410, 38780, 39149, 39518, 39888, 40257, + 40626, 40996, 41365, 41734, 42104, 42473, 42842, 43212, 43581, 43950, 44320, + 44689, 45058, 45428, 45797, 46166, 46536, 46905}; + +static const int16_t kAvgDecayTime = 250; // frames; < 3000 + +int32_t WebRtcAgc_CalculateGainTable(int32_t* gainTable, // Q16 + int16_t digCompGaindB, // Q0 + int16_t targetLevelDbfs, // Q0 + uint8_t limiterEnable, + int16_t analogTarget) // Q0 +{ + // This function generates the compressor gain table used in the fixed digital + // part. 
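+  // The table has 32 entries in Q16, one per leading-zero count of the
+  // signal envelope, so neighbouring entries are roughly 3 dB of input
+  // level apart (one bit of a power value). Unity gain is 65536; for
+  // instance, an entry of 131072 (2.0 in Q16) corresponds to about
+  // +6.02 dB.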
+ uint32_t tmpU32no1, tmpU32no2, absInLevel, logApprox; + int32_t inLevel, limiterLvl; + int32_t tmp32, tmp32no1, tmp32no2, numFIX, den, y32; + const uint16_t kLog10 = 54426; // log2(10) in Q14 + const uint16_t kLog10_2 = 49321; // 10*log10(2) in Q14 + const uint16_t kLogE_1 = 23637; // log2(e) in Q14 + uint16_t constMaxGain; + uint16_t tmpU16, intPart, fracPart; + const int16_t kCompRatio = 3; + const int16_t kSoftLimiterLeft = 1; + int16_t limiterOffset = 0; // Limiter offset + int16_t limiterIdx, limiterLvlX; + int16_t constLinApprox, zeroGainLvl, maxGain, diffGain; + int16_t i, tmp16, tmp16no1; + int zeros, zerosScale; + + // Constants + // kLogE_1 = 23637; // log2(e) in Q14 + // kLog10 = 54426; // log2(10) in Q14 + // kLog10_2 = 49321; // 10*log10(2) in Q14 + + // Calculate maximum digital gain and zero gain level + tmp32no1 = (digCompGaindB - analogTarget) * (kCompRatio - 1); + tmp16no1 = analogTarget - targetLevelDbfs; + tmp16no1 += + WebRtcSpl_DivW32W16ResW16(tmp32no1 + (kCompRatio >> 1), kCompRatio); + maxGain = WEBRTC_SPL_MAX(tmp16no1, (analogTarget - targetLevelDbfs)); + tmp32no1 = maxGain * kCompRatio; + zeroGainLvl = digCompGaindB; + zeroGainLvl -= WebRtcSpl_DivW32W16ResW16(tmp32no1 + ((kCompRatio - 1) >> 1), + kCompRatio - 1); + if ((digCompGaindB <= analogTarget) && (limiterEnable)) { + zeroGainLvl += (analogTarget - digCompGaindB + kSoftLimiterLeft); + limiterOffset = 0; + } + + // Calculate the difference between maximum gain and gain at 0dB0v: + // diffGain = maxGain + (compRatio-1)*zeroGainLvl/compRatio + // = (compRatio-1)*digCompGaindB/compRatio + tmp32no1 = digCompGaindB * (kCompRatio - 1); + diffGain = + WebRtcSpl_DivW32W16ResW16(tmp32no1 + (kCompRatio >> 1), kCompRatio); + if (diffGain < 0 || diffGain >= kGenFuncTableSize) { + RTC_DCHECK(0); + return -1; + } + + // Calculate the limiter level and index: + // limiterLvlX = analogTarget - limiterOffset + // limiterLvl = targetLevelDbfs + limiterOffset/compRatio + limiterLvlX = analogTarget - limiterOffset; + limiterIdx = 2 + WebRtcSpl_DivW32W16ResW16((int32_t)limiterLvlX * (1 << 13), + kLog10_2 / 2); + tmp16no1 = + WebRtcSpl_DivW32W16ResW16(limiterOffset + (kCompRatio >> 1), kCompRatio); + limiterLvl = targetLevelDbfs + tmp16no1; + + // Calculate (through table lookup): + // constMaxGain = log2(1+2^(log2(e)*diffGain)); (in Q8) + constMaxGain = kGenFuncTable[diffGain]; // in Q8 + + // Calculate a parameter used to approximate the fractional part of 2^x with a + // piecewise linear function in Q14: + // constLinApprox = round(3/2*(4*(3-2*sqrt(2))/(log(2)^2)-0.5)*2^14); + constLinApprox = 22817; // in Q14 + + // Calculate a denominator used in the exponential part to convert from dB to + // linear scale: + // den = 20*constMaxGain (in Q8) + den = WEBRTC_SPL_MUL_16_U16(20, constMaxGain); // in Q8 + + for (i = 0; i < 32; i++) { + // Calculate scaled input level (compressor): + // inLevel = + // fix((-constLog10_2*(compRatio-1)*(1-i)+fix(compRatio/2))/compRatio) + tmp16 = (int16_t)((kCompRatio - 1) * (i - 1)); // Q0 + tmp32 = WEBRTC_SPL_MUL_16_U16(tmp16, kLog10_2) + 1; // Q14 + inLevel = WebRtcSpl_DivW32W16(tmp32, kCompRatio); // Q14 + + // Calculate diffGain-inLevel, to map using the genFuncTable + inLevel = (int32_t)diffGain * (1 << 14) - inLevel; // Q14 + + // Make calculations on abs(inLevel) and compensate for the sign afterwards. 
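+    // The lookup below interpolates kGenFuncTable (Q8) linearly: the top
+    // bits of absInLevel (Q14) select the entry and the low 14 bits form
+    // the fraction, so the Q8 difference times the Q14 fraction lands in
+    // Q22, and the final >> 8 yields log2(1 + e^x) in Q14.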
+ absInLevel = (uint32_t)WEBRTC_SPL_ABS_W32(inLevel); // Q14 + + // LUT with interpolation + intPart = (uint16_t)(absInLevel >> 14); + fracPart = + (uint16_t)(absInLevel & 0x00003FFF); // extract the fractional part + tmpU16 = kGenFuncTable[intPart + 1] - kGenFuncTable[intPart]; // Q8 + tmpU32no1 = tmpU16 * fracPart; // Q22 + tmpU32no1 += (uint32_t)kGenFuncTable[intPart] << 14; // Q22 + logApprox = tmpU32no1 >> 8; // Q14 + // Compensate for negative exponent using the relation: + // log2(1 + 2^-x) = log2(1 + 2^x) - x + if (inLevel < 0) { + zeros = WebRtcSpl_NormU32(absInLevel); + zerosScale = 0; + if (zeros < 15) { + // Not enough space for multiplication + tmpU32no2 = absInLevel >> (15 - zeros); // Q(zeros-1) + tmpU32no2 = WEBRTC_SPL_UMUL_32_16(tmpU32no2, kLogE_1); // Q(zeros+13) + if (zeros < 9) { + zerosScale = 9 - zeros; + tmpU32no1 >>= zerosScale; // Q(zeros+13) + } else { + tmpU32no2 >>= zeros - 9; // Q22 + } + } else { + tmpU32no2 = WEBRTC_SPL_UMUL_32_16(absInLevel, kLogE_1); // Q28 + tmpU32no2 >>= 6; // Q22 + } + logApprox = 0; + if (tmpU32no2 < tmpU32no1) { + logApprox = (tmpU32no1 - tmpU32no2) >> (8 - zerosScale); // Q14 + } + } + numFIX = (maxGain * constMaxGain) * (1 << 6); // Q14 + numFIX -= (int32_t)logApprox * diffGain; // Q14 + + // Calculate ratio + // Shift |numFIX| as much as possible. + // Ensure we avoid wrap-around in |den| as well. + if (numFIX > (den >> 8) || -numFIX > (den >> 8)) // |den| is Q8. + { + zeros = WebRtcSpl_NormW32(numFIX); + } else { + zeros = WebRtcSpl_NormW32(den) + 8; + } + numFIX *= 1 << zeros; // Q(14+zeros) + + // Shift den so we end up in Qy1 + tmp32no1 = WEBRTC_SPL_SHIFT_W32(den, zeros - 9); // Q(zeros - 1) + y32 = numFIX / tmp32no1; // in Q15 + // This is to do rounding in Q14. + y32 = y32 >= 0 ? (y32 + 1) >> 1 : -((-y32 + 1) >> 1); + + if (limiterEnable && (i < limiterIdx)) { + tmp32 = WEBRTC_SPL_MUL_16_U16(i - 1, kLog10_2); // Q14 + tmp32 -= limiterLvl * (1 << 14); // Q14 + y32 = WebRtcSpl_DivW32W16(tmp32 + 10, 20); + } + if (y32 > 39000) { + tmp32 = (y32 >> 1) * kLog10 + 4096; // in Q27 + tmp32 >>= 13; // In Q14. + } else { + tmp32 = y32 * kLog10 + 8192; // in Q28 + tmp32 >>= 14; // In Q14. 
+ } + tmp32 += 16 << 14; // in Q14 (Make sure final output is in Q16) + + // Calculate power + if (tmp32 > 0) { + intPart = (int16_t)(tmp32 >> 14); + fracPart = (uint16_t)(tmp32 & 0x00003FFF); // in Q14 + if ((fracPart >> 13) != 0) { + tmp16 = (2 << 14) - constLinApprox; + tmp32no2 = (1 << 14) - fracPart; + tmp32no2 *= tmp16; + tmp32no2 >>= 13; + tmp32no2 = (1 << 14) - tmp32no2; + } else { + tmp16 = constLinApprox - (1 << 14); + tmp32no2 = (fracPart * tmp16) >> 13; + } + fracPart = (uint16_t)tmp32no2; + gainTable[i] = + (1 << intPart) + WEBRTC_SPL_SHIFT_W32(fracPart, intPart - 14); + } else { + gainTable[i] = 0; + } + } + + return 0; +} + +int32_t WebRtcAgc_InitDigital(DigitalAgc* stt, int16_t agcMode) { + if (agcMode == kAgcModeFixedDigital) { + // start at minimum to find correct gain faster + stt->capacitorSlow = 0; + } else { + // start out with 0 dB gain + stt->capacitorSlow = 134217728; // (int32_t)(0.125f * 32768.0f * 32768.0f); + } + stt->capacitorFast = 0; + stt->gain = 65536; + stt->gatePrevious = 0; + stt->agcMode = agcMode; +#ifdef WEBRTC_AGC_DEBUG_DUMP + stt->frameCounter = 0; +#endif + + // initialize VADs + WebRtcAgc_InitVad(&stt->vadNearend); + WebRtcAgc_InitVad(&stt->vadFarend); + + return 0; +} + +int32_t WebRtcAgc_AddFarendToDigital(DigitalAgc* stt, + const int16_t* in_far, + size_t nrSamples) { + RTC_DCHECK(stt); + // VAD for far end + WebRtcAgc_ProcessVad(&stt->vadFarend, in_far, nrSamples); + + return 0; +} + +int32_t WebRtcAgc_ProcessDigital(DigitalAgc* stt, + const int16_t* const* in_near, + size_t num_bands, + int16_t* const* out, + uint32_t FS, + int16_t lowlevelSignal) { + // array for gains (one value per ms, incl start & end) + int32_t gains[11]; + + int32_t out_tmp, tmp32; + int32_t env[10]; + int32_t max_nrg; + int32_t cur_level; + int32_t gain32, delta; + int16_t logratio; + int16_t lower_thr, upper_thr; + int16_t zeros = 0, zeros_fast, frac = 0; + int16_t decay; + int16_t gate, gain_adj; + int16_t k; + size_t n, i, L; + int16_t L2; // samples/subframe + + // determine number of samples per ms + if (FS == 8000) { + L = 8; + L2 = 3; + } else if (FS == 16000 || FS == 32000 || FS == 48000) { + L = 16; + L2 = 4; + } else { + return -1; + } + + for (i = 0; i < num_bands; ++i) { + if (in_near[i] != out[i]) { + // Only needed if they don't already point to the same place. 
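+      // The public contract (see gain_control.h) allows out == in_near, so
+      // the copy is skipped in the common in-place case and the rest of the
+      // function writes to out[] only.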
+ memcpy(out[i], in_near[i], 10 * L * sizeof(in_near[i][0])); + } + } + // VAD for near end + logratio = WebRtcAgc_ProcessVad(&stt->vadNearend, out[0], L * 10); + + // Account for far end VAD + if (stt->vadFarend.counter > 10) { + tmp32 = 3 * logratio; + logratio = (int16_t)((tmp32 - stt->vadFarend.logRatio) >> 2); + } + + // Determine decay factor depending on VAD + // upper_thr = 1.0f; + // lower_thr = 0.25f; + upper_thr = 1024; // Q10 + lower_thr = 0; // Q10 + if (logratio > upper_thr) { + // decay = -2^17 / DecayTime; -> -65 + decay = -65; + } else if (logratio < lower_thr) { + decay = 0; + } else { + // decay = (int16_t)(((lower_thr - logratio) + // * (2^27/(DecayTime*(upper_thr-lower_thr)))) >> 10); + // SUBSTITUTED: 2^27/(DecayTime*(upper_thr-lower_thr)) -> 65 + tmp32 = (lower_thr - logratio) * 65; + decay = (int16_t)(tmp32 >> 10); + } + + // adjust decay factor for long silence (detected as low standard deviation) + // This is only done in the adaptive modes + if (stt->agcMode != kAgcModeFixedDigital) { + if (stt->vadNearend.stdLongTerm < 4000) { + decay = 0; + } else if (stt->vadNearend.stdLongTerm < 8096) { + // decay = (int16_t)(((stt->vadNearend.stdLongTerm - 4000) * decay) >> + // 12); + tmp32 = (stt->vadNearend.stdLongTerm - 4000) * decay; + decay = (int16_t)(tmp32 >> 12); + } + + if (lowlevelSignal != 0) { + decay = 0; + } + } +#ifdef WEBRTC_AGC_DEBUG_DUMP + stt->frameCounter++; + fprintf(stt->logFile, "%5.2f\t%d\t%d\t%d\t", (float)(stt->frameCounter) / 100, + logratio, decay, stt->vadNearend.stdLongTerm); +#endif + // Find max amplitude per sub frame + // iterate over sub frames + for (k = 0; k < 10; k++) { + // iterate over samples + max_nrg = 0; + for (n = 0; n < L; n++) { + int32_t nrg = out[0][k * L + n] * out[0][k * L + n]; + if (nrg > max_nrg) { + max_nrg = nrg; + } + } + env[k] = max_nrg; + } + + // Calculate gain per sub frame + gains[0] = stt->gain; + for (k = 0; k < 10; k++) { + // Fast envelope follower + // decay time = -131000 / -1000 = 131 (ms) + stt->capacitorFast = + AGC_SCALEDIFF32(-1000, stt->capacitorFast, stt->capacitorFast); + if (env[k] > stt->capacitorFast) { + stt->capacitorFast = env[k]; + } + // Slow envelope follower + if (env[k] > stt->capacitorSlow) { + // increase capacitorSlow + stt->capacitorSlow = AGC_SCALEDIFF32(500, (env[k] - stt->capacitorSlow), + stt->capacitorSlow); + } else { + // decrease capacitorSlow + stt->capacitorSlow = + AGC_SCALEDIFF32(decay, stt->capacitorSlow, stt->capacitorSlow); + } + + // use maximum of both capacitors as current level + if (stt->capacitorFast > stt->capacitorSlow) { + cur_level = stt->capacitorFast; + } else { + cur_level = stt->capacitorSlow; + } + // Translate signal level into gain, using a piecewise linear approximation + // find number of leading zeros + zeros = WebRtcSpl_NormU32((uint32_t)cur_level); + if (cur_level == 0) { + zeros = 31; + } + tmp32 = ((uint32_t)cur_level << zeros) & 0x7FFFFFFF; + frac = (int16_t)(tmp32 >> 19); // Q12. 
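+    // Gain lookup with linear interpolation: each leading zero of the
+    // envelope moves one table entry and frac (Q12) blends between
+    // neighbours. For instance, cur_level = 1 << 20 gives a zero count of
+    // 11 with frac = 0, so gains[k + 1] is exactly stt->gainTable[11].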
+ tmp32 = (stt->gainTable[zeros - 1] - stt->gainTable[zeros]) * frac; + gains[k + 1] = stt->gainTable[zeros] + (tmp32 >> 12); +#ifdef WEBRTC_AGC_DEBUG_DUMP + if (k == 0) { + fprintf(stt->logFile, "%d\t%d\t%d\t%d\t%d\n", env[0], cur_level, + stt->capacitorFast, stt->capacitorSlow, zeros); + } +#endif + } + + // Gate processing (lower gain during absence of speech) + zeros = (zeros << 9) - (frac >> 3); + // find number of leading zeros + zeros_fast = WebRtcSpl_NormU32((uint32_t)stt->capacitorFast); + if (stt->capacitorFast == 0) { + zeros_fast = 31; + } + tmp32 = ((uint32_t)stt->capacitorFast << zeros_fast) & 0x7FFFFFFF; + zeros_fast <<= 9; + zeros_fast -= (int16_t)(tmp32 >> 22); + + gate = 1000 + zeros_fast - zeros - stt->vadNearend.stdShortTerm; + + if (gate < 0) { + stt->gatePrevious = 0; + } else { + tmp32 = stt->gatePrevious * 7; + gate = (int16_t)((gate + tmp32) >> 3); + stt->gatePrevious = gate; + } + // gate < 0 -> no gate + // gate > 2500 -> max gate + if (gate > 0) { + if (gate < 2500) { + gain_adj = (2500 - gate) >> 5; + } else { + gain_adj = 0; + } + for (k = 0; k < 10; k++) { + if ((gains[k + 1] - stt->gainTable[0]) > 8388608) { + // To prevent wraparound + tmp32 = (gains[k + 1] - stt->gainTable[0]) >> 8; + tmp32 *= 178 + gain_adj; + } else { + tmp32 = (gains[k + 1] - stt->gainTable[0]) * (178 + gain_adj); + tmp32 >>= 8; + } + gains[k + 1] = stt->gainTable[0] + tmp32; + } + } + + // Limit gain to avoid overload distortion + for (k = 0; k < 10; k++) { + // To prevent wrap around + zeros = 10; + if (gains[k + 1] > 47453132) { + zeros = 16 - WebRtcSpl_NormW32(gains[k + 1]); + } + gain32 = (gains[k + 1] >> zeros) + 1; + gain32 *= gain32; + // check for overflow + while (AGC_MUL32((env[k] >> 12) + 1, gain32) > + WEBRTC_SPL_SHIFT_W32((int32_t)32767, 2 * (1 - zeros + 10))) { + // multiply by 253/256 ==> -0.1 dB + if (gains[k + 1] > 8388607) { + // Prevent wrap around + gains[k + 1] = (gains[k + 1] / 256) * 253; + } else { + gains[k + 1] = (gains[k + 1] * 253) / 256; + } + gain32 = (gains[k + 1] >> zeros) + 1; + gain32 *= gain32; + } + } + // gain reductions should be done 1 ms earlier than gain increases + for (k = 1; k < 10; k++) { + if (gains[k] > gains[k + 1]) { + gains[k] = gains[k + 1]; + } + } + // save start gain for next frame + stt->gain = gains[10]; + + // Apply gain + // handle first sub frame separately + delta = (gains[1] - gains[0]) * (1 << (4 - L2)); + gain32 = gains[0] * (1 << 4); + // iterate over samples + for (n = 0; n < L; n++) { + for (i = 0; i < num_bands; ++i) { + tmp32 = out[i][n] * ((gain32 + 127) >> 7); + out_tmp = tmp32 >> 16; + if (out_tmp > 4095) { + out[i][n] = (int16_t)32767; + } else if (out_tmp < -4096) { + out[i][n] = (int16_t)-32768; + } else { + tmp32 = out[i][n] * (gain32 >> 4); + out[i][n] = (int16_t)(tmp32 >> 16); + } + } + // + + gain32 += delta; + } + // iterate over subframes + for (k = 1; k < 10; k++) { + delta = (gains[k + 1] - gains[k]) * (1 << (4 - L2)); + gain32 = gains[k] * (1 << 4); + // iterate over samples + for (n = 0; n < L; n++) { + for (i = 0; i < num_bands; ++i) { + int64_t tmp64 = ((int64_t)(out[i][k * L + n])) * (gain32 >> 4); + tmp64 = tmp64 >> 16; + if (tmp64 > 32767) { + out[i][k * L + n] = 32767; + } + else if (tmp64 < -32768) { + out[i][k * L + n] = -32768; + } + else { + out[i][k * L + n] = (int16_t)(tmp64); + } + } + gain32 += delta; + } + } + + return 0; +} + +void WebRtcAgc_InitVad(AgcVad* state) { + int16_t k; + + state->HPstate = 0; // state of high pass filter + state->logRatio = 0; // log( P(active) / P(inactive) ) 
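+  // The VAD keeps running short-term and long-term estimates of the input
+  // level's mean and variance and votes on speech via a log-likelihood
+  // ratio in Q10, clamped to +/-2048 (+/-2.0) in WebRtcAgc_ProcessVad.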
+ // average input level (Q10) + state->meanLongTerm = 15 << 10; + + // variance of input level (Q8) + state->varianceLongTerm = 500 << 8; + + state->stdLongTerm = 0; // standard deviation of input level in dB + // short-term average input level (Q10) + state->meanShortTerm = 15 << 10; + + // short-term variance of input level (Q8) + state->varianceShortTerm = 500 << 8; + + state->stdShortTerm = + 0; // short-term standard deviation of input level in dB + state->counter = 3; // counts updates + for (k = 0; k < 8; k++) { + // downsampling filter + state->downState[k] = 0; + } +} + +int16_t WebRtcAgc_ProcessVad(AgcVad* state, // (i) VAD state + const int16_t* in, // (i) Speech signal + size_t nrSamples) // (i) number of samples +{ + uint32_t nrg; + int32_t out, tmp32, tmp32b; + uint16_t tmpU16; + int16_t k, subfr, tmp16; + int16_t buf1[8]; + int16_t buf2[4]; + int16_t HPstate; + int16_t zeros, dB; + + // process in 10 sub frames of 1 ms (to save on memory) + nrg = 0; + HPstate = state->HPstate; + for (subfr = 0; subfr < 10; subfr++) { + // downsample to 4 kHz + if (nrSamples == 160) { + for (k = 0; k < 8; k++) { + tmp32 = (int32_t)in[2 * k] + (int32_t)in[2 * k + 1]; + tmp32 >>= 1; + buf1[k] = (int16_t)tmp32; + } + in += 16; + + WebRtcSpl_DownsampleBy2(buf1, 8, buf2, state->downState); + } else { + WebRtcSpl_DownsampleBy2(in, 8, buf2, state->downState); + in += 8; + } + + // high pass filter and compute energy + for (k = 0; k < 4; k++) { + out = buf2[k] + HPstate; + tmp32 = 600 * out; + HPstate = (int16_t)((tmp32 >> 10) - buf2[k]); + + // Add 'out * out / 2**6' to 'nrg' in a non-overflowing + // way. Guaranteed to work as long as 'out * out / 2**6' fits in + // an int32_t. + nrg += out * (out / (1 << 6)); + nrg += out * (out % (1 << 6)) / (1 << 6); + } + } + state->HPstate = HPstate; + + // find number of leading zeros + if (!(0xFFFF0000 & nrg)) { + zeros = 16; + } else { + zeros = 0; + } + if (!(0xFF000000 & (nrg << zeros))) { + zeros += 8; + } + if (!(0xF0000000 & (nrg << zeros))) { + zeros += 4; + } + if (!(0xC0000000 & (nrg << zeros))) { + zeros += 2; + } + if (!(0x80000000 & (nrg << zeros))) { + zeros += 1; + } + + // energy level (range {-32..30}) (Q10) + dB = (15 - zeros) * (1 << 11); + + // Update statistics + + if (state->counter < kAvgDecayTime) { + // decay time = AvgDecTime * 10 ms + state->counter++; + } + + // update short-term estimate of mean energy level (Q10) + tmp32 = state->meanShortTerm * 15 + dB; + state->meanShortTerm = (int16_t)(tmp32 >> 4); + + // update short-term estimate of variance in energy level (Q8) + tmp32 = (dB * dB) >> 12; + tmp32 += state->varianceShortTerm * 15; + state->varianceShortTerm = tmp32 / 16; + + // update short-term estimate of standard deviation in energy level (Q10) + tmp32 = state->meanShortTerm * state->meanShortTerm; + tmp32 = (state->varianceShortTerm << 12) - tmp32; + state->stdShortTerm = (int16_t)WebRtcSpl_Sqrt(tmp32); + + // update long-term estimate of mean energy level (Q10) + tmp32 = state->meanLongTerm * state->counter + dB; + state->meanLongTerm = + WebRtcSpl_DivW32W16ResW16(tmp32, WebRtcSpl_AddSatW16(state->counter, 1)); + + // update long-term estimate of variance in energy level (Q8) + tmp32 = (dB * dB) >> 12; + tmp32 += state->varianceLongTerm * state->counter; + state->varianceLongTerm = + WebRtcSpl_DivW32W16(tmp32, WebRtcSpl_AddSatW16(state->counter, 1)); + + // update long-term estimate of standard deviation in energy level (Q10) + tmp32 = state->meanLongTerm * state->meanLongTerm; + tmp32 = (state->varianceLongTerm << 12) 
- tmp32; + state->stdLongTerm = (int16_t)WebRtcSpl_Sqrt(tmp32); + + // update voice activity measure (Q10) + tmp16 = 3 << 12; + // TODO(bjornv): (dB - state->meanLongTerm) can overflow, e.g., in + // ApmTest.Process unit test. Previously the macro WEBRTC_SPL_MUL_16_16() + // was used, which did an intermediate cast to (int16_t), hence losing + // significant bits. This cause logRatio to max out positive, rather than + // negative. This is a bug, but has very little significance. + tmp32 = tmp16 * (int16_t)(dB - state->meanLongTerm); + tmp32 = WebRtcSpl_DivW32W16(tmp32, state->stdLongTerm); + tmpU16 = (13 << 12); + tmp32b = WEBRTC_SPL_MUL_16_U16(state->logRatio, tmpU16); + tmp32 += tmp32b >> 10; + + state->logRatio = (int16_t)(tmp32 >> 6); + + // limit + if (state->logRatio > 2048) { + state->logRatio = 2048; + } + if (state->logRatio < -2048) { + state->logRatio = -2048; + } + + return state->logRatio; // Q10 +} diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/agc/legacy/digital_agc.h b/third_party/libwebrtc/webrtc/modules/audio_processing/agc/legacy/digital_agc.h new file mode 100644 index 0000000000..af6cf48837 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/agc/legacy/digital_agc.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC_LEGACY_DIGITAL_AGC_H_ +#define MODULES_AUDIO_PROCESSING_AGC_LEGACY_DIGITAL_AGC_H_ + +#ifdef WEBRTC_AGC_DEBUG_DUMP +#include <stdio.h> +#endif +#include "common_audio/signal_processing/include/signal_processing_library.h" +#include "typedefs.h" // NOLINT(build/include) + +// the 32 most significant bits of A(19) * B(26) >> 13 +#define AGC_MUL32(A, B) (((B) >> 13) * (A) + (((0x00001FFF & (B)) * (A)) >> 13)) +// C + the 32 most significant bits of A * B +#define AGC_SCALEDIFF32(A, B, C) \ + ((C) + ((B) >> 16) * (A) + (((0x0000FFFF & (B)) * (A)) >> 16)) + +typedef struct { + int32_t downState[8]; + int16_t HPstate; + int16_t counter; + int16_t logRatio; // log( P(active) / P(inactive) ) (Q10) + int16_t meanLongTerm; // Q10 + int32_t varianceLongTerm; // Q8 + int16_t stdLongTerm; // Q10 + int16_t meanShortTerm; // Q10 + int32_t varianceShortTerm; // Q8 + int16_t stdShortTerm; // Q10 +} AgcVad; // total = 54 bytes + +typedef struct { + int32_t capacitorSlow; + int32_t capacitorFast; + int32_t gain; + int32_t gainTable[32]; + int16_t gatePrevious; + int16_t agcMode; + AgcVad vadNearend; + AgcVad vadFarend; +#ifdef WEBRTC_AGC_DEBUG_DUMP + FILE* logFile; + int frameCounter; +#endif +} DigitalAgc; + +int32_t WebRtcAgc_InitDigital(DigitalAgc* digitalAgcInst, int16_t agcMode); + +int32_t WebRtcAgc_ProcessDigital(DigitalAgc* digitalAgcInst, + const int16_t* const* inNear, + size_t num_bands, + int16_t* const* out, + uint32_t FS, + int16_t lowLevelSignal); + +int32_t WebRtcAgc_AddFarendToDigital(DigitalAgc* digitalAgcInst, + const int16_t* inFar, + size_t nrSamples); + +void WebRtcAgc_InitVad(AgcVad* vadInst); + +int16_t WebRtcAgc_ProcessVad(AgcVad* vadInst, // (i) VAD state + const int16_t* in, // (i) Speech signal + size_t nrSamples); // (i) number of samples + +int32_t WebRtcAgc_CalculateGainTable(int32_t* 
gainTable, // Q16 + int16_t compressionGaindB, // Q0 (in dB) + int16_t targetLevelDbfs, // Q0 (in dB) + uint8_t limiterEnable, + int16_t analogTarget); + +#endif // MODULES_AUDIO_PROCESSING_AGC_LEGACY_DIGITAL_AGC_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/agc/legacy/gain_control.h b/third_party/libwebrtc/webrtc/modules/audio_processing/agc/legacy/gain_control.h new file mode 100644 index 0000000000..0f121b143a --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/agc/legacy/gain_control.h @@ -0,0 +1,247 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC_LEGACY_GAIN_CONTROL_H_ +#define MODULES_AUDIO_PROCESSING_AGC_LEGACY_GAIN_CONTROL_H_ + +#include "typedefs.h" // NOLINT(build/include) + +// Errors +#define AGC_UNSPECIFIED_ERROR 18000 +#define AGC_UNSUPPORTED_FUNCTION_ERROR 18001 +#define AGC_UNINITIALIZED_ERROR 18002 +#define AGC_NULL_POINTER_ERROR 18003 +#define AGC_BAD_PARAMETER_ERROR 18004 + +// Warnings +#define AGC_BAD_PARAMETER_WARNING 18050 + +enum { + kAgcModeUnchanged, + kAgcModeAdaptiveAnalog, + kAgcModeAdaptiveDigital, + kAgcModeFixedDigital +}; + +enum { kAgcFalse = 0, kAgcTrue }; + +typedef struct { + int16_t targetLevelDbfs; // default 3 (-3 dBOv) + int16_t compressionGaindB; // default 9 dB + uint8_t limiterEnable; // default kAgcTrue (on) +} WebRtcAgcConfig; + +#if defined(__cplusplus) +extern "C" { +#endif + +/* + * This function analyses the number of samples passed to + * farend and produces any error code that could arise. + * + * Input: + * - agcInst : AGC instance. + * - samples : Number of samples in input vector. + * + * Return value: + * : 0 - Normal operation. + * : -1 - Error. + */ +int WebRtcAgc_GetAddFarendError(void* state, size_t samples); + +/* + * This function processes a 10 ms frame of far-end speech to determine + * if there is active speech. The length of the input speech vector must be + * given in samples (80 when FS=8000, and 160 when FS=16000, FS=32000 or + * FS=48000). + * + * Input: + * - agcInst : AGC instance. + * - inFar : Far-end input speech vector + * - samples : Number of samples in input vector + * + * Return value: + * : 0 - Normal operation. + * : -1 - Error + */ +int WebRtcAgc_AddFarend(void* agcInst, const int16_t* inFar, size_t samples); + +/* + * This function processes a 10 ms frame of microphone speech to determine + * if there is active speech. The length of the input speech vector must be + * given in samples (80 when FS=8000, and 160 when FS=16000, FS=32000 or + * FS=48000). For very low input levels, the input signal is increased in level + * by multiplying and overwriting the samples in inMic[]. + * + * This function should be called before any further processing of the + * near-end microphone signal. + * + * Input: + * - agcInst : AGC instance. + * - inMic : Microphone input speech vector for each band + * - num_bands : Number of bands in input vector + * - samples : Number of samples in input vector + * + * Return value: + * : 0 - Normal operation. 
+ * : -1 - Error + */ +int WebRtcAgc_AddMic(void* agcInst, + int16_t* const* inMic, + size_t num_bands, + size_t samples); + +/* + * This function replaces the analog microphone with a virtual one. + * It is a digital gain applied to the input signal and is used in the + * agcAdaptiveDigital mode where no microphone level is adjustable. The length + * of the input speech vector must be given in samples (80 when FS=8000, and 160 + * when FS=16000, FS=32000 or FS=48000). + * + * Input: + * - agcInst : AGC instance. + * - inMic : Microphone input speech vector for each band + * - num_bands : Number of bands in input vector + * - samples : Number of samples in input vector + * - micLevelIn : Input level of microphone (static) + * + * Output: + * - inMic : Microphone output after processing (L band) + * - inMic_H : Microphone output after processing (H band) + * - micLevelOut : Adjusted microphone level after processing + * + * Return value: + * : 0 - Normal operation. + * : -1 - Error + */ +int WebRtcAgc_VirtualMic(void* agcInst, + int16_t* const* inMic, + size_t num_bands, + size_t samples, + int32_t micLevelIn, + int32_t* micLevelOut); + +/* + * This function processes a 10 ms frame and adjusts (normalizes) the gain both + * analog and digitally. The gain adjustments are done only during active + * periods of speech. The length of the speech vectors must be given in samples + * (80 when FS=8000, and 160 when FS=16000, FS=32000 or FS=48000). The echo + * parameter can be used to ensure the AGC will not adjust upward in the + * presence of echo. + * + * This function should be called after processing the near-end microphone + * signal, in any case after any echo cancellation. + * + * Input: + * - agcInst : AGC instance + * - inNear : Near-end input speech vector for each band + * - num_bands : Number of bands in input/output vector + * - samples : Number of samples in input/output vector + * - inMicLevel : Current microphone volume level + * - echo : Set to 0 if the signal passed to add_mic is + * almost certainly free of echo; otherwise set + * to 1. If you have no information regarding echo + * set to 0. + * + * Output: + * - outMicLevel : Adjusted microphone volume level + * - out : Gain-adjusted near-end speech vector + * : May be the same vector as the input. + * - saturationWarning : A returned value of 1 indicates a saturation event + * has occurred and the volume cannot be further + * reduced. Otherwise will be set to 0. + * + * Return value: + * : 0 - Normal operation. + * : -1 - Error + */ +int WebRtcAgc_Process(void* agcInst, + const int16_t* const* inNear, + size_t num_bands, + size_t samples, + int16_t* const* out, + int32_t inMicLevel, + int32_t* outMicLevel, + int16_t echo, + uint8_t* saturationWarning); + +/* + * This function sets the config parameters (targetLevelDbfs, + * compressionGaindB and limiterEnable). + * + * Input: + * - agcInst : AGC instance + * - config : config struct + * + * Output: + * + * Return value: + * : 0 - Normal operation. + * : -1 - Error + */ +int WebRtcAgc_set_config(void* agcInst, WebRtcAgcConfig config); + +/* + * This function returns the config parameters (targetLevelDbfs, + * compressionGaindB and limiterEnable). + * + * Input: + * - agcInst : AGC instance + * + * Output: + * - config : config struct + * + * Return value: + * : 0 - Normal operation. 
+ * : -1 - Error + */ +int WebRtcAgc_get_config(void* agcInst, WebRtcAgcConfig* config); + +/* + * This function creates and returns an AGC instance, which will contain the + * state information for one (duplex) channel. + */ +void* WebRtcAgc_Create(); + +/* + * This function frees the AGC instance created at the beginning. + * + * Input: + * - agcInst : AGC instance. + */ +void WebRtcAgc_Free(void* agcInst); + +/* + * This function initializes an AGC instance. + * + * Input: + * - agcInst : AGC instance. + * - minLevel : Minimum possible mic level + * - maxLevel : Maximum possible mic level + * - agcMode : 0 - Unchanged + * : 1 - Adaptive Analog Automatic Gain Control -3dBOv + * : 2 - Adaptive Digital Automatic Gain Control -3dBOv + * : 3 - Fixed Digital Gain 0dB + * - fs : Sampling frequency + * + * Return value : 0 - Ok + * -1 - Error + */ +int WebRtcAgc_Init(void* agcInst, + int32_t minLevel, + int32_t maxLevel, + int16_t agcMode, + uint32_t fs); + +#if defined(__cplusplus) +} +#endif + +#endif // MODULES_AUDIO_PROCESSING_AGC_LEGACY_GAIN_CONTROL_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/agc/loudness_histogram.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/agc/loudness_histogram.cc new file mode 100644 index 0000000000..63d5f7cad0 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/agc/loudness_histogram.cc @@ -0,0 +1,229 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "modules/audio_processing/agc/loudness_histogram.h"
+
+#include <cmath>
+#include <cstring>
+
+#include "modules/include/module_common_types.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+static const double kHistBinCenters[] = {
+    7.59621091765857e-02, 9.02036021061016e-02, 1.07115112009343e-01,
+    1.27197217770508e-01, 1.51044347572047e-01, 1.79362373905283e-01,
+    2.12989507320644e-01, 2.52921107370304e-01, 3.00339145144454e-01,
+    3.56647189489147e-01, 4.23511952494003e-01, 5.02912623991786e-01,
+    5.97199455365749e-01, 7.09163326739184e-01, 8.42118356728544e-01,
+    1.00000000000000e+00, 1.18748153630660e+00, 1.41011239906908e+00,
+    1.67448243801153e+00, 1.98841697800836e+00, 2.36120844786349e+00,
+    2.80389143520905e+00, 3.32956930911896e+00, 3.95380207843188e+00,
+    4.69506696634852e+00, 5.57530533426190e+00, 6.62057214370769e+00,
+    7.86180718043869e+00, 9.33575086877358e+00, 1.10860317842269e+01,
+    1.31644580546776e+01, 1.56325508754123e+01, 1.85633655299256e+01,
+    2.20436538184971e+01, 2.61764319021997e+01, 3.10840295702492e+01,
+    3.69117111886792e+01, 4.38319755100383e+01, 5.20496616180135e+01,
+    6.18080121423973e+01, 7.33958732149108e+01, 8.71562442838066e+01,
+    1.03496430860848e+02, 1.22900100720889e+02, 1.45941600416277e+02,
+    1.73302955873365e+02, 2.05794060286978e+02, 2.44376646872353e+02,
+    2.90192756065437e+02, 3.44598539797631e+02, 4.09204403447902e+02,
+    4.85922673669740e+02, 5.77024203055553e+02, 6.85205587130498e+02,
+    8.13668983291589e+02, 9.66216894324125e+02, 1.14736472207740e+03,
+    1.36247442287647e+03, 1.61791322085579e+03, 1.92124207711260e+03,
+    2.28143949334655e+03, 2.70916727454970e+03, 3.21708611729384e+03,
+    3.82023036499473e+03, 4.53645302286906e+03, 5.38695420497926e+03,
+    6.39690865534207e+03, 7.59621091765857e+03, 9.02036021061016e+03,
+    1.07115112009343e+04, 1.27197217770508e+04, 1.51044347572047e+04,
+    1.79362373905283e+04, 2.12989507320644e+04, 2.52921107370304e+04,
+    3.00339145144454e+04, 3.56647189489147e+04};
+
+static const double kProbQDomain = 1024.0;
+// Loudness of -15 dB (smallest expected loudness) in log domain,
+// loudness_db = 13.4 * log10(rms);
+static const double kLogDomainMinBinCenter = -2.57752062648587;
+// Loudness step of 1 dB in log domain
+static const double kLogDomainStepSizeInverse = 5.81954605750359;
+
+static const int kTransientWidthThreshold = 7;
+static const double kLowProbabilityThreshold = 0.2;
+
+static const int kLowProbThresholdQ10 =
+    static_cast<int>(kLowProbabilityThreshold * kProbQDomain);
+
+LoudnessHistogram::LoudnessHistogram()
+    : num_updates_(0),
+      audio_content_q10_(0),
+      bin_count_q10_(),
+      activity_probability_(),
+      hist_bin_index_(),
+      buffer_index_(0),
+      buffer_is_full_(false),
+      len_circular_buffer_(0),
+      len_high_activity_(0) {
+  static_assert(
+      kHistSize == sizeof(kHistBinCenters) / sizeof(kHistBinCenters[0]),
+      "histogram bin centers incorrect size");
+}
+
+LoudnessHistogram::LoudnessHistogram(int window_size)
+    : num_updates_(0),
+      audio_content_q10_(0),
+      bin_count_q10_(),
+      activity_probability_(new int[window_size]),
+      hist_bin_index_(new int[window_size]),
+      buffer_index_(0),
+      buffer_is_full_(false),
+      len_circular_buffer_(window_size),
+      len_high_activity_(0) {}
+
+LoudnessHistogram::~LoudnessHistogram() {}
+
+void LoudnessHistogram::Update(double rms, double activity_probability) {
+  // If circular histogram is activated then remove the oldest entry.
+  if (len_circular_buffer_ > 0)
+    RemoveOldestEntryAndUpdate();
+
+  // Find the corresponding bin.
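+  // (GetBinIndex() exploits the fact that the bin centers above sit on a
+  // uniform grid in the log domain, exp(kLogDomainMinBinCenter + n /
+  // kLogDomainStepSizeInverse), i.e. 1 dB apart on the loudness scale, so
+  // the bin can be computed directly instead of searched for.)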
+  int hist_index = GetBinIndex(rms);
+  // To Q10 domain.
+  int prob_q10 =
+      static_cast<int16_t>(floor(activity_probability * kProbQDomain));
+  InsertNewestEntryAndUpdate(prob_q10, hist_index);
+}
+
+// Does nothing if the buffer is not yet full.
+void LoudnessHistogram::RemoveOldestEntryAndUpdate() {
+  RTC_DCHECK_GT(len_circular_buffer_, 0);
+  // Do nothing if circular buffer is not full.
+  if (!buffer_is_full_)
+    return;
+
+  int oldest_prob = activity_probability_[buffer_index_];
+  int oldest_hist_index = hist_bin_index_[buffer_index_];
+  UpdateHist(-oldest_prob, oldest_hist_index);
+}
+
+void LoudnessHistogram::RemoveTransient() {
+  // Don't expect to be here if high-activity region is longer than
+  // |kTransientWidthThreshold| or there has not been any transient.
+  RTC_DCHECK_LE(len_high_activity_, kTransientWidthThreshold);
+  int index =
+      (buffer_index_ > 0) ? (buffer_index_ - 1) : len_circular_buffer_ - 1;
+  while (len_high_activity_ > 0) {
+    UpdateHist(-activity_probability_[index], hist_bin_index_[index]);
+    activity_probability_[index] = 0;
+    index = (index > 0) ? (index - 1) : (len_circular_buffer_ - 1);
+    len_high_activity_--;
+  }
+}
+
+void LoudnessHistogram::InsertNewestEntryAndUpdate(int activity_prob_q10,
+                                                   int hist_index) {
+  // Update the circular buffer if it is enabled.
+  if (len_circular_buffer_ > 0) {
+    // Removing transient.
+    if (activity_prob_q10 <= kLowProbThresholdQ10) {
+      // Lower than threshold probability, set it to zero.
+      activity_prob_q10 = 0;
+      // Check if this has been a transient.
+      if (len_high_activity_ <= kTransientWidthThreshold)
+        RemoveTransient();  // Remove this transient.
+      len_high_activity_ = 0;
+    } else if (len_high_activity_ <= kTransientWidthThreshold) {
+      len_high_activity_++;
+    }
+    // Updating the circular buffer.
+    activity_probability_[buffer_index_] = activity_prob_q10;
+    hist_bin_index_[buffer_index_] = hist_index;
+    // Increment the buffer index and check for wrap-around.
+    buffer_index_++;
+    if (buffer_index_ >= len_circular_buffer_) {
+      buffer_index_ = 0;
+      buffer_is_full_ = true;
+    }
+  }
+
+  num_updates_++;
+  if (num_updates_ < 0)
+    num_updates_--;
+
+  UpdateHist(activity_prob_q10, hist_index);
+}
+
+void LoudnessHistogram::UpdateHist(int activity_prob_q10, int hist_index) {
+  bin_count_q10_[hist_index] += activity_prob_q10;
+  audio_content_q10_ += activity_prob_q10;
+}
+
+double LoudnessHistogram::AudioContent() const {
+  return audio_content_q10_ / kProbQDomain;
+}
+
+LoudnessHistogram* LoudnessHistogram::Create() {
+  return new LoudnessHistogram;
+}
+
+LoudnessHistogram* LoudnessHistogram::Create(int window_size) {
+  if (window_size < 0)
+    return NULL;
+  return new LoudnessHistogram(window_size);
+}
+
+void LoudnessHistogram::Reset() {
+  // Reset the histogram, audio-content and number of updates.
+  memset(bin_count_q10_, 0, sizeof(bin_count_q10_));
+  audio_content_q10_ = 0;
+  num_updates_ = 0;
+  // Empty the circular buffer.
+  buffer_index_ = 0;
+  buffer_is_full_ = false;
+  len_high_activity_ = 0;
+}
+
+int LoudnessHistogram::GetBinIndex(double rms) {
+  // First exclude overload cases.
+  if (rms <= kHistBinCenters[0]) {
+    return 0;
+  } else if (rms >= kHistBinCenters[kHistSize - 1]) {
+    return kHistSize - 1;
+  } else {
+    // The quantizer is uniform in log domain. Alternatively we could do binary
+    // search in linear domain.
+    double rms_log = log(rms);
+
+    int index = static_cast<int>(
+        floor((rms_log - kLogDomainMinBinCenter) * kLogDomainStepSizeInverse));
+    // The final decision is in linear domain.
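+    // The floor() above may land one bin low because the cut was made in
+    // the log domain; comparing |rms| against the linear midpoint of the
+    // two neighboring centers picks the closer bin.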
+ double b = 0.5 * (kHistBinCenters[index] + kHistBinCenters[index + 1]); + if (rms > b) { + return index + 1; + } + return index; + } +} + +double LoudnessHistogram::CurrentRms() const { + double p; + double mean_val = 0; + if (audio_content_q10_ > 0) { + double p_total_inverse = 1. / static_cast<double>(audio_content_q10_); + for (int n = 0; n < kHistSize; n++) { + p = static_cast<double>(bin_count_q10_[n]) * p_total_inverse; + mean_val += p * kHistBinCenters[n]; + } + } else { + mean_val = kHistBinCenters[0]; + } + return mean_val; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/agc/loudness_histogram.h b/third_party/libwebrtc/webrtc/modules/audio_processing/agc/loudness_histogram.h new file mode 100644 index 0000000000..ab45276f06 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/agc/loudness_histogram.h @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC_LOUDNESS_HISTOGRAM_H_ +#define MODULES_AUDIO_PROCESSING_AGC_LOUDNESS_HISTOGRAM_H_ + +#include <string.h> + +#include <memory> + +#include "typedefs.h" // NOLINT(build/include) + +namespace webrtc { + +// This class implements the histogram of loudness with circular buffers so that +// the histogram tracks the last T seconds of the loudness. +class LoudnessHistogram { + public: + // Create a non-sliding LoudnessHistogram. + static LoudnessHistogram* Create(); + + // Create a sliding LoudnessHistogram, i.e. the histogram represents the last + // |window_size| samples. + static LoudnessHistogram* Create(int window_size); + ~LoudnessHistogram(); + + // Insert RMS and the corresponding activity probability. + void Update(double rms, double activity_probability); + + // Reset the histogram, forget the past. + void Reset(); + + // Current loudness, which is actually the mean of histogram in loudness + // domain. + double CurrentRms() const; + + // Sum of the histogram content. + double AudioContent() const; + + // Number of times the histogram has been updated. + int num_updates() const { return num_updates_; } + + private: + LoudnessHistogram(); + explicit LoudnessHistogram(int window); + + // Find the histogram bin associated with the given |rms|. + int GetBinIndex(double rms); + + void RemoveOldestEntryAndUpdate(); + void InsertNewestEntryAndUpdate(int activity_prob_q10, int hist_index); + void UpdateHist(int activity_prob_q10, int hist_index); + void RemoveTransient(); + + // Number of histogram bins. + static const int kHistSize = 77; + + // Number of times the histogram is updated + int num_updates_; + // Audio content, this should be equal to the sum of the components of + // |bin_count_q10_|. + int64_t audio_content_q10_; + + // LoudnessHistogram of input RMS in Q10 with |kHistSize_| bins. In each + // 'Update(),' we increment the associated histogram-bin with the given + // probability. The increment is implemented in Q10 to avoid rounding errors. + int64_t bin_count_q10_[kHistSize]; + + // Circular buffer for probabilities + std::unique_ptr<int[]> activity_probability_; + // Circular buffer for histogram-indices of probabilities. 
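+  // Each entry records which histogram bin the matching probability was
+  // added to, so the same amount can be subtracted again when the entry
+  // falls out of the sliding window.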
+ std::unique_ptr<int[]> hist_bin_index_; + // Current index of circular buffer, where the newest data will be written to, + // therefore, pointing to the oldest data if buffer is full. + int buffer_index_; + // Indicating if buffer is full and we had a wrap around. + int buffer_is_full_; + // Size of circular buffer. + int len_circular_buffer_; + int len_high_activity_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC_LOUDNESS_HISTOGRAM_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/agc/loudness_histogram_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/agc/loudness_histogram_unittest.cc new file mode 100644 index 0000000000..8c617107ff --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/agc/loudness_histogram_unittest.cc @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Use CreateHistUnittestFile.m to generate the input file. + +#include "modules/audio_processing/agc/loudness_histogram.h" + +#include <stdio.h> +#include <algorithm> +#include <cmath> +#include <memory> + +#include "modules/audio_processing/agc/utility.h" +#include "test/gtest.h" +#include "test/testsupport/fileutils.h" + +namespace webrtc { + +struct InputOutput { + double rms; + double activity_probability; + double audio_content; + double loudness; +}; + +const double kRelativeErrTol = 1e-10; + +class LoudnessHistogramTest : public ::testing::Test { + protected: + void RunTest(bool enable_circular_buff, const char* filename); + + private: + void TestClean(); + std::unique_ptr<LoudnessHistogram> hist_; +}; + +void LoudnessHistogramTest::TestClean() { + EXPECT_EQ(hist_->CurrentRms(), 7.59621091765857e-02); + EXPECT_EQ(hist_->AudioContent(), 0); + EXPECT_EQ(hist_->num_updates(), 0); +} + +void LoudnessHistogramTest::RunTest(bool enable_circular_buff, + const char* filename) { + FILE* in_file = fopen(filename, "rb"); + ASSERT_TRUE(in_file != NULL); + if (enable_circular_buff) { + int buffer_size; + EXPECT_EQ(fread(&buffer_size, sizeof(buffer_size), 1, in_file), 1u); + hist_.reset(LoudnessHistogram::Create(buffer_size)); + } else { + hist_.reset(LoudnessHistogram::Create()); + } + TestClean(); + + InputOutput io; + int num_updates = 0; + int num_reset = 0; + while (fread(&io, sizeof(InputOutput), 1, in_file) == 1) { + if (io.rms < 0) { + // We have to reset. + hist_->Reset(); + TestClean(); + num_updates = 0; + num_reset++; + // Read the next chunk of input. 
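+      // A negative |io.rms| is only a reset marker, not a measurement, so
+      // the record that triggered the reset is skipped and the next record
+      // is consumed instead.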
+ if (fread(&io, sizeof(InputOutput), 1, in_file) != 1) + break; + } + hist_->Update(io.rms, io.activity_probability); + num_updates++; + EXPECT_EQ(hist_->num_updates(), num_updates); + double audio_content = hist_->AudioContent(); + + double abs_err = + std::min(audio_content, io.audio_content) * kRelativeErrTol; + + ASSERT_NEAR(audio_content, io.audio_content, abs_err); + double current_loudness = Linear2Loudness(hist_->CurrentRms()); + abs_err = + std::min(fabs(current_loudness), fabs(io.loudness)) * kRelativeErrTol; + ASSERT_NEAR(current_loudness, io.loudness, abs_err); + } + fclose(in_file); +} + +TEST_F(LoudnessHistogramTest, ActiveCircularBuffer) { + RunTest(true, test::ResourcePath( + "audio_processing/agc/agc_with_circular_buffer", "dat") + .c_str()); +} + +TEST_F(LoudnessHistogramTest, InactiveCircularBuffer) { + RunTest(false, test::ResourcePath( + "audio_processing/agc/agc_no_circular_buffer", "dat") + .c_str()); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/agc/mock_agc.h b/third_party/libwebrtc/webrtc/modules/audio_processing/agc/mock_agc.h new file mode 100644 index 0000000000..36901af50c --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/agc/mock_agc.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC_MOCK_AGC_H_ +#define MODULES_AUDIO_PROCESSING_AGC_MOCK_AGC_H_ + +#include "modules/audio_processing/agc/agc.h" + +#include "modules/include/module_common_types.h" +#include "test/gmock.h" + +namespace webrtc { + +class MockAgc : public Agc { + public: + MOCK_METHOD2(AnalyzePreproc, float(const int16_t* audio, size_t length)); + MOCK_METHOD3(Process, int(const int16_t* audio, size_t length, + int sample_rate_hz)); + MOCK_METHOD1(GetRmsErrorDb, bool(int* error)); + MOCK_METHOD0(Reset, void()); + MOCK_METHOD1(set_target_level_dbfs, int(int level)); + MOCK_CONST_METHOD0(target_level_dbfs, int()); + MOCK_METHOD1(EnableStandaloneVad, void(bool enable)); + MOCK_CONST_METHOD0(standalone_vad_enabled, bool()); +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC_MOCK_AGC_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/agc/utility.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/agc/utility.cc new file mode 100644 index 0000000000..554a8377aa --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/agc/utility.cc @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/agc/utility.h" + +#include <math.h> + +static const double kLog10 = 2.30258509299; +static const double kLinear2DbScale = 20.0 / kLog10; +static const double kLinear2LoudnessScale = 13.4 / kLog10; + +double Loudness2Db(double loudness) { + return loudness * kLinear2DbScale / kLinear2LoudnessScale; +} + +double Linear2Loudness(double rms) { + if (rms == 0) + return -15; + return kLinear2LoudnessScale * log(rms); +} + +double Db2Loudness(double db) { + return db * kLinear2LoudnessScale / kLinear2DbScale; +} + +double Dbfs2Loudness(double dbfs) { + return Db2Loudness(90 + dbfs); +} diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/agc/utility.h b/third_party/libwebrtc/webrtc/modules/audio_processing/agc/utility.h new file mode 100644 index 0000000000..8ba87b6d55 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/agc/utility.h @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC_UTILITY_H_ +#define MODULES_AUDIO_PROCESSING_AGC_UTILITY_H_ + +// TODO(turajs): Add description of function. +double Loudness2Db(double loudness); + +double Linear2Loudness(double rms); + +double Db2Loudness(double db); + +double Dbfs2Loudness(double dbfs); + +#endif // MODULES_AUDIO_PROCESSING_AGC_UTILITY_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/agc2/gain_controller2.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/agc2/gain_controller2.cc new file mode 100644 index 0000000000..ca564e8b1d --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/agc2/gain_controller2.cc @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/agc2/gain_controller2.h" + +#include <cmath> + +#include "modules/audio_processing/audio_buffer.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/atomicops.h" +#include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_minmax.h" + +namespace webrtc { + +int GainController2::instance_count_ = 0; + +GainController2::GainController2() + : data_dumper_( + new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))), + sample_rate_hz_(AudioProcessing::kSampleRate48kHz), + fixed_gain_(1.f) {} + +GainController2::~GainController2() = default; + +void GainController2::Initialize(int sample_rate_hz) { + RTC_DCHECK(sample_rate_hz == AudioProcessing::kSampleRate8kHz || + sample_rate_hz == AudioProcessing::kSampleRate16kHz || + sample_rate_hz == AudioProcessing::kSampleRate32kHz || + sample_rate_hz == AudioProcessing::kSampleRate48kHz); + sample_rate_hz_ = sample_rate_hz; + data_dumper_->InitiateNewSetOfRecordings(); + data_dumper_->DumpRaw("sample_rate_hz", sample_rate_hz_); + data_dumper_->DumpRaw("fixed_gain_linear", fixed_gain_); +} + +void GainController2::Process(AudioBuffer* audio) { + if (fixed_gain_ == 1.f) + return; + + for (size_t k = 0; k < audio->num_channels(); ++k) { + for (size_t j = 0; j < audio->num_frames(); ++j) { + audio->channels_f()[k][j] = rtc::SafeClamp( + fixed_gain_ * audio->channels_f()[k][j], -32768.f, 32767.f); + } + } +} + +void GainController2::ApplyConfig( + const AudioProcessing::Config::GainController2& config) { + RTC_DCHECK(Validate(config)); + fixed_gain_ = std::pow(10.f, config.fixed_gain_db / 20.f); +} + +bool GainController2::Validate( + const AudioProcessing::Config::GainController2& config) { + return config.fixed_gain_db >= 0.f; +} + +std::string GainController2::ToString( + const AudioProcessing::Config::GainController2& config) { + std::stringstream ss; + ss << "{enabled: " << (config.enabled ? "true" : "false") << ", " + << "fixed_gain_dB: " << config.fixed_gain_db << "}"; + return ss.str(); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/agc2/gain_controller2.h b/third_party/libwebrtc/webrtc/modules/audio_processing/agc2/gain_controller2.h new file mode 100644 index 0000000000..11706870f4 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/agc2/gain_controller2.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_GAIN_CONTROLLER2_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_GAIN_CONTROLLER2_H_ + +#include <memory> +#include <string> + +#include "modules/audio_processing/include/audio_processing.h" +#include "rtc_base/constructormagic.h" + +namespace webrtc { + +class ApmDataDumper; +class AudioBuffer; + +// Gain Controller 2 aims to automatically adjust levels by acting on the +// microphone gain and/or applying digital gain. +// +// Temporarily implements a fixed gain mode with hard-clipping. 
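+//
+// A minimal usage sketch, mirroring the unit tests (|ab| stands for an
+// already-filled 48 kHz AudioBuffer):
+//
+//   GainController2 gc2;
+//   gc2.Initialize(AudioProcessing::kSampleRate48kHz);
+//   AudioProcessing::Config::GainController2 config;
+//   config.fixed_gain_db = 6.f;  // 10^(6/20) ~= 2x linear gain
+//   RTC_DCHECK(GainController2::Validate(config));
+//   gc2.ApplyConfig(config);
+//   gc2.Process(&ab);  // scales samples, hard-clipping to the int16 range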
+class GainController2 {
+ public:
+  GainController2();
+  ~GainController2();
+
+  void Initialize(int sample_rate_hz);
+  void Process(AudioBuffer* audio);
+
+  void ApplyConfig(const AudioProcessing::Config::GainController2& config);
+  static bool Validate(const AudioProcessing::Config::GainController2& config);
+  static std::string ToString(
+      const AudioProcessing::Config::GainController2& config);
+
+ private:
+  static int instance_count_;
+  std::unique_ptr<ApmDataDumper> data_dumper_;
+  int sample_rate_hz_;
+  float fixed_gain_;
+
+  RTC_DISALLOW_COPY_AND_ASSIGN(GainController2);
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AGC2_GAIN_CONTROLLER2_H_
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/agc2/gain_controller2_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/agc2/gain_controller2_unittest.cc
new file mode 100644
index 0000000000..46f654db62
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/agc2/gain_controller2_unittest.cc
@@ -0,0 +1,97 @@
+/*
+ *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <algorithm>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/agc2/gain_controller2.h"
+#include "modules/audio_processing/audio_buffer.h"
+#include "rtc_base/checks.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace test {
+
+namespace {
+
+constexpr size_t kFrameSizeMs = 10u;
+constexpr size_t kStereo = 2u;
+
+void SetAudioBufferSamples(float value, AudioBuffer* ab) {
+  // Sets all the samples in |ab| to |value|.
+  for (size_t k = 0; k < ab->num_channels(); ++k) {
+    std::fill(ab->channels_f()[k], ab->channels_f()[k] + ab->num_frames(),
+              value);
+  }
+}
+
+}  // namespace
+
+TEST(GainController2, CreateApplyConfig) {
+  // Instantiates GainController2 and applies different configurations.
+  std::unique_ptr<GainController2> gain_controller2(new GainController2());
+
+  // Check that the default config is valid.
+  AudioProcessing::Config::GainController2 config;
+  EXPECT_TRUE(GainController2::Validate(config));
+  gain_controller2->ApplyConfig(config);
+
+  // Check that attenuation is not allowed.
+  config.fixed_gain_db = -5.f;
+  EXPECT_FALSE(GainController2::Validate(config));
+
+  // Check that valid configurations are applied.
+  for (const float& fixed_gain_db : {0.f, 5.f, 10.f, 50.f}) {
+    config.fixed_gain_db = fixed_gain_db;
+    EXPECT_TRUE(GainController2::Validate(config));
+    gain_controller2->ApplyConfig(config);
+  }
+}
+
+TEST(GainController2, ToString) {
+  // Tests GainController2::ToString().
+  AudioProcessing::Config::GainController2 config;
+  config.fixed_gain_db = 5.f;
+
+  config.enabled = false;
+  EXPECT_EQ("{enabled: false, fixed_gain_dB: 5}",
+            GainController2::ToString(config));
+
+  config.enabled = true;
+  EXPECT_EQ("{enabled: true, fixed_gain_dB: 5}",
+            GainController2::ToString(config));
+}
+
+TEST(GainController2, Usage) {
+  // Tests GainController2::Process() on an AudioBuffer instance.
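+  // With the default config (fixed_gain_db == 0.f) Process() must leave the
+  // samples untouched; with 5 dB the linear gain is 10^(5/20) ~= 1.78, so
+  // the 1000.f samples below should come out around 1778.f.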
+ std::unique_ptr<GainController2> gain_controller2(new GainController2()); + gain_controller2->Initialize(AudioProcessing::kSampleRate48kHz); + const size_t num_frames = rtc::CheckedDivExact<size_t>( + kFrameSizeMs * AudioProcessing::kSampleRate48kHz, 1000); + AudioBuffer ab(num_frames, kStereo, num_frames, kStereo, num_frames); + constexpr float sample_value = 1000.f; + SetAudioBufferSamples(sample_value, &ab); + AudioProcessing::Config::GainController2 config; + + // Check that samples are not modified when the fixed gain is 0 dB. + ASSERT_EQ(config.fixed_gain_db, 0.f); + gain_controller2->ApplyConfig(config); + gain_controller2->Process(&ab); + EXPECT_EQ(ab.channels_f()[0][0], sample_value); + + // Check that samples are amplified when the fixed gain is greater than 0 dB. + config.fixed_gain_db = 5.f; + gain_controller2->ApplyConfig(config); + gain_controller2->Process(&ab); + EXPECT_LT(sample_value, ab.channels_f()[0][0]); +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/audio_buffer.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/audio_buffer.cc new file mode 100644 index 0000000000..16f11742e3 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/audio_buffer.cc @@ -0,0 +1,475 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/audio_buffer.h" + +#include "common_audio/channel_buffer.h" +#include "common_audio/include/audio_util.h" +#include "common_audio/resampler/push_sinc_resampler.h" +#include "common_audio/signal_processing/include/signal_processing_library.h" +#include "modules/audio_processing/common.h" +#include "rtc_base/checks.h" + +namespace webrtc { +namespace { + +const size_t kSamplesPer16kHzChannel = 160; +const size_t kSamplesPer32kHzChannel = 320; +const size_t kSamplesPer48kHzChannel = 480; + +int KeyboardChannelIndex(const StreamConfig& stream_config) { + if (!stream_config.has_keyboard()) { + RTC_NOTREACHED(); + return 0; + } + + return stream_config.num_channels(); +} + +size_t NumBandsFromSamplesPerChannel(size_t num_frames) { + size_t num_bands = 1; + if (num_frames == kSamplesPer32kHzChannel || + num_frames == kSamplesPer48kHzChannel) { + num_bands = rtc::CheckedDivExact(num_frames, kSamplesPer16kHzChannel); + } + return num_bands; +} + +} // namespace + +AudioBuffer::AudioBuffer(size_t input_num_frames, + size_t num_input_channels, + size_t process_num_frames, + size_t num_process_channels, + size_t output_num_frames) + : input_num_frames_(input_num_frames), + num_input_channels_(num_input_channels), + proc_num_frames_(process_num_frames), + num_proc_channels_(num_process_channels), + output_num_frames_(output_num_frames), + num_channels_(num_process_channels), + num_bands_(NumBandsFromSamplesPerChannel(proc_num_frames_)), + num_split_frames_(rtc::CheckedDivExact(proc_num_frames_, num_bands_)), + mixed_low_pass_valid_(false), + reference_copied_(false), + activity_(AudioFrame::kVadUnknown), + keyboard_data_(NULL), + data_(new IFChannelBuffer(proc_num_frames_, num_proc_channels_)), + output_buffer_(new IFChannelBuffer(output_num_frames_, num_channels_)) { + 
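+  // The five sizes passed in fully determine the buffer topology: validate
+  // them first, then set up resamplers and band-splitting only for the
+  // conversions that are actually needed.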
RTC_DCHECK_GT(input_num_frames_, 0); + RTC_DCHECK_GT(proc_num_frames_, 0); + RTC_DCHECK_GT(output_num_frames_, 0); + RTC_DCHECK_GT(num_input_channels_, 0); + RTC_DCHECK_GT(num_proc_channels_, 0); + RTC_DCHECK_LE(num_proc_channels_, num_input_channels_); + + if (input_num_frames_ != proc_num_frames_ || + output_num_frames_ != proc_num_frames_) { + // Create an intermediate buffer for resampling. + process_buffer_.reset(new ChannelBuffer<float>(proc_num_frames_, + num_proc_channels_)); + + if (input_num_frames_ != proc_num_frames_) { + for (size_t i = 0; i < num_proc_channels_; ++i) { + input_resamplers_.push_back(std::unique_ptr<PushSincResampler>( + new PushSincResampler(input_num_frames_, proc_num_frames_))); + } + } + + if (output_num_frames_ != proc_num_frames_) { + for (size_t i = 0; i < num_proc_channels_; ++i) { + output_resamplers_.push_back(std::unique_ptr<PushSincResampler>( + new PushSincResampler(proc_num_frames_, output_num_frames_))); + } + } + } + + if (num_bands_ > 1) { + split_data_.reset(new IFChannelBuffer(proc_num_frames_, + num_proc_channels_, + num_bands_)); + splitting_filter_.reset(new SplittingFilter(num_proc_channels_, + num_bands_, + proc_num_frames_)); + } +} + +AudioBuffer::~AudioBuffer() {} + +void AudioBuffer::CopyFrom(const float* const* data, + const StreamConfig& stream_config) { + RTC_DCHECK_EQ(stream_config.num_frames(), input_num_frames_); + RTC_DCHECK_EQ(stream_config.num_channels(), num_input_channels_); + InitForNewData(); + // Initialized lazily because there's a different condition in + // DeinterleaveFrom. + const bool need_to_downmix = + num_input_channels_ > 1 && num_proc_channels_ == 1; + if (need_to_downmix && !input_buffer_) { + input_buffer_.reset( + new IFChannelBuffer(input_num_frames_, num_proc_channels_)); + } + + if (stream_config.has_keyboard()) { + keyboard_data_ = data[KeyboardChannelIndex(stream_config)]; + } + + // Downmix. + const float* const* data_ptr = data; + if (need_to_downmix) { + DownmixToMono<float, float>(data, input_num_frames_, num_input_channels_, + input_buffer_->fbuf()->channels()[0]); + data_ptr = input_buffer_->fbuf_const()->channels(); + } + + // Resample. + if (input_num_frames_ != proc_num_frames_) { + for (size_t i = 0; i < num_proc_channels_; ++i) { + input_resamplers_[i]->Resample(data_ptr[i], + input_num_frames_, + process_buffer_->channels()[i], + proc_num_frames_); + } + data_ptr = process_buffer_->channels(); + } + + // Convert to the S16 range. + for (size_t i = 0; i < num_proc_channels_; ++i) { + FloatToFloatS16(data_ptr[i], + proc_num_frames_, + data_->fbuf()->channels()[i]); + } +} + +void AudioBuffer::CopyTo(const StreamConfig& stream_config, + float* const* data) { + RTC_DCHECK_EQ(stream_config.num_frames(), output_num_frames_); + RTC_DCHECK(stream_config.num_channels() == num_channels_ || + num_channels_ == 1); + + // Convert to the float range. + float* const* data_ptr = data; + if (output_num_frames_ != proc_num_frames_) { + // Convert to an intermediate buffer for subsequent resampling. + data_ptr = process_buffer_->channels(); + } + for (size_t i = 0; i < num_channels_; ++i) { + FloatS16ToFloat(data_->fbuf()->channels()[i], + proc_num_frames_, + data_ptr[i]); + } + + // Resample. + if (output_num_frames_ != proc_num_frames_) { + for (size_t i = 0; i < num_channels_; ++i) { + output_resamplers_[i]->Resample(data_ptr[i], + proc_num_frames_, + data[i], + output_num_frames_); + } + } + + // Upmix. 
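+  // (mono to N channels, done by duplicating channel 0 into the remaining
+  // output channels)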
+ for (size_t i = num_channels_; i < stream_config.num_channels(); ++i) { + memcpy(data[i], data[0], output_num_frames_ * sizeof(**data)); + } +} + +void AudioBuffer::InitForNewData() { + keyboard_data_ = NULL; + mixed_low_pass_valid_ = false; + reference_copied_ = false; + activity_ = AudioFrame::kVadUnknown; + num_channels_ = num_proc_channels_; + data_->set_num_channels(num_proc_channels_); + if (split_data_.get()) { + split_data_->set_num_channels(num_proc_channels_); + } +} + +const int16_t* const* AudioBuffer::channels_const() const { + return data_->ibuf_const()->channels(); +} + +int16_t* const* AudioBuffer::channels() { + mixed_low_pass_valid_ = false; + return data_->ibuf()->channels(); +} + +const int16_t* const* AudioBuffer::split_bands_const(size_t channel) const { + return split_data_.get() ? + split_data_->ibuf_const()->bands(channel) : + data_->ibuf_const()->bands(channel); +} + +int16_t* const* AudioBuffer::split_bands(size_t channel) { + mixed_low_pass_valid_ = false; + return split_data_.get() ? + split_data_->ibuf()->bands(channel) : + data_->ibuf()->bands(channel); +} + +const int16_t* const* AudioBuffer::split_channels_const(Band band) const { + if (split_data_.get()) { + return split_data_->ibuf_const()->channels(band); + } else { + return band == kBand0To8kHz ? data_->ibuf_const()->channels() : nullptr; + } +} + +int16_t* const* AudioBuffer::split_channels(Band band) { + mixed_low_pass_valid_ = false; + if (split_data_.get()) { + return split_data_->ibuf()->channels(band); + } else { + return band == kBand0To8kHz ? data_->ibuf()->channels() : nullptr; + } +} + +ChannelBuffer<int16_t>* AudioBuffer::data() { + mixed_low_pass_valid_ = false; + return data_->ibuf(); +} + +const ChannelBuffer<int16_t>* AudioBuffer::data() const { + return data_->ibuf_const(); +} + +ChannelBuffer<int16_t>* AudioBuffer::split_data() { + mixed_low_pass_valid_ = false; + return split_data_.get() ? split_data_->ibuf() : data_->ibuf(); +} + +const ChannelBuffer<int16_t>* AudioBuffer::split_data() const { + return split_data_.get() ? split_data_->ibuf_const() : data_->ibuf_const(); +} + +const float* const* AudioBuffer::channels_const_f() const { + return data_->fbuf_const()->channels(); +} + +float* const* AudioBuffer::channels_f() { + mixed_low_pass_valid_ = false; + return data_->fbuf()->channels(); +} + +const float* const* AudioBuffer::split_bands_const_f(size_t channel) const { + return split_data_.get() ? + split_data_->fbuf_const()->bands(channel) : + data_->fbuf_const()->bands(channel); +} + +float* const* AudioBuffer::split_bands_f(size_t channel) { + mixed_low_pass_valid_ = false; + return split_data_.get() ? + split_data_->fbuf()->bands(channel) : + data_->fbuf()->bands(channel); +} + +const float* const* AudioBuffer::split_channels_const_f(Band band) const { + if (split_data_.get()) { + return split_data_->fbuf_const()->channels(band); + } else { + return band == kBand0To8kHz ? data_->fbuf_const()->channels() : nullptr; + } +} + +float* const* AudioBuffer::split_channels_f(Band band) { + mixed_low_pass_valid_ = false; + if (split_data_.get()) { + return split_data_->fbuf()->channels(band); + } else { + return band == kBand0To8kHz ? 
data_->fbuf()->channels() : nullptr; + } +} + +ChannelBuffer<float>* AudioBuffer::data_f() { + mixed_low_pass_valid_ = false; + return data_->fbuf(); +} + +const ChannelBuffer<float>* AudioBuffer::data_f() const { + return data_->fbuf_const(); +} + +ChannelBuffer<float>* AudioBuffer::split_data_f() { + mixed_low_pass_valid_ = false; + return split_data_.get() ? split_data_->fbuf() : data_->fbuf(); +} + +const ChannelBuffer<float>* AudioBuffer::split_data_f() const { + return split_data_.get() ? split_data_->fbuf_const() : data_->fbuf_const(); +} + +const int16_t* AudioBuffer::mixed_low_pass_data() { + if (num_proc_channels_ == 1) { + return split_bands_const(0)[kBand0To8kHz]; + } + + if (!mixed_low_pass_valid_) { + if (!mixed_low_pass_channels_.get()) { + mixed_low_pass_channels_.reset( + new ChannelBuffer<int16_t>(num_split_frames_, 1)); + } + + DownmixToMono<int16_t, int32_t>(split_channels_const(kBand0To8kHz), + num_split_frames_, num_channels_, + mixed_low_pass_channels_->channels()[0]); + mixed_low_pass_valid_ = true; + } + return mixed_low_pass_channels_->channels()[0]; +} + +const int16_t* AudioBuffer::low_pass_reference(int channel) const { + if (!reference_copied_) { + return NULL; + } + + return low_pass_reference_channels_->channels()[channel]; +} + +const float* AudioBuffer::keyboard_data() const { + return keyboard_data_; +} + +void AudioBuffer::set_activity(AudioFrame::VADActivity activity) { + activity_ = activity; +} + +AudioFrame::VADActivity AudioBuffer::activity() const { + return activity_; +} + +size_t AudioBuffer::num_channels() const { + return num_channels_; +} + +void AudioBuffer::set_num_channels(size_t num_channels) { + num_channels_ = num_channels; + data_->set_num_channels(num_channels); + if (split_data_.get()) { + split_data_->set_num_channels(num_channels); + } +} + +size_t AudioBuffer::num_frames() const { + return proc_num_frames_; +} + +size_t AudioBuffer::num_frames_per_band() const { + return num_split_frames_; +} + +size_t AudioBuffer::num_keyboard_frames() const { + // We don't resample the keyboard channel. + return input_num_frames_; +} + +size_t AudioBuffer::num_bands() const { + return num_bands_; +} + +// The resampler is only for supporting 48kHz to 16kHz in the reverse stream. +void AudioBuffer::DeinterleaveFrom(AudioFrame* frame) { + RTC_DCHECK_EQ(frame->num_channels_, num_input_channels_); + RTC_DCHECK_EQ(frame->samples_per_channel_, input_num_frames_); + InitForNewData(); + // Initialized lazily because there's a different condition in CopyFrom. + if ((input_num_frames_ != proc_num_frames_) && !input_buffer_) { + input_buffer_.reset( + new IFChannelBuffer(input_num_frames_, num_proc_channels_)); + } + activity_ = frame->vad_activity_; + + int16_t* const* deinterleaved; + if (input_num_frames_ == proc_num_frames_) { + deinterleaved = data_->ibuf()->channels(); + } else { + deinterleaved = input_buffer_->ibuf()->channels(); + } + // TODO(yujo): handle muted frames more efficiently. + if (num_proc_channels_ == 1) { + // Downmix and deinterleave simultaneously. + DownmixInterleavedToMono(frame->data(), input_num_frames_, + num_input_channels_, deinterleaved[0]); + } else { + RTC_DCHECK_EQ(num_proc_channels_, num_input_channels_); + Deinterleave(frame->data(), + input_num_frames_, + num_proc_channels_, + deinterleaved); + } + + // Resample. 
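+  // (per the note above, e.g. 480-sample 48 kHz reverse-stream frames
+  // resampled down to the 160-sample 16 kHz processing rate)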
+ if (input_num_frames_ != proc_num_frames_) { + for (size_t i = 0; i < num_proc_channels_; ++i) { + input_resamplers_[i]->Resample(input_buffer_->fbuf_const()->channels()[i], + input_num_frames_, + data_->fbuf()->channels()[i], + proc_num_frames_); + } + } +} + +void AudioBuffer::InterleaveTo(AudioFrame* frame, bool data_changed) const { + frame->vad_activity_ = activity_; + if (!data_changed) { + return; + } + + RTC_DCHECK(frame->num_channels_ == num_channels_ || num_channels_ == 1); + RTC_DCHECK_EQ(frame->samples_per_channel_, output_num_frames_); + + // Resample if necessary. + IFChannelBuffer* data_ptr = data_.get(); + if (proc_num_frames_ != output_num_frames_) { + for (size_t i = 0; i < num_channels_; ++i) { + output_resamplers_[i]->Resample( + data_->fbuf()->channels()[i], proc_num_frames_, + output_buffer_->fbuf()->channels()[i], output_num_frames_); + } + data_ptr = output_buffer_.get(); + } + + // TODO(yujo): handle muted frames more efficiently. + if (frame->num_channels_ == num_channels_) { + Interleave(data_ptr->ibuf()->channels(), output_num_frames_, num_channels_, + frame->mutable_data()); + } else { + UpmixMonoToInterleaved(data_ptr->ibuf()->channels()[0], output_num_frames_, + frame->num_channels_, frame->mutable_data()); + } +} + +void AudioBuffer::CopyLowPassToReference() { + reference_copied_ = true; + if (!low_pass_reference_channels_.get() || + low_pass_reference_channels_->num_channels() != num_channels_) { + low_pass_reference_channels_.reset( + new ChannelBuffer<int16_t>(num_split_frames_, + num_proc_channels_)); + } + for (size_t i = 0; i < num_proc_channels_; i++) { + memcpy(low_pass_reference_channels_->channels()[i], + split_bands_const(i)[kBand0To8kHz], + low_pass_reference_channels_->num_frames_per_band() * + sizeof(split_bands_const(i)[kBand0To8kHz][0])); + } +} + +void AudioBuffer::SplitIntoFrequencyBands() { + splitting_filter_->Analysis(data_.get(), split_data_.get()); +} + +void AudioBuffer::MergeFrequencyBands() { + splitting_filter_->Synthesis(split_data_.get(), data_.get()); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/audio_buffer.h b/third_party/libwebrtc/webrtc/modules/audio_processing/audio_buffer.h new file mode 100644 index 0000000000..8451bdeeaa --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/audio_buffer.h @@ -0,0 +1,166 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AUDIO_BUFFER_H_ +#define MODULES_AUDIO_PROCESSING_AUDIO_BUFFER_H_ + +#include <memory> +#include <vector> + +#include "common_audio/channel_buffer.h" +#include "modules/audio_processing/include/audio_processing.h" +#include "modules/audio_processing/splitting_filter.h" +#include "modules/include/module_common_types.h" +#include "typedefs.h" // NOLINT(build/include) + +namespace webrtc { + +class PushSincResampler; +class IFChannelBuffer; + +enum Band { + kBand0To8kHz = 0, + kBand8To16kHz = 1, + kBand16To24kHz = 2 +}; + +class AudioBuffer { + public: + // TODO(ajm): Switch to take ChannelLayouts. 
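+  // For example, a 10 ms stereo capture at 48 kHz processed at 32 kHz and
+  // returned at 48 kHz would be AudioBuffer(480, 2, 320, 2, 480); the
+  // 320-frame processing rate implies two frequency bands (see
+  // NumBandsFromSamplesPerChannel() in the .cc file).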
+ AudioBuffer(size_t input_num_frames, + size_t num_input_channels, + size_t process_num_frames, + size_t num_process_channels, + size_t output_num_frames); + virtual ~AudioBuffer(); + + size_t num_channels() const; + void set_num_channels(size_t num_channels); + size_t num_frames() const; + size_t num_frames_per_band() const; + size_t num_keyboard_frames() const; + size_t num_bands() const; + + // Returns a pointer array to the full-band channels. + // Usage: + // channels()[channel][sample]. + // Where: + // 0 <= channel < |num_proc_channels_| + // 0 <= sample < |proc_num_frames_| + int16_t* const* channels(); + const int16_t* const* channels_const() const; + float* const* channels_f(); + const float* const* channels_const_f() const; + + // Returns a pointer array to the bands for a specific channel. + // Usage: + // split_bands(channel)[band][sample]. + // Where: + // 0 <= channel < |num_proc_channels_| + // 0 <= band < |num_bands_| + // 0 <= sample < |num_split_frames_| + int16_t* const* split_bands(size_t channel); + const int16_t* const* split_bands_const(size_t channel) const; + float* const* split_bands_f(size_t channel); + const float* const* split_bands_const_f(size_t channel) const; + + // Returns a pointer array to the channels for a specific band. + // Usage: + // split_channels(band)[channel][sample]. + // Where: + // 0 <= band < |num_bands_| + // 0 <= channel < |num_proc_channels_| + // 0 <= sample < |num_split_frames_| + int16_t* const* split_channels(Band band); + const int16_t* const* split_channels_const(Band band) const; + float* const* split_channels_f(Band band); + const float* const* split_channels_const_f(Band band) const; + + // Returns a pointer to the ChannelBuffer that encapsulates the full-band + // data. + ChannelBuffer<int16_t>* data(); + const ChannelBuffer<int16_t>* data() const; + ChannelBuffer<float>* data_f(); + const ChannelBuffer<float>* data_f() const; + + // Returns a pointer to the ChannelBuffer that encapsulates the split data. + ChannelBuffer<int16_t>* split_data(); + const ChannelBuffer<int16_t>* split_data() const; + ChannelBuffer<float>* split_data_f(); + const ChannelBuffer<float>* split_data_f() const; + + // Returns a pointer to the low-pass data downmixed to mono. If this data + // isn't already available it re-calculates it. + const int16_t* mixed_low_pass_data(); + const int16_t* low_pass_reference(int channel) const; + + const float* keyboard_data() const; + + void set_activity(AudioFrame::VADActivity activity); + AudioFrame::VADActivity activity() const; + + // Use for int16 interleaved data. + void DeinterleaveFrom(AudioFrame* audioFrame); + // If |data_changed| is false, only the non-audio data members will be copied + // to |frame|. + void InterleaveTo(AudioFrame* frame, bool data_changed) const; + + // Use for float deinterleaved data. + void CopyFrom(const float* const* data, const StreamConfig& stream_config); + void CopyTo(const StreamConfig& stream_config, float* const* data); + void CopyLowPassToReference(); + + // Splits the signal into different bands. + void SplitIntoFrequencyBands(); + // Recombine the different bands into one signal. + void MergeFrequencyBands(); + + private: + FRIEND_TEST_ALL_PREFIXES(AudioBufferTest, + SetNumChannelsSetsChannelBuffersNumChannels); + // Called from DeinterleaveFrom() and CopyFrom(). + void InitForNewData(); + + // The audio is passed into DeinterleaveFrom() or CopyFrom() with input + // format (samples per channel and number of channels). 
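+  // For 10 ms chunks these frame counts are 80, 160, 320 or 480, depending
+  // on the sample rate at each stage.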
+ const size_t input_num_frames_; + const size_t num_input_channels_; + // The audio is stored by DeinterleaveFrom() or CopyFrom() with processing + // format. + const size_t proc_num_frames_; + const size_t num_proc_channels_; + // The audio is returned by InterleaveTo() and CopyTo() with output samples + // per channels and the current number of channels. This last one can be + // changed at any time using set_num_channels(). + const size_t output_num_frames_; + size_t num_channels_; + + size_t num_bands_; + size_t num_split_frames_; + bool mixed_low_pass_valid_; + bool reference_copied_; + AudioFrame::VADActivity activity_; + + const float* keyboard_data_; + std::unique_ptr<IFChannelBuffer> data_; + std::unique_ptr<IFChannelBuffer> split_data_; + std::unique_ptr<SplittingFilter> splitting_filter_; + std::unique_ptr<ChannelBuffer<int16_t> > mixed_low_pass_channels_; + std::unique_ptr<ChannelBuffer<int16_t> > low_pass_reference_channels_; + std::unique_ptr<IFChannelBuffer> input_buffer_; + std::unique_ptr<IFChannelBuffer> output_buffer_; + std::unique_ptr<ChannelBuffer<float> > process_buffer_; + std::vector<std::unique_ptr<PushSincResampler>> input_resamplers_; + std::vector<std::unique_ptr<PushSincResampler>> output_resamplers_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AUDIO_BUFFER_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/audio_buffer_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/audio_buffer_unittest.cc new file mode 100644 index 0000000000..4cbb98eb50 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/audio_buffer_unittest.cc @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/audio_buffer.h" +#include "test/gtest.h" + +namespace webrtc { + +namespace { + +const size_t kNumFrames = 480u; +const size_t kStereo = 2u; +const size_t kMono = 1u; + +void ExpectNumChannels(const AudioBuffer& ab, size_t num_channels) { + EXPECT_EQ(ab.data()->num_channels(), num_channels); + EXPECT_EQ(ab.data_f()->num_channels(), num_channels); + EXPECT_EQ(ab.split_data()->num_channels(), num_channels); + EXPECT_EQ(ab.split_data_f()->num_channels(), num_channels); + EXPECT_EQ(ab.num_channels(), num_channels); +} + +} // namespace + +TEST(AudioBufferTest, SetNumChannelsSetsChannelBuffersNumChannels) { + AudioBuffer ab(kNumFrames, kStereo, kNumFrames, kStereo, kNumFrames); + ExpectNumChannels(ab, kStereo); + ab.set_num_channels(kMono); + ExpectNumChannels(ab, kMono); + ab.InitForNewData(); + ExpectNumChannels(ab, kStereo); +} + +#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) +TEST(AudioBufferTest, SetNumChannelsDeathTest) { + AudioBuffer ab(kNumFrames, kMono, kNumFrames, kMono, kNumFrames); + EXPECT_DEATH(ab.set_num_channels(kStereo), "num_channels"); +} +#endif + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/audio_processing_c_gn/moz.build b/third_party/libwebrtc/webrtc/modules/audio_processing/audio_processing_c_gn/moz.build new file mode 100644 index 0000000000..ae9fa47e0b --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/audio_processing_c_gn/moz.build @@ -0,0 +1,317 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["CHROMIUM_BUILD"] = True +DEFINES["V8_DEPRECATION_WARNINGS"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_RESTRICT_LOGGING"] = True + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "/ipc/chromium/src", + "/ipc/glue", + "/third_party/libwebrtc/webrtc/", + "/third_party/libwebrtc/webrtc/common_audio/resampler/include/", + "/third_party/libwebrtc/webrtc/common_audio/signal_processing/include/", + "/third_party/libwebrtc/webrtc/common_audio/vad/include/" +] + +SOURCES += [ + "/third_party/libwebrtc/webrtc/modules/audio_processing/agc/legacy/analog_agc.c" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/webrtc/modules/audio_processing/agc/legacy/digital_agc.c" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + DEFINES["WTF_USE_DYNAMIC_ANNOTATIONS"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION"] = "r12b" + DEFINES["DISABLE_NACL"] = True + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["NO_TCMALLOC"] = True + DEFINES["USE_OPENSSL_CERTS"] = "1" + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["__GNU_SOURCE"] = "1" + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["NO_TCMALLOC"] = True + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORE"] = "0" + + OS_LIBS += [ + "-framework Foundation" + ] + +if CONFIG["OS_TARGET"] == "DragonFly": + + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + + UNIFIED_SOURCES += [ + "/third_party/libwebrtc/webrtc/modules/audio_processing/ns/noise_suppression.c", + "/third_party/libwebrtc/webrtc/modules/audio_processing/ns/ns_core.c" + ] + +if CONFIG["OS_TARGET"] == "FreeBSD": + + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "NetBSD": + + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NO_TCMALLOC"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "1" + DEFINES["UNICODE"] = True + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_CRT_SECURE_NO_WARNINGS"] = 
True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_USING_V110_SDK71_"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + + UNIFIED_SOURCES += [ + "/third_party/libwebrtc/webrtc/modules/audio_processing/ns/noise_suppression_x.c", + "/third_party/libwebrtc/webrtc/modules/audio_processing/ns/nsx_core.c", + "/third_party/libwebrtc/webrtc/modules/audio_processing/ns/nsx_core_c.c" + ] + +if CONFIG["CPU_ARCH"] == "arm": + + CFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + + UNIFIED_SOURCES += [ + "/third_party/libwebrtc/webrtc/modules/audio_processing/ns/noise_suppression_x.c", + "/third_party/libwebrtc/webrtc/modules/audio_processing/ns/nsx_core.c", + "/third_party/libwebrtc/webrtc/modules/audio_processing/ns/nsx_core_c.c" + ] + +if CONFIG["CPU_ARCH"] == "mips64": + + UNIFIED_SOURCES += [ + "/third_party/libwebrtc/webrtc/modules/audio_processing/ns/noise_suppression.c", + "/third_party/libwebrtc/webrtc/modules/audio_processing/ns/ns_core.c" + ] + +if CONFIG["CPU_ARCH"] == "ppc64": + + UNIFIED_SOURCES += [ + "/third_party/libwebrtc/webrtc/modules/audio_processing/ns/noise_suppression.c", + "/third_party/libwebrtc/webrtc/modules/audio_processing/ns/ns_core.c" + ] + +if CONFIG["CPU_ARCH"] == "x86": + + UNIFIED_SOURCES += [ + "/third_party/libwebrtc/webrtc/modules/audio_processing/ns/noise_suppression.c", + "/third_party/libwebrtc/webrtc/modules/audio_processing/ns/ns_core.c" + ] + +if not CONFIG["MOZ_DEBUG"] and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_FORTIFY_SOURCE"] = "2" + +if not CONFIG["MOZ_DEBUG"] and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_FORTIFY_SOURCE"] = "2" + +if not CONFIG["MOZ_DEBUG"] and CONFIG["OS_TARGET"] == "DragonFly": + + DEFINES["_FORTIFY_SOURCE"] = "2" + +if not CONFIG["MOZ_DEBUG"] and CONFIG["OS_TARGET"] == "FreeBSD": + + DEFINES["_FORTIFY_SOURCE"] = "2" + +if not CONFIG["MOZ_DEBUG"] and CONFIG["OS_TARGET"] == "NetBSD": + + DEFINES["_FORTIFY_SOURCE"] = "2" + +if not CONFIG["MOZ_DEBUG"] and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_FORTIFY_SOURCE"] = "2" + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CFLAGS += [ + "-msse2" + ] + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Android": + + UNIFIED_SOURCES += [ + "/third_party/libwebrtc/webrtc/modules/audio_processing/ns/noise_suppression.c", + "/third_party/libwebrtc/webrtc/modules/audio_processing/ns/ns_core.c" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["CR_XCODE_VERSION"] = "0120" + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["CR_XCODE_VERSION"] = "0920" + + UNIFIED_SOURCES += [ + "/third_party/libwebrtc/webrtc/modules/audio_processing/ns/noise_suppression.c", + "/third_party/libwebrtc/webrtc/modules/audio_processing/ns/ns_core.c" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "FreeBSD": + + CFLAGS += [ + "-msse2" + ] + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "FreeBSD": + + UNIFIED_SOURCES += [ + "/third_party/libwebrtc/webrtc/modules/audio_processing/ns/noise_suppression.c", + 
"/third_party/libwebrtc/webrtc/modules/audio_processing/ns/ns_core.c" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["DISABLE_NACL"] = True + DEFINES["NO_TCMALLOC"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CFLAGS += [ + "-msse2" + ] + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + UNIFIED_SOURCES += [ + "/third_party/libwebrtc/webrtc/modules/audio_processing/ns/noise_suppression.c", + "/third_party/libwebrtc/webrtc/modules/audio_processing/ns/ns_core.c" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "NetBSD": + + CFLAGS += [ + "-msse2" + ] + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "NetBSD": + + UNIFIED_SOURCES += [ + "/third_party/libwebrtc/webrtc/modules/audio_processing/ns/noise_suppression.c", + "/third_party/libwebrtc/webrtc/modules/audio_processing/ns/ns_core.c" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "OpenBSD": + + CFLAGS += [ + "-msse2" + ] + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "OpenBSD": + + UNIFIED_SOURCES += [ + "/third_party/libwebrtc/webrtc/modules/audio_processing/ns/noise_suppression.c", + "/third_party/libwebrtc/webrtc/modules/audio_processing/ns/ns_core.c" + ] + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "WINNT": + + UNIFIED_SOURCES += [ + "/third_party/libwebrtc/webrtc/modules/audio_processing/ns/noise_suppression.c", + "/third_party/libwebrtc/webrtc/modules/audio_processing/ns/ns_core.c" + ] + +Library("audio_processing_c_gn") diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/audio_processing_gn/moz.build b/third_party/libwebrtc/webrtc/modules/audio_processing/audio_processing_gn/moz.build new file mode 100644 index 0000000000..63737c372a --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/audio_processing_gn/moz.build @@ -0,0 +1,370 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["CHROMIUM_BUILD"] = True +DEFINES["V8_DEPRECATION_WARNINGS"] = True +DEFINES["WEBRTC_APM_DEBUG_DUMP"] = "1" +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_INTELLIGIBILITY_ENHANCER"] = "0" +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_RESTRICT_LOGGING"] = True + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "/ipc/chromium/src", + "/ipc/glue", + "/third_party/libwebrtc/webrtc/", + "/third_party/libwebrtc/webrtc/common_audio/resampler/include/", + "/third_party/libwebrtc/webrtc/common_audio/signal_processing/include/", + "/third_party/libwebrtc/webrtc/common_audio/vad/include/", + "/third_party/libwebrtc/webrtc/modules/audio_coding/codecs/isac/main/include/" +] + +SOURCES += [ + "/third_party/libwebrtc/webrtc/modules/audio_processing/aec/echo_cancellation.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/aecm/aecm_core.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/aecm/aecm_core_c.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/aecm/echo_control_mobile.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/beamformer/covariance_matrix_generator.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/echo_cancellation_impl.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/echo_control_mobile_impl.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/echo_detector/normalized_covariance_estimator.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/gain_control_impl.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/noise_suppression_impl.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/rms_level.cc" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/webrtc/modules/audio_processing/aec/aec_core.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/aec/aec_resampler.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/adaptive_fir_filter.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/aec3_common.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/aec3_fft.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/aec_state.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/block_framer.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/block_processor.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/block_processor_metrics.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/cascaded_biquad_filter.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/comfort_noise_generator.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/decimator.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/downsampled_render_buffer.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/echo_canceller3.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/echo_path_delay_estimator.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/echo_path_variability.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/echo_remover.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/echo_remover_metrics.cc", + 
"/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/erl_estimator.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/erle_estimator.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/frame_blocker.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/main_filter_update_gain.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/matched_filter.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/matched_filter_lag_aggregator.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/output_selector.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/render_buffer.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/render_delay_buffer.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/render_delay_controller.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/render_delay_controller_metrics.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/render_signal_analyzer.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/residual_echo_estimator.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/shadow_filter_update_gain.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/subtractor.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/suppression_filter.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/aec3/suppression_gain.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/agc/agc.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/agc/agc_manager_direct.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/agc/loudness_histogram.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/agc/utility.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/agc2/gain_controller2.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/audio_buffer.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/audio_processing_impl.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/beamformer/array_util.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/echo_detector/circular_buffer.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/echo_detector/mean_variance_estimator.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/echo_detector/moving_max.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/gain_control_for_experimental_agc.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/include/audio_processing.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/include/config.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/biquad_filter.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/down_sampler.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/gain_applier.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/gain_selector.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/level_controller.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/noise_level_estimator.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/noise_spectrum_estimator.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/peak_level_estimator.cc", + 
"/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/saturating_gain_estimator.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/signal_classifier.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/level_estimator_impl.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/logging/apm_data_dumper.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/low_cut_filter.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/residual_echo_detector.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/splitting_filter.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/three_band_filter_bank.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/transient/moving_moments.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/transient/transient_detector.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/transient/transient_suppressor.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/transient/wpd_node.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/transient/wpd_tree.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/typing_detection.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/utility/block_mean_calculator.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/utility/delay_estimator.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/utility/delay_estimator_wrapper.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/utility/ooura_fft.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/vad/gmm.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/vad/pitch_based_vad.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/vad/pitch_internal.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/vad/pole_zero_filter.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/vad/standalone_vad.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/vad/vad_audio_proc.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/vad/vad_circular_buffer.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/vad/voice_activity_detector.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/voice_detection_impl.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + DEFINES["WTF_USE_DYNAMIC_ANNOTATIONS"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION"] = "r12b" + DEFINES["DISABLE_NACL"] = True + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["NO_TCMALLOC"] = True + DEFINES["USE_OPENSSL_CERTS"] = "1" + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["__GNU_SOURCE"] = "1" + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["NO_TCMALLOC"] = True + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORE"] = "0" + + OS_LIBS += [ + "-framework Foundation" + ] + +if CONFIG["OS_TARGET"] == "DragonFly": + + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_NS_FLOAT"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + +if 
CONFIG["OS_TARGET"] == "FreeBSD": + + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + + OS_LIBS += [ + "m", + "rt" + ] + +if CONFIG["OS_TARGET"] == "NetBSD": + + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NO_TCMALLOC"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "1" + DEFINES["UNICODE"] = True + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_CRT_SECURE_NO_WARNINGS"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_USING_V110_SDK71_"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + DEFINES["WEBRTC_NS_FIXED"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + DEFINES["WEBRTC_NS_FIXED"] = True + +if CONFIG["CPU_ARCH"] == "mips64": + + DEFINES["WEBRTC_NS_FLOAT"] = True + +if CONFIG["CPU_ARCH"] == "ppc64": + + DEFINES["WEBRTC_NS_FLOAT"] = True + +if CONFIG["CPU_ARCH"] == "x86": + + DEFINES["WEBRTC_NS_FLOAT"] = True + +if not CONFIG["MOZ_DEBUG"] and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_FORTIFY_SOURCE"] = "2" + +if not CONFIG["MOZ_DEBUG"] and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_FORTIFY_SOURCE"] = "2" + +if not CONFIG["MOZ_DEBUG"] and CONFIG["OS_TARGET"] == "DragonFly": + + DEFINES["_FORTIFY_SOURCE"] = "2" + +if not CONFIG["MOZ_DEBUG"] and CONFIG["OS_TARGET"] == "FreeBSD": + + DEFINES["_FORTIFY_SOURCE"] = "2" + +if not CONFIG["MOZ_DEBUG"] and CONFIG["OS_TARGET"] == "NetBSD": + + DEFINES["_FORTIFY_SOURCE"] = "2" + +if not CONFIG["MOZ_DEBUG"] and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_FORTIFY_SOURCE"] = "2" + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["WEBRTC_NS_FLOAT"] = True + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["CR_XCODE_VERSION"] = "0120" + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["CR_XCODE_VERSION"] = "0920" + DEFINES["WEBRTC_NS_FLOAT"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "FreeBSD": + + CXXFLAGS += [ + "-msse2" + ] + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "FreeBSD": + + DEFINES["WEBRTC_NS_FLOAT"] = True + +if CONFIG["CPU_ARCH"] == 
"aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["DISABLE_NACL"] = True + DEFINES["NO_TCMALLOC"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["WEBRTC_NS_FLOAT"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "NetBSD": + + CXXFLAGS += [ + "-msse2" + ] + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "NetBSD": + + DEFINES["WEBRTC_NS_FLOAT"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "OpenBSD": + + CXXFLAGS += [ + "-msse2" + ] + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["WEBRTC_NS_FLOAT"] = True + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["WEBRTC_NS_FLOAT"] = True + +Library("audio_processing_gn") diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/audio_processing_impl.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/audio_processing_impl.cc new file mode 100644 index 0000000000..d61ab97c6e --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/audio_processing_impl.cc @@ -0,0 +1,2011 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/audio_processing_impl.h" + +#include <math.h> +#include <algorithm> +#include <string> + +#include "common_audio/audio_converter.h" +#include "common_audio/channel_buffer.h" +#include "common_audio/include/audio_util.h" +#include "common_audio/signal_processing/include/signal_processing_library.h" +#include "modules/audio_processing/aec/aec_core.h" +#include "modules/audio_processing/aec3/echo_canceller3.h" +#include "modules/audio_processing/agc/agc_manager_direct.h" +#include "modules/audio_processing/agc2/gain_controller2.h" +#include "modules/audio_processing/audio_buffer.h" +#include "modules/audio_processing/beamformer/nonlinear_beamformer.h" +#include "modules/audio_processing/common.h" +#include "modules/audio_processing/echo_cancellation_impl.h" +#include "modules/audio_processing/echo_control_mobile_impl.h" +#include "modules/audio_processing/gain_control_for_experimental_agc.h" +#include "modules/audio_processing/gain_control_impl.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" +#include "rtc_base/platform_file.h" +#include "rtc_base/refcountedobject.h" +#include "rtc_base/trace_event.h" +#if WEBRTC_INTELLIGIBILITY_ENHANCER +#include "modules/audio_processing/intelligibility/intelligibility_enhancer.h" +#endif +#include "modules/audio_processing/level_controller/level_controller.h" +#include "modules/audio_processing/level_estimator_impl.h" +#include "modules/audio_processing/low_cut_filter.h" +#include "modules/audio_processing/noise_suppression_impl.h" +#include "modules/audio_processing/residual_echo_detector.h" +#include "modules/audio_processing/transient/transient_suppressor.h" +#include "modules/audio_processing/voice_detection_impl.h" +#include "modules/include/module_common_types.h" +#include "system_wrappers/include/file_wrapper.h" +#include "system_wrappers/include/metrics.h" + +// 
Verify that the define for the intelligibility enhancer is properly
+// set.
+#if !defined(WEBRTC_INTELLIGIBILITY_ENHANCER) || \
+    (WEBRTC_INTELLIGIBILITY_ENHANCER != 0 &&     \
+     WEBRTC_INTELLIGIBILITY_ENHANCER != 1)
+#error "Set WEBRTC_INTELLIGIBILITY_ENHANCER to either 0 or 1"
+#endif
+
+#define RETURN_ON_ERR(expr) \
+  do {                      \
+    int err = (expr);       \
+    if (err != kNoError) {  \
+      return err;           \
+    }                       \
+  } while (0)
+
+namespace webrtc {
+
+constexpr int AudioProcessing::kNativeSampleRatesHz[];
+
+namespace {
+
+static bool LayoutHasKeyboard(AudioProcessing::ChannelLayout layout) {
+  switch (layout) {
+    case AudioProcessing::kMono:
+    case AudioProcessing::kStereo:
+      return false;
+    case AudioProcessing::kMonoAndKeyboard:
+    case AudioProcessing::kStereoAndKeyboard:
+      return true;
+  }
+
+  RTC_NOTREACHED();
+  return false;
+}
+
+bool SampleRateSupportsMultiBand(int sample_rate_hz) {
+  return sample_rate_hz == AudioProcessing::kSampleRate32kHz ||
+         sample_rate_hz == AudioProcessing::kSampleRate48kHz;
+}
+
+int FindNativeProcessRateToUse(int minimum_rate, bool band_splitting_required) {
+#ifdef WEBRTC_ARCH_ARM_FAMILY
+  constexpr int kMaxSplittingNativeProcessRate =
+      AudioProcessing::kSampleRate32kHz;
+#else
+  constexpr int kMaxSplittingNativeProcessRate =
+      AudioProcessing::kSampleRate48kHz;
+#endif
+  static_assert(
+      kMaxSplittingNativeProcessRate <= AudioProcessing::kMaxNativeSampleRateHz,
+      "");
+  const int uppermost_native_rate = band_splitting_required
+                                        ? kMaxSplittingNativeProcessRate
+                                        : AudioProcessing::kSampleRate48kHz;
+
+  for (auto rate : AudioProcessing::kNativeSampleRatesHz) {
+    if (rate >= uppermost_native_rate) {
+      return uppermost_native_rate;
+    }
+    if (rate >= minimum_rate) {
+      return rate;
+    }
+  }
+  RTC_NOTREACHED();
+  return uppermost_native_rate;
+}
+
+// Maximum lengths (in samples) that frames passed from the render side to
+// the capture side can have, per band and per full frame (does not apply to
+// AEC3).
+static const size_t kMaxAllowedValuesOfSamplesPerBand = 160;
+static const size_t kMaxAllowedValuesOfSamplesPerFrame = 480;
+
+// Maximum number of frames to buffer in the render queue.
+// TODO(peah): Decrease this once we properly handle hugely unbalanced
+// reverse and forward call numbers.
+static const size_t kMaxNumFramesToBuffer = 100;
+
+class HighPassFilterImpl : public HighPassFilter {
+ public:
+  explicit HighPassFilterImpl(AudioProcessingImpl* apm) : apm_(apm) {}
+  ~HighPassFilterImpl() override = default;
+
+  // HighPassFilter implementation.
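+  // Enable() below keeps no local state; it is a thin shim over the owning
+  // APM's config, roughly equivalent to the following sketch:
+  //   AudioProcessing::Config cfg = apm->GetConfig();
+  //   cfg.high_pass_filter.enabled = enable;
+  //   apm->ApplyConfig(cfg);
+  // with MutateConfig() performing the read-modify-write in one step.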
+ int Enable(bool enable) override { + apm_->MutateConfig([enable](AudioProcessing::Config* config) { + config->high_pass_filter.enabled = enable; + }); + + return AudioProcessing::kNoError; + } + + bool is_enabled() const override { + return apm_->GetConfig().high_pass_filter.enabled; + } + + private: + AudioProcessingImpl* apm_; + RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(HighPassFilterImpl); +}; + +webrtc::InternalAPMStreamsConfig ToStreamsConfig( + const ProcessingConfig& api_format) { + webrtc::InternalAPMStreamsConfig result; + result.input_sample_rate = api_format.input_stream().sample_rate_hz(); + result.input_num_channels = api_format.input_stream().num_channels(); + result.output_num_channels = api_format.output_stream().num_channels(); + result.render_input_num_channels = + api_format.reverse_input_stream().num_channels(); + result.render_input_sample_rate = + api_format.reverse_input_stream().sample_rate_hz(); + result.output_sample_rate = api_format.output_stream().sample_rate_hz(); + result.render_output_sample_rate = + api_format.reverse_output_stream().sample_rate_hz(); + result.render_output_num_channels = + api_format.reverse_output_stream().num_channels(); + return result; +} +} // namespace + +// Throughout webrtc, it's assumed that success is represented by zero. +static_assert(AudioProcessing::kNoError == 0, "kNoError must be zero"); + +AudioProcessingImpl::ApmSubmoduleStates::ApmSubmoduleStates( + bool capture_post_processor_enabled) + : capture_post_processor_enabled_(capture_post_processor_enabled) {} + +bool AudioProcessingImpl::ApmSubmoduleStates::Update( + bool low_cut_filter_enabled, + bool echo_canceller_enabled, + bool mobile_echo_controller_enabled, + bool residual_echo_detector_enabled, + bool noise_suppressor_enabled, + bool intelligibility_enhancer_enabled, + bool beamformer_enabled, + bool adaptive_gain_controller_enabled, + bool gain_controller2_enabled, + bool level_controller_enabled, + bool echo_controller_enabled, + bool voice_activity_detector_enabled, + bool level_estimator_enabled, + bool transient_suppressor_enabled) { + bool changed = false; + changed |= (low_cut_filter_enabled != low_cut_filter_enabled_); + changed |= (echo_canceller_enabled != echo_canceller_enabled_); + changed |= + (mobile_echo_controller_enabled != mobile_echo_controller_enabled_); + changed |= + (residual_echo_detector_enabled != residual_echo_detector_enabled_); + changed |= (noise_suppressor_enabled != noise_suppressor_enabled_); + changed |= + (intelligibility_enhancer_enabled != intelligibility_enhancer_enabled_); + changed |= (beamformer_enabled != beamformer_enabled_); + changed |= + (adaptive_gain_controller_enabled != adaptive_gain_controller_enabled_); + changed |= + (gain_controller2_enabled != gain_controller2_enabled_); + changed |= (level_controller_enabled != level_controller_enabled_); + changed |= (echo_controller_enabled != echo_controller_enabled_); + changed |= (level_estimator_enabled != level_estimator_enabled_); + changed |= + (voice_activity_detector_enabled != voice_activity_detector_enabled_); + changed |= (transient_suppressor_enabled != transient_suppressor_enabled_); + if (changed) { + low_cut_filter_enabled_ = low_cut_filter_enabled; + echo_canceller_enabled_ = echo_canceller_enabled; + mobile_echo_controller_enabled_ = mobile_echo_controller_enabled; + residual_echo_detector_enabled_ = residual_echo_detector_enabled; + noise_suppressor_enabled_ = noise_suppressor_enabled; + intelligibility_enhancer_enabled_ = intelligibility_enhancer_enabled; + 
beamformer_enabled_ = beamformer_enabled;
+    adaptive_gain_controller_enabled_ = adaptive_gain_controller_enabled;
+    gain_controller2_enabled_ = gain_controller2_enabled;
+    level_controller_enabled_ = level_controller_enabled;
+    echo_controller_enabled_ = echo_controller_enabled;
+    level_estimator_enabled_ = level_estimator_enabled;
+    voice_activity_detector_enabled_ = voice_activity_detector_enabled;
+    transient_suppressor_enabled_ = transient_suppressor_enabled;
+  }
+
+  changed |= first_update_;
+  first_update_ = false;
+  return changed;
+}
+
+bool AudioProcessingImpl::ApmSubmoduleStates::CaptureMultiBandSubModulesActive()
+    const {
+#if WEBRTC_INTELLIGIBILITY_ENHANCER
+  return CaptureMultiBandProcessingActive() ||
+         intelligibility_enhancer_enabled_ || voice_activity_detector_enabled_;
+#else
+  return CaptureMultiBandProcessingActive() ||
+         voice_activity_detector_enabled_;
+#endif
+}
+
+bool AudioProcessingImpl::ApmSubmoduleStates::CaptureMultiBandProcessingActive()
+    const {
+  return low_cut_filter_enabled_ || echo_canceller_enabled_ ||
+         mobile_echo_controller_enabled_ || noise_suppressor_enabled_ ||
+         beamformer_enabled_ || adaptive_gain_controller_enabled_ ||
+         echo_controller_enabled_;
+}
+
+bool AudioProcessingImpl::ApmSubmoduleStates::CaptureFullBandProcessingActive()
+    const {
+  return level_controller_enabled_ || gain_controller2_enabled_ ||
+         capture_post_processor_enabled_;
+}
+
+bool AudioProcessingImpl::ApmSubmoduleStates::RenderMultiBandSubModulesActive()
+    const {
+  return RenderMultiBandProcessingActive() || echo_canceller_enabled_ ||
+         mobile_echo_controller_enabled_ || adaptive_gain_controller_enabled_ ||
+         echo_controller_enabled_;
+}
+
+bool AudioProcessingImpl::ApmSubmoduleStates::RenderMultiBandProcessingActive()
+    const {
+#if WEBRTC_INTELLIGIBILITY_ENHANCER
+  return intelligibility_enhancer_enabled_;
+#else
+  return false;
+#endif
+}
+
+struct AudioProcessingImpl::ApmPublicSubmodules {
+  ApmPublicSubmodules() {}
+  // Accessed from outside APM without any lock acquired.
+  std::unique_ptr<EchoCancellationImpl> echo_cancellation;
+  std::unique_ptr<EchoControlMobileImpl> echo_control_mobile;
+  std::unique_ptr<GainControlImpl> gain_control;
+  std::unique_ptr<LevelEstimatorImpl> level_estimator;
+  std::unique_ptr<NoiseSuppressionImpl> noise_suppression;
+  std::unique_ptr<VoiceDetectionImpl> voice_detection;
+  std::unique_ptr<GainControlForExperimentalAgc>
+      gain_control_for_experimental_agc;
+
+  // Accessed internally from both render and capture.
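+  // (That is, from both processing paths; accesses are guarded by the
+  // render and capture locks as appropriate.)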
+ std::unique_ptr<TransientSuppressor> transient_suppressor; +#if WEBRTC_INTELLIGIBILITY_ENHANCER + std::unique_ptr<IntelligibilityEnhancer> intelligibility_enhancer; +#endif +}; + +struct AudioProcessingImpl::ApmPrivateSubmodules { + ApmPrivateSubmodules(NonlinearBeamformer* beamformer, + std::unique_ptr<PostProcessing> capture_post_processor) + : beamformer(beamformer), + capture_post_processor(std::move(capture_post_processor)) {} + // Accessed internally from capture or during initialization + std::unique_ptr<NonlinearBeamformer> beamformer; + std::unique_ptr<AgcManagerDirect> agc_manager; + std::unique_ptr<GainController2> gain_controller2; + std::unique_ptr<LowCutFilter> low_cut_filter; + std::unique_ptr<LevelController> level_controller; + std::unique_ptr<ResidualEchoDetector> residual_echo_detector; + std::unique_ptr<EchoControl> echo_controller; + std::unique_ptr<PostProcessing> capture_post_processor; +}; + +AudioProcessing* AudioProcessing::Create() { + webrtc::Config config; + return Create(config, nullptr, nullptr, nullptr); +} + +AudioProcessing* AudioProcessing::Create(const webrtc::Config& config) { + return Create(config, nullptr, nullptr, nullptr); +} + +AudioProcessing* AudioProcessing::Create(const webrtc::Config& config, + NonlinearBeamformer* beamformer) { + return Create(config, nullptr, nullptr, beamformer); +} + +AudioProcessing* AudioProcessing::Create( + const webrtc::Config& config, + std::unique_ptr<PostProcessing> capture_post_processor, + std::unique_ptr<EchoControlFactory> echo_control_factory, + NonlinearBeamformer* beamformer) { + AudioProcessingImpl* apm = new rtc::RefCountedObject<AudioProcessingImpl>( + config, std::move(capture_post_processor), + std::move(echo_control_factory), beamformer); + if (apm->Initialize() != kNoError) { + delete apm; + apm = nullptr; + } + + return apm; +} + +AudioProcessingImpl::AudioProcessingImpl(const webrtc::Config& config) + : AudioProcessingImpl(config, nullptr, nullptr, nullptr) {} + +AudioProcessingImpl::AudioProcessingImpl( + const webrtc::Config& config, + std::unique_ptr<PostProcessing> capture_post_processor, + std::unique_ptr<EchoControlFactory> echo_control_factory, + NonlinearBeamformer* beamformer) + : high_pass_filter_impl_(new HighPassFilterImpl(this)), + echo_control_factory_(std::move(echo_control_factory)), + submodule_states_(!!capture_post_processor), + public_submodules_(new ApmPublicSubmodules()), + private_submodules_( + new ApmPrivateSubmodules(beamformer, + std::move(capture_post_processor))), + constants_(config.Get<ExperimentalAgc>().startup_min_volume, + config.Get<ExperimentalAgc>().clipped_level_min, +#if defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS) + false), +#else + config.Get<ExperimentalAgc>().enabled), +#endif +#if defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS) + capture_(false, +#else + capture_(config.Get<ExperimentalNs>().enabled, +#endif + config.Get<Beamforming>().array_geometry, + config.Get<Beamforming>().target_direction), + capture_nonlocked_(config.Get<Beamforming>().enabled, + config.Get<Intelligibility>().enabled) { + { + rtc::CritScope cs_render(&crit_render_); + rtc::CritScope cs_capture(&crit_capture_); + + // Mark Echo Controller enabled if a factory is injected. 
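+    // (Illustrative: a client opts in by injecting such a factory at
+    //  creation time, e.g.
+    //    AudioProcessing::Create(config, nullptr,
+    //                            std::unique_ptr<EchoControlFactory>(
+    //                                new EchoCanceller3Factory()),
+    //                            nullptr);
+    //  in which case echo control is routed through AEC3 rather than the
+    //  legacy AEC.)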
+    capture_nonlocked_.echo_controller_enabled =
+        static_cast<bool>(echo_control_factory_);
+
+    public_submodules_->echo_cancellation.reset(
+        new EchoCancellationImpl(&crit_render_, &crit_capture_));
+    public_submodules_->echo_control_mobile.reset(
+        new EchoControlMobileImpl(&crit_render_, &crit_capture_));
+    public_submodules_->gain_control.reset(
+        new GainControlImpl(&crit_render_, &crit_capture_));
+    public_submodules_->level_estimator.reset(
+        new LevelEstimatorImpl(&crit_capture_));
+    public_submodules_->noise_suppression.reset(
+        new NoiseSuppressionImpl(&crit_capture_));
+    public_submodules_->voice_detection.reset(
+        new VoiceDetectionImpl(&crit_capture_));
+    public_submodules_->gain_control_for_experimental_agc.reset(
+        new GainControlForExperimentalAgc(
+            public_submodules_->gain_control.get(), &crit_capture_));
+    private_submodules_->residual_echo_detector.reset(
+        new ResidualEchoDetector());
+
+    // TODO(peah): Move this creation to happen only when the level controller
+    // is enabled.
+    private_submodules_->level_controller.reset(new LevelController());
+
+    // TODO(alessiob): Move the injected gain controller once injection is
+    // implemented.
+    private_submodules_->gain_controller2.reset(new GainController2());
+
+    RTC_LOG(LS_INFO) << "Capture post processor activated: "
+                     << !!private_submodules_->capture_post_processor;
+  }
+
+  SetExtraOptions(config);
+}
+
+AudioProcessingImpl::~AudioProcessingImpl() {
+  // Depends on gain_control_ and
+  // public_submodules_->gain_control_for_experimental_agc.
+  private_submodules_->agc_manager.reset();
+  // Depends on gain_control_.
+  public_submodules_->gain_control_for_experimental_agc.reset();
+}
+
+int AudioProcessingImpl::Initialize() {
+  // Run in a single-threaded manner during initialization.
+  rtc::CritScope cs_render(&crit_render_);
+  rtc::CritScope cs_capture(&crit_capture_);
+  return InitializeLocked();
+}
+
+int AudioProcessingImpl::Initialize(int capture_input_sample_rate_hz,
+                                    int capture_output_sample_rate_hz,
+                                    int render_input_sample_rate_hz,
+                                    ChannelLayout capture_input_layout,
+                                    ChannelLayout capture_output_layout,
+                                    ChannelLayout render_input_layout) {
+  const ProcessingConfig processing_config = {
+      {{capture_input_sample_rate_hz, ChannelsFromLayout(capture_input_layout),
+        LayoutHasKeyboard(capture_input_layout)},
+       {capture_output_sample_rate_hz,
+        ChannelsFromLayout(capture_output_layout),
+        LayoutHasKeyboard(capture_output_layout)},
+       {render_input_sample_rate_hz, ChannelsFromLayout(render_input_layout),
+        LayoutHasKeyboard(render_input_layout)},
+       {render_input_sample_rate_hz, ChannelsFromLayout(render_input_layout),
+        LayoutHasKeyboard(render_input_layout)}}};
+
+  return Initialize(processing_config);
+}
+
+int AudioProcessingImpl::Initialize(const ProcessingConfig& processing_config) {
+  // Run in a single-threaded manner during initialization.
+  rtc::CritScope cs_render(&crit_render_);
+  rtc::CritScope cs_capture(&crit_capture_);
+  return InitializeLocked(processing_config);
+}
+
+int AudioProcessingImpl::MaybeInitializeRender(
+    const ProcessingConfig& processing_config) {
+  return MaybeInitialize(processing_config, false);
+}
+
+int AudioProcessingImpl::MaybeInitializeCapture(
+    const ProcessingConfig& processing_config,
+    bool force_initialization) {
+  return MaybeInitialize(processing_config, force_initialization);
+}
+
+// Calls InitializeLocked() if any of the audio parameters have changed from
+// their current values (must be called while holding the crit_render_ lock).
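+// Note that only crit_capture_ is acquired below; together with the
+// already-held render lock this serializes re-initialization against both
+// the capture and the render processing paths.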
+int AudioProcessingImpl::MaybeInitialize( + const ProcessingConfig& processing_config, + bool force_initialization) { + // Called from both threads. Thread check is therefore not possible. + if (processing_config == formats_.api_format && !force_initialization) { + return kNoError; + } + + rtc::CritScope cs_capture(&crit_capture_); + return InitializeLocked(processing_config); +} + +int AudioProcessingImpl::InitializeLocked() { + UpdateActiveSubmoduleStates(); + + const int capture_audiobuffer_num_channels = + capture_nonlocked_.beamformer_enabled + ? formats_.api_format.input_stream().num_channels() + : formats_.api_format.output_stream().num_channels(); + + const int render_audiobuffer_num_output_frames = + formats_.api_format.reverse_output_stream().num_frames() == 0 + ? formats_.render_processing_format.num_frames() + : formats_.api_format.reverse_output_stream().num_frames(); + if (formats_.api_format.reverse_input_stream().num_channels() > 0) { + render_.render_audio.reset(new AudioBuffer( + formats_.api_format.reverse_input_stream().num_frames(), + formats_.api_format.reverse_input_stream().num_channels(), + formats_.render_processing_format.num_frames(), + formats_.render_processing_format.num_channels(), + render_audiobuffer_num_output_frames)); + if (formats_.api_format.reverse_input_stream() != + formats_.api_format.reverse_output_stream()) { + render_.render_converter = AudioConverter::Create( + formats_.api_format.reverse_input_stream().num_channels(), + formats_.api_format.reverse_input_stream().num_frames(), + formats_.api_format.reverse_output_stream().num_channels(), + formats_.api_format.reverse_output_stream().num_frames()); + } else { + render_.render_converter.reset(nullptr); + } + } else { + render_.render_audio.reset(nullptr); + render_.render_converter.reset(nullptr); + } + + capture_.capture_audio.reset( + new AudioBuffer(formats_.api_format.input_stream().num_frames(), + formats_.api_format.input_stream().num_channels(), + capture_nonlocked_.capture_processing_format.num_frames(), + capture_audiobuffer_num_channels, + formats_.api_format.output_stream().num_frames())); + + public_submodules_->echo_cancellation->Initialize( + proc_sample_rate_hz(), num_reverse_channels(), num_output_channels(), + num_proc_channels()); + AllocateRenderQueue(); + + int success = public_submodules_->echo_cancellation->enable_metrics(true); + RTC_DCHECK_EQ(0, success); + success = public_submodules_->echo_cancellation->enable_delay_logging(true); + RTC_DCHECK_EQ(0, success); + public_submodules_->echo_control_mobile->Initialize( + proc_split_sample_rate_hz(), num_reverse_channels(), + num_output_channels()); + + public_submodules_->gain_control->Initialize(num_proc_channels(), + proc_sample_rate_hz()); + if (constants_.use_experimental_agc) { + if (!private_submodules_->agc_manager.get()) { + private_submodules_->agc_manager.reset(new AgcManagerDirect( + public_submodules_->gain_control.get(), + public_submodules_->gain_control_for_experimental_agc.get(), + constants_.agc_startup_min_volume, constants_.agc_clipped_level_min)); + } + private_submodules_->agc_manager->Initialize(); + private_submodules_->agc_manager->SetCaptureMuted( + capture_.output_will_be_muted); + public_submodules_->gain_control_for_experimental_agc->Initialize(); + } + InitializeTransient(); + InitializeBeamformer(); +#if WEBRTC_INTELLIGIBILITY_ENHANCER + InitializeIntelligibility(); +#endif + InitializeLowCutFilter(); + public_submodules_->noise_suppression->Initialize(num_proc_channels(), + 
proc_sample_rate_hz()); + public_submodules_->voice_detection->Initialize(proc_split_sample_rate_hz()); + public_submodules_->level_estimator->Initialize(); + InitializeLevelController(); + InitializeResidualEchoDetector(); + InitializeEchoController(); + InitializeGainController2(); + InitializePostProcessor(); + + if (aec_dump_) { + aec_dump_->WriteInitMessage(ToStreamsConfig(formats_.api_format)); + } + return kNoError; +} + +int AudioProcessingImpl::InitializeLocked(const ProcessingConfig& config) { + UpdateActiveSubmoduleStates(); + + for (const auto& stream : config.streams) { + if (stream.num_channels() > 0 && stream.sample_rate_hz() <= 0) { + return kBadSampleRateError; + } + } + + const size_t num_in_channels = config.input_stream().num_channels(); + const size_t num_out_channels = config.output_stream().num_channels(); + + // Need at least one input channel. + // Need either one output channel or as many outputs as there are inputs. + if (num_in_channels == 0 || + !(num_out_channels == 1 || num_out_channels == num_in_channels)) { + return kBadNumberChannelsError; + } + + if (capture_nonlocked_.beamformer_enabled && + num_in_channels != capture_.array_geometry.size()) { + return kBadNumberChannelsError; + } + + formats_.api_format = config; + + int capture_processing_rate = FindNativeProcessRateToUse( + std::min(formats_.api_format.input_stream().sample_rate_hz(), + formats_.api_format.output_stream().sample_rate_hz()), + submodule_states_.CaptureMultiBandSubModulesActive() || + submodule_states_.RenderMultiBandSubModulesActive()); + + capture_nonlocked_.capture_processing_format = + StreamConfig(capture_processing_rate); + + int render_processing_rate; + if (!capture_nonlocked_.echo_controller_enabled) { + render_processing_rate = FindNativeProcessRateToUse( + std::min(formats_.api_format.reverse_input_stream().sample_rate_hz(), + formats_.api_format.reverse_output_stream().sample_rate_hz()), + submodule_states_.CaptureMultiBandSubModulesActive() || + submodule_states_.RenderMultiBandSubModulesActive()); + } else { + render_processing_rate = capture_processing_rate; + } + + // TODO(aluebs): Remove this restriction once we figure out why the 3-band + // splitting filter degrades the AEC performance. + if (render_processing_rate > kSampleRate32kHz && + !capture_nonlocked_.echo_controller_enabled) { + render_processing_rate = submodule_states_.RenderMultiBandProcessingActive() + ? kSampleRate32kHz + : kSampleRate16kHz; + } + + // If the forward sample rate is 8 kHz, the render stream is also processed + // at this rate. + if (capture_nonlocked_.capture_processing_format.sample_rate_hz() == + kSampleRate8kHz) { + render_processing_rate = kSampleRate8kHz; + } else { + render_processing_rate = + std::max(render_processing_rate, static_cast<int>(kSampleRate16kHz)); + } + + // Always downmix the render stream to mono for analysis. This has been + // demonstrated to work well for AEC in most practical scenarios. 
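+  // When no multi-band render submodule is active, the render format is
+  // instead passed through unchanged (the else branch below).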
+ if (submodule_states_.RenderMultiBandSubModulesActive()) { + formats_.render_processing_format = StreamConfig(render_processing_rate, 1); + } else { + formats_.render_processing_format = StreamConfig( + formats_.api_format.reverse_input_stream().sample_rate_hz(), + formats_.api_format.reverse_input_stream().num_channels()); + } + + if (capture_nonlocked_.capture_processing_format.sample_rate_hz() == + kSampleRate32kHz || + capture_nonlocked_.capture_processing_format.sample_rate_hz() == + kSampleRate48kHz) { + capture_nonlocked_.split_rate = kSampleRate16kHz; + } else { + capture_nonlocked_.split_rate = + capture_nonlocked_.capture_processing_format.sample_rate_hz(); + } + + return InitializeLocked(); +} + +void AudioProcessingImpl::ApplyConfig(const AudioProcessing::Config& config) { + config_ = config; + + bool config_ok = LevelController::Validate(config_.level_controller); + if (!config_ok) { + RTC_LOG(LS_ERROR) << "AudioProcessing module config error" << std::endl + << "level_controller: " + << LevelController::ToString(config_.level_controller) + << std::endl + << "Reverting to default parameter set"; + config_.level_controller = AudioProcessing::Config::LevelController(); + } + + // Run in a single-threaded manner when applying the settings. + rtc::CritScope cs_render(&crit_render_); + rtc::CritScope cs_capture(&crit_capture_); + + // TODO(peah): Replace the use of capture_nonlocked_.level_controller_enabled + // with the value in config_ everywhere in the code. + if (capture_nonlocked_.level_controller_enabled != + config_.level_controller.enabled) { + capture_nonlocked_.level_controller_enabled = + config_.level_controller.enabled; + // TODO(peah): Remove the conditional initialization to always initialize + // the level controller regardless of whether it is enabled or not. + InitializeLevelController(); + } + RTC_LOG(LS_INFO) << "Level controller activated: " + << capture_nonlocked_.level_controller_enabled; + + private_submodules_->level_controller->ApplyConfig(config_.level_controller); + + InitializeLowCutFilter(); + + RTC_LOG(LS_INFO) << "Highpass filter activated: " + << config_.high_pass_filter.enabled; + + // Deprecated way of activating AEC3. + // TODO(gustaf): Remove when possible. + if (config.echo_canceller3.enabled && !echo_control_factory_) { + capture_nonlocked_.echo_controller_enabled = + config_.echo_canceller3.enabled; + echo_control_factory_ = + std::unique_ptr<EchoControlFactory>(new EchoCanceller3Factory()); + InitializeEchoController(); + RTC_LOG(LS_INFO) << "Echo canceller 3 activated: " + << capture_nonlocked_.echo_controller_enabled; + } + + config_ok = GainController2::Validate(config_.gain_controller2); + if (!config_ok) { + RTC_LOG(LS_ERROR) << "AudioProcessing module config error" << std::endl + << "Gain Controller 2: " + << GainController2::ToString(config_.gain_controller2) + << std::endl + << "Reverting to default parameter set"; + config_.gain_controller2 = AudioProcessing::Config::GainController2(); + } + InitializeGainController2(); + private_submodules_->gain_controller2->ApplyConfig(config_.gain_controller2); + RTC_LOG(LS_INFO) << "Gain Controller 2 activated: " + << config_.gain_controller2.enabled; +} + +void AudioProcessingImpl::SetExtraOptions(const webrtc::Config& config) { + // Run in a single-threaded manner when setting the extra options. 
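+  // (Acquiring both critical sections, render first, mirrors Initialize()
+  // and ApplyConfig() above and excludes concurrent capture- and
+  // render-side processing for the duration of the call.)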
+ rtc::CritScope cs_render(&crit_render_); + rtc::CritScope cs_capture(&crit_capture_); + + public_submodules_->echo_cancellation->SetExtraOptions(config); + + if (capture_.transient_suppressor_enabled != + config.Get<ExperimentalNs>().enabled) { + capture_.transient_suppressor_enabled = + config.Get<ExperimentalNs>().enabled; + InitializeTransient(); + } + +#if WEBRTC_INTELLIGIBILITY_ENHANCER + if (capture_nonlocked_.intelligibility_enabled != + config.Get<Intelligibility>().enabled) { + capture_nonlocked_.intelligibility_enabled = + config.Get<Intelligibility>().enabled; + InitializeIntelligibility(); + } +#endif + +#ifdef WEBRTC_ANDROID_PLATFORM_BUILD + if (capture_nonlocked_.beamformer_enabled != + config.Get<Beamforming>().enabled) { + capture_nonlocked_.beamformer_enabled = config.Get<Beamforming>().enabled; + if (config.Get<Beamforming>().array_geometry.size() > 1) { + capture_.array_geometry = config.Get<Beamforming>().array_geometry; + } + capture_.target_direction = config.Get<Beamforming>().target_direction; + InitializeBeamformer(); + } +#endif // WEBRTC_ANDROID_PLATFORM_BUILD +} + +int AudioProcessingImpl::proc_sample_rate_hz() const { + // Used as callback from submodules, hence locking is not allowed. + return capture_nonlocked_.capture_processing_format.sample_rate_hz(); +} + +int AudioProcessingImpl::proc_split_sample_rate_hz() const { + // Used as callback from submodules, hence locking is not allowed. + return capture_nonlocked_.split_rate; +} + +size_t AudioProcessingImpl::num_reverse_channels() const { + // Used as callback from submodules, hence locking is not allowed. + return formats_.render_processing_format.num_channels(); +} + +size_t AudioProcessingImpl::num_input_channels() const { + // Used as callback from submodules, hence locking is not allowed. + return formats_.api_format.input_stream().num_channels(); +} + +size_t AudioProcessingImpl::num_proc_channels() const { + // Used as callback from submodules, hence locking is not allowed. + return (capture_nonlocked_.beamformer_enabled || + capture_nonlocked_.echo_controller_enabled) + ? 1 + : num_output_channels(); +} + +size_t AudioProcessingImpl::num_output_channels() const { + // Used as callback from submodules, hence locking is not allowed. + return formats_.api_format.output_stream().num_channels(); +} + +void AudioProcessingImpl::set_output_will_be_muted(bool muted) { + rtc::CritScope cs(&crit_capture_); + capture_.output_will_be_muted = muted; + if (private_submodules_->agc_manager.get()) { + private_submodules_->agc_manager->SetCaptureMuted( + capture_.output_will_be_muted); + } +} + + +int AudioProcessingImpl::ProcessStream(const float* const* src, + size_t samples_per_channel, + int input_sample_rate_hz, + ChannelLayout input_layout, + int output_sample_rate_hz, + ChannelLayout output_layout, + float* const* dest) { + TRACE_EVENT0("webrtc", "AudioProcessing::ProcessStream_ChannelLayout"); + StreamConfig input_stream; + StreamConfig output_stream; + { + // Access the formats_.api_format.input_stream beneath the capture lock. 
+ // The lock must be released as it is later required in the call + // to ProcessStream(,,,); + rtc::CritScope cs(&crit_capture_); + input_stream = formats_.api_format.input_stream(); + output_stream = formats_.api_format.output_stream(); + } + + input_stream.set_sample_rate_hz(input_sample_rate_hz); + input_stream.set_num_channels(ChannelsFromLayout(input_layout)); + input_stream.set_has_keyboard(LayoutHasKeyboard(input_layout)); + output_stream.set_sample_rate_hz(output_sample_rate_hz); + output_stream.set_num_channels(ChannelsFromLayout(output_layout)); + output_stream.set_has_keyboard(LayoutHasKeyboard(output_layout)); + + if (samples_per_channel != input_stream.num_frames()) { + return kBadDataLengthError; + } + return ProcessStream(src, input_stream, output_stream, dest); +} + +int AudioProcessingImpl::ProcessStream(const float* const* src, + const StreamConfig& input_config, + const StreamConfig& output_config, + float* const* dest) { + TRACE_EVENT0("webrtc", "AudioProcessing::ProcessStream_StreamConfig"); + ProcessingConfig processing_config; + bool reinitialization_required = false; + { + // Acquire the capture lock in order to safely call the function + // that retrieves the render side data. This function accesses apm + // getters that need the capture lock held when being called. + rtc::CritScope cs_capture(&crit_capture_); + EmptyQueuedRenderAudio(); + + if (!src || !dest) { + return kNullPointerError; + } + + processing_config = formats_.api_format; + reinitialization_required = UpdateActiveSubmoduleStates(); + } + + processing_config.input_stream() = input_config; + processing_config.output_stream() = output_config; + + { + // Do conditional reinitialization. + rtc::CritScope cs_render(&crit_render_); + RETURN_ON_ERR( + MaybeInitializeCapture(processing_config, reinitialization_required)); + } + rtc::CritScope cs_capture(&crit_capture_); + RTC_DCHECK_EQ(processing_config.input_stream().num_frames(), + formats_.api_format.input_stream().num_frames()); + + if (aec_dump_) { + RecordUnprocessedCaptureStream(src); + } + + capture_.capture_audio->CopyFrom(src, formats_.api_format.input_stream()); + RETURN_ON_ERR(ProcessCaptureStreamLocked()); + capture_.capture_audio->CopyTo(formats_.api_format.output_stream(), dest); + + if (aec_dump_) { + RecordProcessedCaptureStream(dest); + } + return kNoError; +} + +void AudioProcessingImpl::QueueBandedRenderAudio(AudioBuffer* audio) { + EchoCancellationImpl::PackRenderAudioBuffer(audio, num_output_channels(), + num_reverse_channels(), + &aec_render_queue_buffer_); + + RTC_DCHECK_GE(160, audio->num_frames_per_band()); + + // Insert the samples into the queue. + if (!aec_render_signal_queue_->Insert(&aec_render_queue_buffer_)) { + // The data queue is full and needs to be emptied. + EmptyQueuedRenderAudio(); + + // Retry the insert (should always work). + bool result = aec_render_signal_queue_->Insert(&aec_render_queue_buffer_); + RTC_DCHECK(result); + } + + EchoControlMobileImpl::PackRenderAudioBuffer(audio, num_output_channels(), + num_reverse_channels(), + &aecm_render_queue_buffer_); + + // Insert the samples into the queue. + if (!aecm_render_signal_queue_->Insert(&aecm_render_queue_buffer_)) { + // The data queue is full and needs to be emptied. + EmptyQueuedRenderAudio(); + + // Retry the insert (should always work). 
+ bool result = aecm_render_signal_queue_->Insert(&aecm_render_queue_buffer_); + RTC_DCHECK(result); + } + + if (!constants_.use_experimental_agc) { + GainControlImpl::PackRenderAudioBuffer(audio, &agc_render_queue_buffer_); + // Insert the samples into the queue. + if (!agc_render_signal_queue_->Insert(&agc_render_queue_buffer_)) { + // The data queue is full and needs to be emptied. + EmptyQueuedRenderAudio(); + + // Retry the insert (should always work). + bool result = agc_render_signal_queue_->Insert(&agc_render_queue_buffer_); + RTC_DCHECK(result); + } + } +} + +void AudioProcessingImpl::QueueNonbandedRenderAudio(AudioBuffer* audio) { + ResidualEchoDetector::PackRenderAudioBuffer(audio, &red_render_queue_buffer_); + + // Insert the samples into the queue. + if (!red_render_signal_queue_->Insert(&red_render_queue_buffer_)) { + // The data queue is full and needs to be emptied. + EmptyQueuedRenderAudio(); + + // Retry the insert (should always work). + bool result = red_render_signal_queue_->Insert(&red_render_queue_buffer_); + RTC_DCHECK(result); + } +} + +void AudioProcessingImpl::AllocateRenderQueue() { + const size_t new_aec_render_queue_element_max_size = + std::max(static_cast<size_t>(1), + kMaxAllowedValuesOfSamplesPerBand * + EchoCancellationImpl::NumCancellersRequired( + num_output_channels(), num_reverse_channels())); + + const size_t new_aecm_render_queue_element_max_size = + std::max(static_cast<size_t>(1), + kMaxAllowedValuesOfSamplesPerBand * + EchoControlMobileImpl::NumCancellersRequired( + num_output_channels(), num_reverse_channels())); + + const size_t new_agc_render_queue_element_max_size = + std::max(static_cast<size_t>(1), kMaxAllowedValuesOfSamplesPerBand); + + const size_t new_red_render_queue_element_max_size = + std::max(static_cast<size_t>(1), kMaxAllowedValuesOfSamplesPerFrame); + + // Reallocate the queues if the queue item sizes are too small to fit the + // data to put in the queues. 
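+  // Every queue above follows the same overflow idiom: a full SwapQueue is
+  // treated as a recoverable condition, drained on the spot, and the insert
+  // is retried. A condensed sketch (|queue| and |buffer| stand in for any of
+  // the queue/buffer pairs used above):
+  //   if (!queue->Insert(&buffer)) {
+  //     EmptyQueuedRenderAudio();              // Drain the full queue...
+  //     bool result = queue->Insert(&buffer);  // ...then retry.
+  //     RTC_DCHECK(result);  // Separate bool so release builds still insert.
+  //   }
+  // The code below then grows each queue lazily, only when the required
+  // element size exceeds the current allocation.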
+ if (aec_render_queue_element_max_size_ < + new_aec_render_queue_element_max_size) { + aec_render_queue_element_max_size_ = new_aec_render_queue_element_max_size; + + std::vector<float> template_queue_element( + aec_render_queue_element_max_size_); + + aec_render_signal_queue_.reset( + new SwapQueue<std::vector<float>, RenderQueueItemVerifier<float>>( + kMaxNumFramesToBuffer, template_queue_element, + RenderQueueItemVerifier<float>( + aec_render_queue_element_max_size_))); + + aec_render_queue_buffer_.resize(aec_render_queue_element_max_size_); + aec_capture_queue_buffer_.resize(aec_render_queue_element_max_size_); + } else { + aec_render_signal_queue_->Clear(); + } + + if (aecm_render_queue_element_max_size_ < + new_aecm_render_queue_element_max_size) { + aecm_render_queue_element_max_size_ = + new_aecm_render_queue_element_max_size; + + std::vector<int16_t> template_queue_element( + aecm_render_queue_element_max_size_); + + aecm_render_signal_queue_.reset( + new SwapQueue<std::vector<int16_t>, RenderQueueItemVerifier<int16_t>>( + kMaxNumFramesToBuffer, template_queue_element, + RenderQueueItemVerifier<int16_t>( + aecm_render_queue_element_max_size_))); + + aecm_render_queue_buffer_.resize(aecm_render_queue_element_max_size_); + aecm_capture_queue_buffer_.resize(aecm_render_queue_element_max_size_); + } else { + aecm_render_signal_queue_->Clear(); + } + + if (agc_render_queue_element_max_size_ < + new_agc_render_queue_element_max_size) { + agc_render_queue_element_max_size_ = new_agc_render_queue_element_max_size; + + std::vector<int16_t> template_queue_element( + agc_render_queue_element_max_size_); + + agc_render_signal_queue_.reset( + new SwapQueue<std::vector<int16_t>, RenderQueueItemVerifier<int16_t>>( + kMaxNumFramesToBuffer, template_queue_element, + RenderQueueItemVerifier<int16_t>( + agc_render_queue_element_max_size_))); + + agc_render_queue_buffer_.resize(agc_render_queue_element_max_size_); + agc_capture_queue_buffer_.resize(agc_render_queue_element_max_size_); + } else { + agc_render_signal_queue_->Clear(); + } + + if (red_render_queue_element_max_size_ < + new_red_render_queue_element_max_size) { + red_render_queue_element_max_size_ = new_red_render_queue_element_max_size; + + std::vector<float> template_queue_element( + red_render_queue_element_max_size_); + + red_render_signal_queue_.reset( + new SwapQueue<std::vector<float>, RenderQueueItemVerifier<float>>( + kMaxNumFramesToBuffer, template_queue_element, + RenderQueueItemVerifier<float>( + red_render_queue_element_max_size_))); + + red_render_queue_buffer_.resize(red_render_queue_element_max_size_); + red_capture_queue_buffer_.resize(red_render_queue_element_max_size_); + } else { + red_render_signal_queue_->Clear(); + } +} + +void AudioProcessingImpl::EmptyQueuedRenderAudio() { + rtc::CritScope cs_capture(&crit_capture_); + while (aec_render_signal_queue_->Remove(&aec_capture_queue_buffer_)) { + public_submodules_->echo_cancellation->ProcessRenderAudio( + aec_capture_queue_buffer_); + } + + while (aecm_render_signal_queue_->Remove(&aecm_capture_queue_buffer_)) { + public_submodules_->echo_control_mobile->ProcessRenderAudio( + aecm_capture_queue_buffer_); + } + + while (agc_render_signal_queue_->Remove(&agc_capture_queue_buffer_)) { + public_submodules_->gain_control->ProcessRenderAudio( + agc_capture_queue_buffer_); + } + + while (red_render_signal_queue_->Remove(&red_capture_queue_buffer_)) { + private_submodules_->residual_echo_detector->AnalyzeRenderAudio( + red_capture_queue_buffer_); + } +} + +int 
AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
+  TRACE_EVENT0("webrtc", "AudioProcessing::ProcessStream_AudioFrame");
+  {
+    // Acquire the capture lock in order to safely call the function
+    // that retrieves the render side data. This function accesses apm
+    // getters that need the capture lock held when being called.
+    // The lock needs to be released as
+    // public_submodules_->echo_control_mobile->is_enabled() acquires this
+    // lock as well.
+    rtc::CritScope cs_capture(&crit_capture_);
+    EmptyQueuedRenderAudio();
+  }
+
+  if (!frame) {
+    return kNullPointerError;
+  }
+  // Must be a native rate.
+  if (frame->sample_rate_hz_ != kSampleRate8kHz &&
+      frame->sample_rate_hz_ != kSampleRate16kHz &&
+      frame->sample_rate_hz_ != kSampleRate32kHz &&
+      frame->sample_rate_hz_ != kSampleRate48kHz) {
+    return kBadSampleRateError;
+  }
+
+  ProcessingConfig processing_config;
+  bool reinitialization_required = false;
+  {
+    // Acquire the lock for the access of api_format.
+    // The lock is released immediately due to the conditional
+    // reinitialization.
+    rtc::CritScope cs_capture(&crit_capture_);
+    // TODO(ajm): The input and output rates and channels are currently
+    // constrained to be identical in the int16 interface.
+    processing_config = formats_.api_format;
+
+    reinitialization_required = UpdateActiveSubmoduleStates();
+  }
+  processing_config.input_stream().set_sample_rate_hz(frame->sample_rate_hz_);
+  processing_config.input_stream().set_num_channels(frame->num_channels_);
+  processing_config.output_stream().set_sample_rate_hz(frame->sample_rate_hz_);
+  processing_config.output_stream().set_num_channels(frame->num_channels_);
+
+  {
+    // Do conditional reinitialization.
+    rtc::CritScope cs_render(&crit_render_);
+    RETURN_ON_ERR(
+        MaybeInitializeCapture(processing_config, reinitialization_required));
+  }
+  rtc::CritScope cs_capture(&crit_capture_);
+  if (frame->samples_per_channel_ !=
+      formats_.api_format.input_stream().num_frames()) {
+    return kBadDataLengthError;
+  }
+
+  if (aec_dump_) {
+    RecordUnprocessedCaptureStream(*frame);
+  }
+
+  capture_.capture_audio->DeinterleaveFrom(frame);
+  RETURN_ON_ERR(ProcessCaptureStreamLocked());
+  capture_.capture_audio->InterleaveTo(
+      frame, submodule_states_.CaptureMultiBandProcessingActive() ||
+                 submodule_states_.CaptureFullBandProcessingActive());
+
+  if (aec_dump_) {
+    RecordProcessedCaptureStream(*frame);
+  }
+
+  return kNoError;
+}
+
+int AudioProcessingImpl::ProcessCaptureStreamLocked() {
+  // Ensure that the AEC and AECM are not both active at the same time.
+  // TODO(peah): Simplify once the public API Enable functions for these
+  // are moved to APM.
+  RTC_DCHECK(!(public_submodules_->echo_cancellation->is_enabled() &&
+               public_submodules_->echo_control_mobile->is_enabled()));
+
+  MaybeUpdateHistograms();
+
+  AudioBuffer* capture_buffer = capture_.capture_audio.get();  // For brevity.
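+  // For orientation, the capture chain below runs in this order: input RMS
+  // logging, echo-controller capture analysis, experimental-AGC pre-analysis,
+  // band splitting, beamformer analysis, low-cut filtering, AGC/NS analysis,
+  // AEC (or AECM / echo controller), noise suppression, intelligibility
+  // noise-estimate update, voice detection, AGC, band merging, residual echo
+  // detection, transient suppression, gain controller 2, level controller,
+  // the capture post processor, and finally level estimation and output RMS
+  // logging.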
+
+  capture_input_rms_.Analyze(rtc::ArrayView<const int16_t>(
+      capture_buffer->channels_const()[0],
+      capture_nonlocked_.capture_processing_format.num_frames()));
+  const bool log_rms = ++capture_rms_interval_counter_ >= 1000;
+  if (log_rms) {
+    capture_rms_interval_counter_ = 0;
+    RmsLevel::Levels levels = capture_input_rms_.AverageAndPeak();
+    RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.ApmCaptureInputLevelAverageRms",
+                                levels.average, 1, RmsLevel::kMinLevelDb, 64);
+    RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.ApmCaptureInputLevelPeakRms",
+                                levels.peak, 1, RmsLevel::kMinLevelDb, 64);
+  }
+
+  if (private_submodules_->echo_controller) {
+    // TODO(peah): Reactivate analogue AGC gain detection once the analogue AGC
+    // issues have been addressed.
+    capture_.echo_path_gain_change = false;
+    private_submodules_->echo_controller->AnalyzeCapture(capture_buffer);
+  }
+
+  if (constants_.use_experimental_agc &&
+      public_submodules_->gain_control->is_enabled()) {
+    private_submodules_->agc_manager->AnalyzePreProcess(
+        capture_buffer->channels()[0], capture_buffer->num_channels(),
+        capture_nonlocked_.capture_processing_format.num_frames());
+  }
+
+  if (submodule_states_.CaptureMultiBandSubModulesActive() &&
+      SampleRateSupportsMultiBand(
+          capture_nonlocked_.capture_processing_format.sample_rate_hz())) {
+    capture_buffer->SplitIntoFrequencyBands();
+  }
+
+  if (private_submodules_->echo_controller) {
+    // Force down-mixing of the number of channels after the detection of
+    // capture signal saturation.
+    // TODO(peah): Look into ensuring that this kind of tampering with the
+    // AudioBuffer functionality should not be needed.
+    capture_buffer->set_num_channels(1);
+  }
+
+  if (capture_nonlocked_.beamformer_enabled) {
+    private_submodules_->beamformer->AnalyzeChunk(
+        *capture_buffer->split_data_f());
+    // Discards all channels but the leftmost one.
+    capture_buffer->set_num_channels(1);
+  }
+
+  // TODO(peah): Move the AEC3 low-cut filter to this place.
+  if (private_submodules_->low_cut_filter &&
+      !private_submodules_->echo_controller) {
+    private_submodules_->low_cut_filter->Process(capture_buffer);
+  }
+  RETURN_ON_ERR(
+      public_submodules_->gain_control->AnalyzeCaptureAudio(capture_buffer));
+  public_submodules_->noise_suppression->AnalyzeCaptureAudio(capture_buffer);
+
+  // Ensure that the stream delay was set before the call to the
+  // AEC ProcessCaptureAudio function.
+  if (public_submodules_->echo_cancellation->is_enabled() &&
+      !was_stream_delay_set()) {
+    return AudioProcessing::kStreamParameterNotSetError;
+  }
+
+  if (private_submodules_->echo_controller) {
+    private_submodules_->echo_controller->ProcessCapture(
+        capture_buffer, capture_.echo_path_gain_change);
+  } else {
+    RETURN_ON_ERR(public_submodules_->echo_cancellation->ProcessCaptureAudio(
+        capture_buffer, stream_delay_ms()));
+  }
+
+  if (public_submodules_->echo_control_mobile->is_enabled() &&
+      public_submodules_->noise_suppression->is_enabled()) {
+    capture_buffer->CopyLowPassToReference();
+  }
+  public_submodules_->noise_suppression->ProcessCaptureAudio(capture_buffer);
+#if WEBRTC_INTELLIGIBILITY_ENHANCER
+  if (capture_nonlocked_.intelligibility_enabled) {
+    RTC_DCHECK(public_submodules_->noise_suppression->is_enabled());
+    int gain_db = public_submodules_->gain_control->is_enabled() ?
+                  public_submodules_->gain_control->compression_gain_db() :
+                  0;
+    float gain = std::pow(10.f, gain_db / 20.f);
+    gain *= capture_nonlocked_.level_controller_enabled ?
+ private_submodules_->level_controller->GetLastGain() : + 1.f; + public_submodules_->intelligibility_enhancer->SetCaptureNoiseEstimate( + public_submodules_->noise_suppression->NoiseEstimate(), gain); + } +#endif + + // Ensure that the stream delay was set before the call to the + // AECM ProcessCaptureAudio function. + if (public_submodules_->echo_control_mobile->is_enabled() && + !was_stream_delay_set()) { + return AudioProcessing::kStreamParameterNotSetError; + } + + if (!(private_submodules_->echo_controller || + public_submodules_->echo_cancellation->is_enabled())) { + RETURN_ON_ERR(public_submodules_->echo_control_mobile->ProcessCaptureAudio( + capture_buffer, stream_delay_ms())); + } + + if (capture_nonlocked_.beamformer_enabled) { + private_submodules_->beamformer->PostFilter(capture_buffer->split_data_f()); + } + + public_submodules_->voice_detection->ProcessCaptureAudio(capture_buffer); + + if (constants_.use_experimental_agc && + public_submodules_->gain_control->is_enabled() && + (!capture_nonlocked_.beamformer_enabled || + private_submodules_->beamformer->is_target_present())) { + private_submodules_->agc_manager->Process( + capture_buffer->split_bands_const(0)[kBand0To8kHz], + capture_buffer->num_frames_per_band(), capture_nonlocked_.split_rate); + } + RETURN_ON_ERR(public_submodules_->gain_control->ProcessCaptureAudio( + capture_buffer, echo_cancellation()->stream_has_echo())); + + if (submodule_states_.CaptureMultiBandProcessingActive() && + SampleRateSupportsMultiBand( + capture_nonlocked_.capture_processing_format.sample_rate_hz())) { + capture_buffer->MergeFrequencyBands(); + } + + if (config_.residual_echo_detector.enabled) { + private_submodules_->residual_echo_detector->AnalyzeCaptureAudio( + rtc::ArrayView<const float>(capture_buffer->channels_f()[0], + capture_buffer->num_frames())); + } + + // TODO(aluebs): Investigate if the transient suppression placement should be + // before or after the AGC. + if (capture_.transient_suppressor_enabled) { + float voice_probability = + private_submodules_->agc_manager.get() + ? private_submodules_->agc_manager->voice_probability() + : 1.f; + + public_submodules_->transient_suppressor->Suppress( + capture_buffer->channels_f()[0], capture_buffer->num_frames(), + capture_buffer->num_channels(), + capture_buffer->split_bands_const_f(0)[kBand0To8kHz], + capture_buffer->num_frames_per_band(), capture_buffer->keyboard_data(), + capture_buffer->num_keyboard_frames(), voice_probability, + capture_.key_pressed); + } + + if (config_.gain_controller2.enabled) { + private_submodules_->gain_controller2->Process(capture_buffer); + } + + if (capture_nonlocked_.level_controller_enabled) { + private_submodules_->level_controller->Process(capture_buffer); + } + + if (private_submodules_->capture_post_processor) { + private_submodules_->capture_post_processor->Process(capture_buffer); + } + + // The level estimator operates on the recombined data. 
+ public_submodules_->level_estimator->ProcessStream(capture_buffer); + + capture_output_rms_.Analyze(rtc::ArrayView<const int16_t>( + capture_buffer->channels_const()[0], + capture_nonlocked_.capture_processing_format.num_frames())); + if (log_rms) { + RmsLevel::Levels levels = capture_output_rms_.AverageAndPeak(); + RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.ApmCaptureOutputLevelAverageRms", + levels.average, 1, RmsLevel::kMinLevelDb, 64); + RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.ApmCaptureOutputLevelPeakRms", + levels.peak, 1, RmsLevel::kMinLevelDb, 64); + } + + capture_.was_stream_delay_set = false; + return kNoError; +} + +int AudioProcessingImpl::AnalyzeReverseStream(const float* const* data, + size_t samples_per_channel, + int sample_rate_hz, + ChannelLayout layout) { + TRACE_EVENT0("webrtc", "AudioProcessing::AnalyzeReverseStream_ChannelLayout"); + rtc::CritScope cs(&crit_render_); + const StreamConfig reverse_config = { + sample_rate_hz, ChannelsFromLayout(layout), LayoutHasKeyboard(layout), + }; + if (samples_per_channel != reverse_config.num_frames()) { + return kBadDataLengthError; + } + return AnalyzeReverseStreamLocked(data, reverse_config, reverse_config); +} + +int AudioProcessingImpl::ProcessReverseStream(const float* const* src, + const StreamConfig& input_config, + const StreamConfig& output_config, + float* const* dest) { + TRACE_EVENT0("webrtc", "AudioProcessing::ProcessReverseStream_StreamConfig"); + rtc::CritScope cs(&crit_render_); + RETURN_ON_ERR(AnalyzeReverseStreamLocked(src, input_config, output_config)); + if (submodule_states_.RenderMultiBandProcessingActive()) { + render_.render_audio->CopyTo(formats_.api_format.reverse_output_stream(), + dest); + } else if (formats_.api_format.reverse_input_stream() != + formats_.api_format.reverse_output_stream()) { + render_.render_converter->Convert(src, input_config.num_samples(), dest, + output_config.num_samples()); + } else { + CopyAudioIfNeeded(src, input_config.num_frames(), + input_config.num_channels(), dest); + } + + return kNoError; +} + +int AudioProcessingImpl::AnalyzeReverseStreamLocked( + const float* const* src, + const StreamConfig& input_config, + const StreamConfig& output_config) { + if (src == nullptr) { + return kNullPointerError; + } + + if (input_config.num_channels() == 0) { + return kBadNumberChannelsError; + } + + ProcessingConfig processing_config = formats_.api_format; + processing_config.reverse_input_stream() = input_config; + processing_config.reverse_output_stream() = output_config; + + RETURN_ON_ERR(MaybeInitializeRender(processing_config)); + assert(input_config.num_frames() == + formats_.api_format.reverse_input_stream().num_frames()); + + if (aec_dump_) { + const size_t channel_size = + formats_.api_format.reverse_input_stream().num_frames(); + const size_t num_channels = + formats_.api_format.reverse_input_stream().num_channels(); + aec_dump_->WriteRenderStreamMessage( + FloatAudioFrame(src, num_channels, channel_size)); + } + render_.render_audio->CopyFrom(src, + formats_.api_format.reverse_input_stream()); + return ProcessRenderStreamLocked(); +} + +int AudioProcessingImpl::ProcessReverseStream(AudioFrame* frame) { + TRACE_EVENT0("webrtc", "AudioProcessing::ProcessReverseStream_AudioFrame"); + rtc::CritScope cs(&crit_render_); + if (frame == nullptr) { + return kNullPointerError; + } + // Must be a native rate. 
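+  // A minimal caller-side sketch for this overload (10 ms of 48 kHz stereo
+  // render audio; |apm| is an assumed AudioProcessing pointer):
+  //   AudioFrame render_frame;
+  //   render_frame.sample_rate_hz_ = 48000;
+  //   render_frame.num_channels_ = 2;
+  //   render_frame.samples_per_channel_ = 480;  // Exactly 10 ms at 48 kHz.
+  //   // ... fill the frame with the audio about to be played out ...
+  //   apm->ProcessReverseStream(&render_frame);
+  // The check below enforces the native-rate requirement: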
+  if (frame->sample_rate_hz_ != kSampleRate8kHz &&
+      frame->sample_rate_hz_ != kSampleRate16kHz &&
+      frame->sample_rate_hz_ != kSampleRate32kHz &&
+      frame->sample_rate_hz_ != kSampleRate48kHz) {
+    return kBadSampleRateError;
+  }
+
+  if (frame->num_channels_ <= 0) {
+    return kBadNumberChannelsError;
+  }
+
+  ProcessingConfig processing_config = formats_.api_format;
+  processing_config.reverse_input_stream().set_sample_rate_hz(
+      frame->sample_rate_hz_);
+  processing_config.reverse_input_stream().set_num_channels(
+      frame->num_channels_);
+  processing_config.reverse_output_stream().set_sample_rate_hz(
+      frame->sample_rate_hz_);
+  processing_config.reverse_output_stream().set_num_channels(
+      frame->num_channels_);
+
+  RETURN_ON_ERR(MaybeInitializeRender(processing_config));
+  if (frame->samples_per_channel_ !=
+      formats_.api_format.reverse_input_stream().num_frames()) {
+    return kBadDataLengthError;
+  }
+
+  if (aec_dump_) {
+    aec_dump_->WriteRenderStreamMessage(*frame);
+  }
+
+  render_.render_audio->DeinterleaveFrom(frame);
+  RETURN_ON_ERR(ProcessRenderStreamLocked());
+  render_.render_audio->InterleaveTo(
+      frame, submodule_states_.RenderMultiBandProcessingActive());
+  return kNoError;
+}
+
+int AudioProcessingImpl::ProcessRenderStreamLocked() {
+  AudioBuffer* render_buffer = render_.render_audio.get();  // For brevity.
+
+  QueueNonbandedRenderAudio(render_buffer);
+
+  if (submodule_states_.RenderMultiBandSubModulesActive() &&
+      SampleRateSupportsMultiBand(
+          formats_.render_processing_format.sample_rate_hz())) {
+    render_buffer->SplitIntoFrequencyBands();
+  }
+
+#if WEBRTC_INTELLIGIBILITY_ENHANCER
+  if (capture_nonlocked_.intelligibility_enabled) {
+    public_submodules_->intelligibility_enhancer->ProcessRenderAudio(
+        render_buffer);
+  }
+#endif
+
+  if (submodule_states_.RenderMultiBandSubModulesActive()) {
+    QueueBandedRenderAudio(render_buffer);
+  }
+
+  // TODO(peah): Perform the queueing inside QueueRenderAudio().
+  if (private_submodules_->echo_controller) {
+    private_submodules_->echo_controller->AnalyzeRender(render_buffer);
+  }
+
+  if (submodule_states_.RenderMultiBandProcessingActive() &&
+      SampleRateSupportsMultiBand(
+          formats_.render_processing_format.sample_rate_hz())) {
+    render_buffer->MergeFrequencyBands();
+  }
+
+  return kNoError;
+}
+
+int AudioProcessingImpl::set_stream_delay_ms(int delay) {
+  rtc::CritScope cs(&crit_capture_);
+  Error retval = kNoError;
+  capture_.was_stream_delay_set = true;
+  delay += capture_.delay_offset_ms;
+
+  if (delay < 0) {
+    delay = 0;
+    retval = kBadStreamParameterWarning;
+  }
+
+  // TODO(ajm): the max is rather arbitrarily chosen; investigate.
+  if (delay > 500) {
+    delay = 500;
+    retval = kBadStreamParameterWarning;
+  }
+
+  capture_nonlocked_.stream_delay_ms = delay;
+  return retval;
+}
+
+int AudioProcessingImpl::stream_delay_ms() const {
+  // Used as callback from submodules, hence locking is not allowed.
+  return capture_nonlocked_.stream_delay_ms;
+}
+
+bool AudioProcessingImpl::was_stream_delay_set() const {
+  // Used as callback from submodules, hence locking is not allowed.
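+  // Worked example for set_stream_delay_ms() above: with a delay offset of
+  // 20 ms, a reported delay of 490 ms becomes 510 ms and is clamped to
+  // 500 ms, returning kBadStreamParameterWarning; a reported -30 ms becomes
+  // -10 ms and clamps to 0 ms the same way. The warning is advisory; the
+  // clamped value is still applied.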
+ return capture_.was_stream_delay_set; +} + +void AudioProcessingImpl::set_stream_key_pressed(bool key_pressed) { + rtc::CritScope cs(&crit_capture_); + capture_.key_pressed = key_pressed; +} + +void AudioProcessingImpl::set_delay_offset_ms(int offset) { + rtc::CritScope cs(&crit_capture_); + capture_.delay_offset_ms = offset; +} + +int AudioProcessingImpl::delay_offset_ms() const { + rtc::CritScope cs(&crit_capture_); + return capture_.delay_offset_ms; +} + +void AudioProcessingImpl::AttachAecDump(std::unique_ptr<AecDump> aec_dump) { + RTC_DCHECK(aec_dump); + rtc::CritScope cs_render(&crit_render_); + rtc::CritScope cs_capture(&crit_capture_); + + // The previously attached AecDump will be destroyed with the + // 'aec_dump' parameter, which is after locks are released. + aec_dump_.swap(aec_dump); + WriteAecDumpConfigMessage(true); + aec_dump_->WriteInitMessage(ToStreamsConfig(formats_.api_format)); +} + +void AudioProcessingImpl::DetachAecDump() { + // The d-tor of a task-queue based AecDump blocks until all pending + // tasks are done. This construction avoids blocking while holding + // the render and capture locks. + std::unique_ptr<AecDump> aec_dump = nullptr; + { + rtc::CritScope cs_render(&crit_render_); + rtc::CritScope cs_capture(&crit_capture_); + aec_dump = std::move(aec_dump_); + } +} + +AudioProcessing::AudioProcessingStatistics::AudioProcessingStatistics() { + residual_echo_return_loss.Set(-100.0f, -100.0f, -100.0f, -100.0f); + echo_return_loss.Set(-100.0f, -100.0f, -100.0f, -100.0f); + echo_return_loss_enhancement.Set(-100.0f, -100.0f, -100.0f, -100.0f); + a_nlp.Set(-100.0f, -100.0f, -100.0f, -100.0f); +} + +AudioProcessing::AudioProcessingStatistics::AudioProcessingStatistics( + const AudioProcessingStatistics& other) = default; + +AudioProcessing::AudioProcessingStatistics::~AudioProcessingStatistics() = + default; + +// TODO(ivoc): Remove this when GetStatistics() becomes pure virtual. +AudioProcessing::AudioProcessingStatistics AudioProcessing::GetStatistics() + const { + return AudioProcessingStatistics(); +} + +// TODO(ivoc): Remove this when GetStatistics() becomes pure virtual. +AudioProcessingStats AudioProcessing::GetStatistics( + bool has_remote_tracks) const { + return AudioProcessingStats(); +} + +AudioProcessing::AudioProcessingStatistics AudioProcessingImpl::GetStatistics() + const { + AudioProcessingStatistics stats; + EchoCancellation::Metrics metrics; + if (private_submodules_->echo_controller) { + rtc::CritScope cs_capture(&crit_capture_); + auto ec_metrics = private_submodules_->echo_controller->GetMetrics(); + float erl = static_cast<float>(ec_metrics.echo_return_loss); + float erle = static_cast<float>(ec_metrics.echo_return_loss_enhancement); + // Instant value will also be used for min, max and average. 
+ stats.echo_return_loss.Set(erl, erl, erl, erl); + stats.echo_return_loss_enhancement.Set(erle, erle, erle, erle); + } else if (public_submodules_->echo_cancellation->GetMetrics(&metrics) == + Error::kNoError) { + stats.a_nlp.Set(metrics.a_nlp); + stats.divergent_filter_fraction = metrics.divergent_filter_fraction; + stats.echo_return_loss.Set(metrics.echo_return_loss); + stats.echo_return_loss_enhancement.Set( + metrics.echo_return_loss_enhancement); + stats.residual_echo_return_loss.Set(metrics.residual_echo_return_loss); + } + { + rtc::CritScope cs_capture(&crit_capture_); + stats.residual_echo_likelihood = + private_submodules_->residual_echo_detector->echo_likelihood(); + stats.residual_echo_likelihood_recent_max = + private_submodules_->residual_echo_detector + ->echo_likelihood_recent_max(); + } + public_submodules_->echo_cancellation->GetDelayMetrics( + &stats.delay_median, &stats.delay_standard_deviation, + &stats.fraction_poor_delays); + return stats; +} + +AudioProcessingStats AudioProcessingImpl::GetStatistics( + bool has_remote_tracks) const { + AudioProcessingStats stats; + if (has_remote_tracks) { + EchoCancellation::Metrics metrics; + if (private_submodules_->echo_controller) { + rtc::CritScope cs_capture(&crit_capture_); + auto ec_metrics = private_submodules_->echo_controller->GetMetrics(); + stats.echo_return_loss = ec_metrics.echo_return_loss; + stats.echo_return_loss_enhancement = + ec_metrics.echo_return_loss_enhancement; + stats.delay_ms = ec_metrics.delay_ms; + } else if (public_submodules_->echo_cancellation->GetMetrics(&metrics) == + Error::kNoError) { + if (metrics.divergent_filter_fraction != -1.0f) { + stats.divergent_filter_fraction = + rtc::Optional<double>(metrics.divergent_filter_fraction); + } + if (metrics.echo_return_loss.instant != -100) { + stats.echo_return_loss = + rtc::Optional<double>(metrics.echo_return_loss.instant); + } + if (metrics.echo_return_loss_enhancement.instant != -100) { + stats.echo_return_loss_enhancement = + rtc::Optional<double>(metrics.echo_return_loss_enhancement.instant); + } + } + if (config_.residual_echo_detector.enabled) { + rtc::CritScope cs_capture(&crit_capture_); + stats.residual_echo_likelihood = rtc::Optional<double>( + private_submodules_->residual_echo_detector->echo_likelihood()); + stats.residual_echo_likelihood_recent_max = + rtc::Optional<double>(private_submodules_->residual_echo_detector + ->echo_likelihood_recent_max()); + } + int delay_median, delay_std; + float fraction_poor_delays; + if (public_submodules_->echo_cancellation->GetDelayMetrics( + &delay_median, &delay_std, &fraction_poor_delays) == + Error::kNoError) { + if (delay_median >= 0) { + stats.delay_median_ms = rtc::Optional<int32_t>(delay_median); + } + if (delay_std >= 0) { + stats.delay_standard_deviation_ms = rtc::Optional<int32_t>(delay_std); + } + } + } + return stats; +} + +EchoCancellation* AudioProcessingImpl::echo_cancellation() const { + return public_submodules_->echo_cancellation.get(); +} + +EchoControlMobile* AudioProcessingImpl::echo_control_mobile() const { + return public_submodules_->echo_control_mobile.get(); +} + +GainControl* AudioProcessingImpl::gain_control() const { + if (constants_.use_experimental_agc) { + return public_submodules_->gain_control_for_experimental_agc.get(); + } + return public_submodules_->gain_control.get(); +} + +HighPassFilter* AudioProcessingImpl::high_pass_filter() const { + return high_pass_filter_impl_.get(); +} + +LevelEstimator* AudioProcessingImpl::level_estimator() const { + return 
public_submodules_->level_estimator.get(); +} + +NoiseSuppression* AudioProcessingImpl::noise_suppression() const { + return public_submodules_->noise_suppression.get(); +} + +VoiceDetection* AudioProcessingImpl::voice_detection() const { + return public_submodules_->voice_detection.get(); +} + +void AudioProcessingImpl::MutateConfig( + rtc::FunctionView<void(AudioProcessing::Config*)> mutator) { + rtc::CritScope cs_render(&crit_render_); + rtc::CritScope cs_capture(&crit_capture_); + mutator(&config_); + ApplyConfig(config_); +} + +AudioProcessing::Config AudioProcessingImpl::GetConfig() const { + rtc::CritScope cs_render(&crit_render_); + rtc::CritScope cs_capture(&crit_capture_); + return config_; +} + +bool AudioProcessingImpl::UpdateActiveSubmoduleStates() { + return submodule_states_.Update( + config_.high_pass_filter.enabled, + public_submodules_->echo_cancellation->is_enabled(), + public_submodules_->echo_control_mobile->is_enabled(), + config_.residual_echo_detector.enabled, + public_submodules_->noise_suppression->is_enabled(), + capture_nonlocked_.intelligibility_enabled, + capture_nonlocked_.beamformer_enabled, + public_submodules_->gain_control->is_enabled(), + config_.gain_controller2.enabled, + capture_nonlocked_.level_controller_enabled, + capture_nonlocked_.echo_controller_enabled, + public_submodules_->voice_detection->is_enabled(), + public_submodules_->level_estimator->is_enabled(), + capture_.transient_suppressor_enabled); +} + + +void AudioProcessingImpl::InitializeTransient() { + if (capture_.transient_suppressor_enabled) { + if (!public_submodules_->transient_suppressor.get()) { + public_submodules_->transient_suppressor.reset(new TransientSuppressor()); + } + public_submodules_->transient_suppressor->Initialize( + capture_nonlocked_.capture_processing_format.sample_rate_hz(), + capture_nonlocked_.split_rate, num_proc_channels()); + } +} + +void AudioProcessingImpl::InitializeBeamformer() { + if (capture_nonlocked_.beamformer_enabled) { + if (!private_submodules_->beamformer) { + private_submodules_->beamformer.reset(new NonlinearBeamformer( + capture_.array_geometry, 1u, capture_.target_direction)); + } + private_submodules_->beamformer->Initialize(kChunkSizeMs, + capture_nonlocked_.split_rate); + } +} + +void AudioProcessingImpl::InitializeIntelligibility() { +#if WEBRTC_INTELLIGIBILITY_ENHANCER + if (capture_nonlocked_.intelligibility_enabled) { + public_submodules_->intelligibility_enhancer.reset( + new IntelligibilityEnhancer(capture_nonlocked_.split_rate, + render_.render_audio->num_channels(), + render_.render_audio->num_bands(), + NoiseSuppressionImpl::num_noise_bins())); + } +#endif +} + +void AudioProcessingImpl::InitializeLowCutFilter() { + if (config_.high_pass_filter.enabled) { + private_submodules_->low_cut_filter.reset( + new LowCutFilter(num_proc_channels(), proc_sample_rate_hz())); + } else { + private_submodules_->low_cut_filter.reset(); + } +} + +void AudioProcessingImpl::InitializeEchoController() { + if (echo_control_factory_) { + private_submodules_->echo_controller = + echo_control_factory_->Create(proc_sample_rate_hz()); + } else { + private_submodules_->echo_controller.reset(); + } +} + +void AudioProcessingImpl::InitializeGainController2() { + if (config_.gain_controller2.enabled) { + private_submodules_->gain_controller2->Initialize(proc_sample_rate_hz()); + } +} + +void AudioProcessingImpl::InitializeLevelController() { + private_submodules_->level_controller->Initialize(proc_sample_rate_hz()); +} + +void 
AudioProcessingImpl::InitializeResidualEchoDetector() { + private_submodules_->residual_echo_detector->Initialize(); +} + +void AudioProcessingImpl::InitializePostProcessor() { + if (private_submodules_->capture_post_processor) { + private_submodules_->capture_post_processor->Initialize( + proc_sample_rate_hz(), num_proc_channels()); + } +} + +void AudioProcessingImpl::MaybeUpdateHistograms() { + static const int kMinDiffDelayMs = 60; + + if (echo_cancellation()->is_enabled()) { + // Activate delay_jumps_ counters if we know echo_cancellation is running. + // If a stream has echo we know that the echo_cancellation is in process. + if (capture_.stream_delay_jumps == -1 && + echo_cancellation()->stream_has_echo()) { + capture_.stream_delay_jumps = 0; + } + if (capture_.aec_system_delay_jumps == -1 && + echo_cancellation()->stream_has_echo()) { + capture_.aec_system_delay_jumps = 0; + } + + // Detect a jump in platform reported system delay and log the difference. + const int diff_stream_delay_ms = + capture_nonlocked_.stream_delay_ms - capture_.last_stream_delay_ms; + if (diff_stream_delay_ms > kMinDiffDelayMs && + capture_.last_stream_delay_ms != 0) { + RTC_HISTOGRAM_COUNTS("WebRTC.Audio.PlatformReportedStreamDelayJump", + diff_stream_delay_ms, kMinDiffDelayMs, 1000, 100); + if (capture_.stream_delay_jumps == -1) { + capture_.stream_delay_jumps = 0; // Activate counter if needed. + } + capture_.stream_delay_jumps++; + } + capture_.last_stream_delay_ms = capture_nonlocked_.stream_delay_ms; + + // Detect a jump in AEC system delay and log the difference. + const int samples_per_ms = + rtc::CheckedDivExact(capture_nonlocked_.split_rate, 1000); + RTC_DCHECK_LT(0, samples_per_ms); + const int aec_system_delay_ms = + public_submodules_->echo_cancellation->GetSystemDelayInSamples() / + samples_per_ms; + const int diff_aec_system_delay_ms = + aec_system_delay_ms - capture_.last_aec_system_delay_ms; + if (diff_aec_system_delay_ms > kMinDiffDelayMs && + capture_.last_aec_system_delay_ms != 0) { + RTC_HISTOGRAM_COUNTS("WebRTC.Audio.AecSystemDelayJump", + diff_aec_system_delay_ms, kMinDiffDelayMs, 1000, + 100); + if (capture_.aec_system_delay_jumps == -1) { + capture_.aec_system_delay_jumps = 0; // Activate counter if needed. + } + capture_.aec_system_delay_jumps++; + } + capture_.last_aec_system_delay_ms = aec_system_delay_ms; + } +} + +void AudioProcessingImpl::UpdateHistogramsOnCallEnd() { + // Run in a single-threaded manner. + rtc::CritScope cs_render(&crit_render_); + rtc::CritScope cs_capture(&crit_capture_); + + if (capture_.stream_delay_jumps > -1) { + RTC_HISTOGRAM_ENUMERATION( + "WebRTC.Audio.NumOfPlatformReportedStreamDelayJumps", + capture_.stream_delay_jumps, 51); + } + capture_.stream_delay_jumps = -1; + capture_.last_stream_delay_ms = 0; + + if (capture_.aec_system_delay_jumps > -1) { + RTC_HISTOGRAM_ENUMERATION("WebRTC.Audio.NumOfAecSystemDelayJumps", + capture_.aec_system_delay_jumps, 51); + } + capture_.aec_system_delay_jumps = -1; + capture_.last_aec_system_delay_ms = 0; +} + +void AudioProcessingImpl::WriteAecDumpConfigMessage(bool forced) { + if (!aec_dump_) { + return; + } + std::string experiments_description = + public_submodules_->echo_cancellation->GetExperimentsDescription(); + // TODO(peah): Add semicolon-separated concatenations of experiment + // descriptions for other submodules. 
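+  // For example, with the level controller and the new echo controller both
+  // active (and no clipping-level experiment), the string built below reads
+  // "LevelController;EchoController;" appended to whatever the echo
+  // canceller reported above.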
+ if (capture_nonlocked_.level_controller_enabled) { + experiments_description += "LevelController;"; + } + if (constants_.agc_clipped_level_min != kClippedLevelMin) { + experiments_description += "AgcClippingLevelExperiment;"; + } + if (capture_nonlocked_.echo_controller_enabled) { + experiments_description += "EchoController;"; + } + if (config_.gain_controller2.enabled) { + experiments_description += "GainController2;"; + } + + InternalAPMConfig apm_config; + + apm_config.aec_enabled = public_submodules_->echo_cancellation->is_enabled(); + apm_config.aec_delay_agnostic_enabled = + public_submodules_->echo_cancellation->is_delay_agnostic_enabled(); + apm_config.aec_drift_compensation_enabled = + public_submodules_->echo_cancellation->is_drift_compensation_enabled(); + apm_config.aec_extended_filter_enabled = + public_submodules_->echo_cancellation->is_extended_filter_enabled(); + apm_config.aec_suppression_level = static_cast<int>( + public_submodules_->echo_cancellation->suppression_level()); + + apm_config.aecm_enabled = + public_submodules_->echo_control_mobile->is_enabled(); + apm_config.aecm_comfort_noise_enabled = + public_submodules_->echo_control_mobile->is_comfort_noise_enabled(); + apm_config.aecm_routing_mode = + static_cast<int>(public_submodules_->echo_control_mobile->routing_mode()); + + apm_config.agc_enabled = public_submodules_->gain_control->is_enabled(); + apm_config.agc_mode = + static_cast<int>(public_submodules_->gain_control->mode()); + apm_config.agc_limiter_enabled = + public_submodules_->gain_control->is_limiter_enabled(); + apm_config.noise_robust_agc_enabled = constants_.use_experimental_agc; + + apm_config.hpf_enabled = config_.high_pass_filter.enabled; + + apm_config.ns_enabled = public_submodules_->noise_suppression->is_enabled(); + apm_config.ns_level = + static_cast<int>(public_submodules_->noise_suppression->level()); + + apm_config.transient_suppression_enabled = + capture_.transient_suppressor_enabled; + apm_config.intelligibility_enhancer_enabled = + capture_nonlocked_.intelligibility_enabled; + apm_config.experiments_description = experiments_description; + + if (!forced && apm_config == apm_config_for_aec_dump_) { + return; + } + aec_dump_->WriteConfig(apm_config); + apm_config_for_aec_dump_ = apm_config; +} + +void AudioProcessingImpl::RecordUnprocessedCaptureStream( + const float* const* src) { + RTC_DCHECK(aec_dump_); + WriteAecDumpConfigMessage(false); + + const size_t channel_size = formats_.api_format.input_stream().num_frames(); + const size_t num_channels = formats_.api_format.input_stream().num_channels(); + aec_dump_->AddCaptureStreamInput( + FloatAudioFrame(src, num_channels, channel_size)); + RecordAudioProcessingState(); +} + +void AudioProcessingImpl::RecordUnprocessedCaptureStream( + const AudioFrame& capture_frame) { + RTC_DCHECK(aec_dump_); + WriteAecDumpConfigMessage(false); + + aec_dump_->AddCaptureStreamInput(capture_frame); + RecordAudioProcessingState(); +} + +void AudioProcessingImpl::RecordProcessedCaptureStream( + const float* const* processed_capture_stream) { + RTC_DCHECK(aec_dump_); + + const size_t channel_size = formats_.api_format.output_stream().num_frames(); + const size_t num_channels = + formats_.api_format.output_stream().num_channels(); + aec_dump_->AddCaptureStreamOutput( + FloatAudioFrame(processed_capture_stream, num_channels, channel_size)); + aec_dump_->WriteCaptureStreamMessage(); +} + +void AudioProcessingImpl::RecordProcessedCaptureStream( + const AudioFrame& processed_capture_frame) { + 
RTC_DCHECK(aec_dump_); + + aec_dump_->AddCaptureStreamOutput(processed_capture_frame); + aec_dump_->WriteCaptureStreamMessage(); +} + +void AudioProcessingImpl::RecordAudioProcessingState() { + RTC_DCHECK(aec_dump_); + AecDump::AudioProcessingState audio_proc_state; + audio_proc_state.delay = capture_nonlocked_.stream_delay_ms; + audio_proc_state.drift = + public_submodules_->echo_cancellation->stream_drift_samples(); + audio_proc_state.level = gain_control()->stream_analog_level(); + audio_proc_state.keypress = capture_.key_pressed; + aec_dump_->AddAudioProcessingState(audio_proc_state); +} + +AudioProcessingImpl::ApmCaptureState::ApmCaptureState( + bool transient_suppressor_enabled, + const std::vector<Point>& array_geometry, + SphericalPointf target_direction) + : aec_system_delay_jumps(-1), + delay_offset_ms(0), + was_stream_delay_set(false), + last_stream_delay_ms(0), + last_aec_system_delay_ms(0), + stream_delay_jumps(-1), + output_will_be_muted(false), + key_pressed(false), + transient_suppressor_enabled(transient_suppressor_enabled), + array_geometry(array_geometry), + target_direction(target_direction), + capture_processing_format(kSampleRate16kHz), + split_rate(kSampleRate16kHz), + echo_path_gain_change(false) {} + +AudioProcessingImpl::ApmCaptureState::~ApmCaptureState() = default; + +AudioProcessingImpl::ApmRenderState::ApmRenderState() = default; + +AudioProcessingImpl::ApmRenderState::~ApmRenderState() = default; + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/audio_processing_impl.h b/third_party/libwebrtc/webrtc/modules/audio_processing/audio_processing_impl.h new file mode 100644 index 0000000000..021a52037c --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/audio_processing_impl.h @@ -0,0 +1,426 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AUDIO_PROCESSING_IMPL_H_ +#define MODULES_AUDIO_PROCESSING_AUDIO_PROCESSING_IMPL_H_ + +#include <list> +#include <memory> +#include <vector> + +#include "modules/audio_processing/audio_buffer.h" +#include "modules/audio_processing/include/aec_dump.h" +#include "modules/audio_processing/include/audio_processing.h" +#include "modules/audio_processing/render_queue_item_verifier.h" +#include "modules/audio_processing/rms_level.h" +#include "rtc_base/criticalsection.h" +#include "rtc_base/function_view.h" +#include "rtc_base/gtest_prod_util.h" +#include "rtc_base/ignore_wundef.h" +#include "rtc_base/protobuf_utils.h" +#include "rtc_base/swap_queue.h" +#include "rtc_base/thread_annotations.h" +#include "system_wrappers/include/file_wrapper.h" + +namespace webrtc { + +class AudioConverter; +class NonlinearBeamformer; + +class AudioProcessingImpl : public AudioProcessing { + public: + // Methods forcing APM to run in a single-threaded manner. + // Acquires both the render and capture locks. + explicit AudioProcessingImpl(const webrtc::Config& config); + // AudioProcessingImpl takes ownership of capture post processor and + // beamformer. 
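+  // A minimal creation sketch (hedged: callers normally go through the
+  // AudioProcessing::Create() factory rather than this constructor):
+  //   std::unique_ptr<AudioProcessing> apm(AudioProcessing::Create());
+  //   AudioProcessing::Config config;
+  //   config.high_pass_filter.enabled = true;
+  //   config.gain_controller2.enabled = true;  // Hypothetical choice.
+  //   apm->ApplyConfig(config);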
+  AudioProcessingImpl(const webrtc::Config& config,
+                      std::unique_ptr<PostProcessing> capture_post_processor,
+                      std::unique_ptr<EchoControlFactory> echo_control_factory,
+                      NonlinearBeamformer* beamformer);
+  ~AudioProcessingImpl() override;
+  int Initialize() override;
+  int Initialize(int capture_input_sample_rate_hz,
+                 int capture_output_sample_rate_hz,
+                 int render_sample_rate_hz,
+                 ChannelLayout capture_input_layout,
+                 ChannelLayout capture_output_layout,
+                 ChannelLayout render_input_layout) override;
+  int Initialize(const ProcessingConfig& processing_config) override;
+  void ApplyConfig(const AudioProcessing::Config& config) override;
+  void SetExtraOptions(const webrtc::Config& config) override;
+  void UpdateHistogramsOnCallEnd() override;
+  void AttachAecDump(std::unique_ptr<AecDump> aec_dump) override;
+  void DetachAecDump() override;
+
+  // Capture-side exclusive methods possibly running APM in a
+  // multi-threaded manner. Acquire the capture lock.
+  int ProcessStream(AudioFrame* frame) override;
+  int ProcessStream(const float* const* src,
+                    size_t samples_per_channel,
+                    int input_sample_rate_hz,
+                    ChannelLayout input_layout,
+                    int output_sample_rate_hz,
+                    ChannelLayout output_layout,
+                    float* const* dest) override;
+  int ProcessStream(const float* const* src,
+                    const StreamConfig& input_config,
+                    const StreamConfig& output_config,
+                    float* const* dest) override;
+  void set_output_will_be_muted(bool muted) override;
+  int set_stream_delay_ms(int delay) override;
+  void set_delay_offset_ms(int offset) override;
+  int delay_offset_ms() const override;
+  void set_stream_key_pressed(bool key_pressed) override;
+
+  // Render-side exclusive methods possibly running APM in a
+  // multi-threaded manner. Acquire the render lock.
+  int ProcessReverseStream(AudioFrame* frame) override;
+  int AnalyzeReverseStream(const float* const* data,
+                           size_t samples_per_channel,
+                           int sample_rate_hz,
+                           ChannelLayout layout) override;
+  int ProcessReverseStream(const float* const* src,
+                           const StreamConfig& input_config,
+                           const StreamConfig& output_config,
+                           float* const* dest) override;
+
+  // Methods only accessed from APM submodules or
+  // from AudioProcessing tests in a single-threaded manner.
+  // Hence there is no need for locks in these.
+  int proc_sample_rate_hz() const override;
+  int proc_split_sample_rate_hz() const override;
+  size_t num_input_channels() const override;
+  size_t num_proc_channels() const override;
+  size_t num_output_channels() const override;
+  size_t num_reverse_channels() const override;
+  int stream_delay_ms() const override;
+  bool was_stream_delay_set() const override
+      RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_capture_);
+
+  AudioProcessingStatistics GetStatistics() const override;
+  AudioProcessingStats GetStatistics(bool has_remote_tracks) const override;
+
+  // Methods returning pointers to APM submodules.
+  // No locks are acquired in these, as the locks
+  // would offer no protection (the submodules are
+  // created only once in a single-threaded manner
+  // during APM creation).
+  EchoCancellation* echo_cancellation() const override;
+  EchoControlMobile* echo_control_mobile() const override;
+  GainControl* gain_control() const override;
+  // TODO(peah): Deprecate this API call.
+  HighPassFilter* high_pass_filter() const override;
+  LevelEstimator* level_estimator() const override;
+  NoiseSuppression* noise_suppression() const override;
+  VoiceDetection* voice_detection() const override;
+
+  // TODO(peah): Remove MutateConfig once the new API allows that.
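+  // Usage sketch for MutateConfig below; the mutator runs with both locks
+  // held, so the edit and the subsequent ApplyConfig happen atomically:
+  //   apm_impl->MutateConfig([](AudioProcessing::Config* config) {
+  //     config->gain_controller2.enabled = true;  // Hypothetical toggle.
+  //   });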
+ void MutateConfig(rtc::FunctionView<void(AudioProcessing::Config*)> mutator); + AudioProcessing::Config GetConfig() const override; + + protected: + // Overridden in a mock. + virtual int InitializeLocked() + RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_render_, crit_capture_); + + private: + // TODO(peah): These friend classes should be removed as soon as the new + // parameter setting scheme allows. + FRIEND_TEST_ALL_PREFIXES(ApmConfiguration, DefaultBehavior); + FRIEND_TEST_ALL_PREFIXES(ApmConfiguration, ValidConfigBehavior); + FRIEND_TEST_ALL_PREFIXES(ApmConfiguration, InValidConfigBehavior); + struct ApmPublicSubmodules; + struct ApmPrivateSubmodules; + + // Submodule interface implementations. + std::unique_ptr<HighPassFilter> high_pass_filter_impl_; + + // EchoControl factory. + std::unique_ptr<EchoControlFactory> echo_control_factory_; + + class ApmSubmoduleStates { + public: + explicit ApmSubmoduleStates(bool capture_post_processor_enabled); + // Updates the submodule state and returns true if it has changed. + bool Update(bool low_cut_filter_enabled, + bool echo_canceller_enabled, + bool mobile_echo_controller_enabled, + bool residual_echo_detector_enabled, + bool noise_suppressor_enabled, + bool intelligibility_enhancer_enabled, + bool beamformer_enabled, + bool adaptive_gain_controller_enabled, + bool gain_controller2_enabled, + bool level_controller_enabled, + bool echo_controller_enabled, + bool voice_activity_detector_enabled, + bool level_estimator_enabled, + bool transient_suppressor_enabled); + bool CaptureMultiBandSubModulesActive() const; + bool CaptureMultiBandProcessingActive() const; + bool CaptureFullBandProcessingActive() const; + bool RenderMultiBandSubModulesActive() const; + bool RenderMultiBandProcessingActive() const; + + private: + const bool capture_post_processor_enabled_ = false; + bool low_cut_filter_enabled_ = false; + bool echo_canceller_enabled_ = false; + bool mobile_echo_controller_enabled_ = false; + bool residual_echo_detector_enabled_ = false; + bool noise_suppressor_enabled_ = false; + bool intelligibility_enhancer_enabled_ = false; + bool beamformer_enabled_ = false; + bool adaptive_gain_controller_enabled_ = false; + bool gain_controller2_enabled_ = false; + bool level_controller_enabled_ = false; + bool echo_controller_enabled_ = false; + bool level_estimator_enabled_ = false; + bool voice_activity_detector_enabled_ = false; + bool transient_suppressor_enabled_ = false; + bool first_update_ = true; + }; + + // Method for modifying the formats struct that are called from both + // the render and capture threads. The check for whether modifications + // are needed is done while holding the render lock only, thereby avoiding + // that the capture thread blocks the render thread. + // The struct is modified in a single-threaded manner by holding both the + // render and capture locks. + int MaybeInitialize(const ProcessingConfig& config, bool force_initialization) + RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_render_); + + int MaybeInitializeRender(const ProcessingConfig& processing_config) + RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_render_); + + int MaybeInitializeCapture(const ProcessingConfig& processing_config, + bool force_initialization) + RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_render_); + + // Method for updating the state keeping track of the active submodules. + // Returns a bool indicating whether the state has changed. + bool UpdateActiveSubmoduleStates() + RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_capture_); + + // Methods requiring APM running in a single-threaded manner. 
+ // Are called with both the render and capture locks already + // acquired. + void InitializeTransient() + RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_render_, crit_capture_); + void InitializeBeamformer() + RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_render_, crit_capture_); + void InitializeIntelligibility() + RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_render_, crit_capture_); + int InitializeLocked(const ProcessingConfig& config) + RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_render_, crit_capture_); + void InitializeLevelController() RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_capture_); + void InitializeResidualEchoDetector() + RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_render_, crit_capture_); + void InitializeLowCutFilter() RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_capture_); + void InitializeEchoController() RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_capture_); + void InitializeGainController2() RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_capture_); + void InitializePostProcessor() RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_capture_); + + void EmptyQueuedRenderAudio(); + void AllocateRenderQueue() + RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_render_, crit_capture_); + void QueueBandedRenderAudio(AudioBuffer* audio) + RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_render_); + void QueueNonbandedRenderAudio(AudioBuffer* audio) + RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_render_); + + // Capture-side exclusive methods possibly running APM in a multi-threaded + // manner that are called with the render lock already acquired. + int ProcessCaptureStreamLocked() RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_capture_); + void MaybeUpdateHistograms() RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_capture_); + + // Render-side exclusive methods possibly running APM in a multi-threaded + // manner that are called with the render lock already acquired. + // TODO(ekm): Remove once all clients updated to new interface. + int AnalyzeReverseStreamLocked(const float* const* src, + const StreamConfig& input_config, + const StreamConfig& output_config) + RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_render_); + int ProcessRenderStreamLocked() RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_render_); + + // Collects configuration settings from public and private + // submodules to be saved as an audioproc::Config message on the + // AecDump if it is attached. If not |forced|, only writes the current + // config if it is different from the last saved one; if |forced|, + // writes the config regardless of the last saved. + void WriteAecDumpConfigMessage(bool forced) + RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_capture_); + + // Notifies attached AecDump of current configuration and capture data. + void RecordUnprocessedCaptureStream(const float* const* capture_stream) + RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_capture_); + + void RecordUnprocessedCaptureStream(const AudioFrame& capture_frame) + RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_capture_); + + // Notifies attached AecDump of current configuration and + // processed capture data and issues a capture stream recording + // request. + void RecordProcessedCaptureStream( + const float* const* processed_capture_stream) + RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_capture_); + + void RecordProcessedCaptureStream(const AudioFrame& processed_capture_frame) + RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_capture_); + + // Notifies attached AecDump about current state (delay, drift, etc). + void RecordAudioProcessingState() RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_capture_); + + // AecDump instance used for optionally logging APM config, input + // and output to file in the AEC-dump format defined in debug.proto. 
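+  // Attach/detach sketch (hedged: assumes the AecDumpFactory helper from
+  // this tree and a caller-owned rtc::TaskQueue |worker_queue|; Create()
+  // can return nullptr on failure, and -1 means no size limit):
+  //   apm->AttachAecDump(
+  //       AecDumpFactory::Create("/tmp/audio.aecdump", -1, &worker_queue));
+  //   ...
+  //   apm->DetachAecDump();  // Destroys the dump; may block on its tasks.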
+ std::unique_ptr<AecDump> aec_dump_; + + // Hold the last config written with AecDump for avoiding writing + // the same config twice. + InternalAPMConfig apm_config_for_aec_dump_ RTC_GUARDED_BY(crit_capture_); + + // Critical sections. + rtc::CriticalSection crit_render_ RTC_ACQUIRED_BEFORE(crit_capture_); + rtc::CriticalSection crit_capture_; + + // Struct containing the Config specifying the behavior of APM. + AudioProcessing::Config config_; + + // Class containing information about what submodules are active. + ApmSubmoduleStates submodule_states_; + + // Structs containing the pointers to the submodules. + std::unique_ptr<ApmPublicSubmodules> public_submodules_; + std::unique_ptr<ApmPrivateSubmodules> private_submodules_; + + // State that is written to while holding both the render and capture locks + // but can be read without any lock being held. + // As this is only accessed internally of APM, and all internal methods in APM + // either are holding the render or capture locks, this construct is safe as + // it is not possible to read the variables while writing them. + struct ApmFormatState { + ApmFormatState() + : // Format of processing streams at input/output call sites. + api_format({{{kSampleRate16kHz, 1, false}, + {kSampleRate16kHz, 1, false}, + {kSampleRate16kHz, 1, false}, + {kSampleRate16kHz, 1, false}}}), + render_processing_format(kSampleRate16kHz, 1) {} + ProcessingConfig api_format; + StreamConfig render_processing_format; + } formats_; + + // APM constants. + const struct ApmConstants { + ApmConstants(int agc_startup_min_volume, + int agc_clipped_level_min, + bool use_experimental_agc) + : // Format of processing streams at input/output call sites. + agc_startup_min_volume(agc_startup_min_volume), + agc_clipped_level_min(agc_clipped_level_min), + use_experimental_agc(use_experimental_agc) {} + int agc_startup_min_volume; + int agc_clipped_level_min; + bool use_experimental_agc; + } constants_; + + struct ApmCaptureState { + ApmCaptureState(bool transient_suppressor_enabled, + const std::vector<Point>& array_geometry, + SphericalPointf target_direction); + ~ApmCaptureState(); + int aec_system_delay_jumps; + int delay_offset_ms; + bool was_stream_delay_set; + int last_stream_delay_ms; + int last_aec_system_delay_ms; + int stream_delay_jumps; + bool output_will_be_muted; + bool key_pressed; + bool transient_suppressor_enabled; + std::vector<Point> array_geometry; + SphericalPointf target_direction; + std::unique_ptr<AudioBuffer> capture_audio; + // Only the rate and samples fields of capture_processing_format_ are used + // because the capture processing number of channels is mutable and is + // tracked by the capture_audio_. + StreamConfig capture_processing_format; + int split_rate; + bool echo_path_gain_change; + } capture_ RTC_GUARDED_BY(crit_capture_); + + struct ApmCaptureNonLockedState { + ApmCaptureNonLockedState(bool beamformer_enabled, + bool intelligibility_enabled) + : capture_processing_format(kSampleRate16kHz), + split_rate(kSampleRate16kHz), + stream_delay_ms(0), + beamformer_enabled(beamformer_enabled), + intelligibility_enabled(intelligibility_enabled) {} + // Only the rate and samples fields of capture_processing_format_ are used + // because the forward processing number of channels is mutable and is + // tracked by the capture_audio_. 
+ StreamConfig capture_processing_format; + int split_rate; + int stream_delay_ms; + bool beamformer_enabled; + bool intelligibility_enabled; + bool level_controller_enabled = false; + bool echo_controller_enabled = false; + } capture_nonlocked_; + + struct ApmRenderState { + ApmRenderState(); + ~ApmRenderState(); + std::unique_ptr<AudioConverter> render_converter; + std::unique_ptr<AudioBuffer> render_audio; + } render_ RTC_GUARDED_BY(crit_render_); + + size_t aec_render_queue_element_max_size_ RTC_GUARDED_BY(crit_render_) + RTC_GUARDED_BY(crit_capture_) = 0; + std::vector<float> aec_render_queue_buffer_ RTC_GUARDED_BY(crit_render_); + std::vector<float> aec_capture_queue_buffer_ RTC_GUARDED_BY(crit_capture_); + + size_t aecm_render_queue_element_max_size_ RTC_GUARDED_BY(crit_render_) + RTC_GUARDED_BY(crit_capture_) = 0; + std::vector<int16_t> aecm_render_queue_buffer_ RTC_GUARDED_BY(crit_render_); + std::vector<int16_t> aecm_capture_queue_buffer_ RTC_GUARDED_BY(crit_capture_); + + size_t agc_render_queue_element_max_size_ RTC_GUARDED_BY(crit_render_) + RTC_GUARDED_BY(crit_capture_) = 0; + std::vector<int16_t> agc_render_queue_buffer_ RTC_GUARDED_BY(crit_render_); + std::vector<int16_t> agc_capture_queue_buffer_ RTC_GUARDED_BY(crit_capture_); + + size_t red_render_queue_element_max_size_ RTC_GUARDED_BY(crit_render_) + RTC_GUARDED_BY(crit_capture_) = 0; + std::vector<float> red_render_queue_buffer_ RTC_GUARDED_BY(crit_render_); + std::vector<float> red_capture_queue_buffer_ RTC_GUARDED_BY(crit_capture_); + + RmsLevel capture_input_rms_ RTC_GUARDED_BY(crit_capture_); + RmsLevel capture_output_rms_ RTC_GUARDED_BY(crit_capture_); + int capture_rms_interval_counter_ RTC_GUARDED_BY(crit_capture_) = 0; + + // Lock protection not needed. + std::unique_ptr<SwapQueue<std::vector<float>, RenderQueueItemVerifier<float>>> + aec_render_signal_queue_; + std::unique_ptr< + SwapQueue<std::vector<int16_t>, RenderQueueItemVerifier<int16_t>>> + aecm_render_signal_queue_; + std::unique_ptr< + SwapQueue<std::vector<int16_t>, RenderQueueItemVerifier<int16_t>>> + agc_render_signal_queue_; + std::unique_ptr<SwapQueue<std::vector<float>, RenderQueueItemVerifier<float>>> + red_render_signal_queue_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AUDIO_PROCESSING_IMPL_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/audio_processing_impl_locking_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/audio_processing_impl_locking_unittest.cc new file mode 100644 index 0000000000..6c3bad57e6 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/audio_processing_impl_locking_unittest.cc @@ -0,0 +1,1135 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/audio_processing_impl.h" + +#include <algorithm> +#include <memory> +#include <vector> + +#include "api/array_view.h" +#include "modules/audio_processing/test/test_utils.h" +#include "modules/include/module_common_types.h" +#include "rtc_base/criticalsection.h" +#include "rtc_base/event.h" +#include "rtc_base/platform_thread.h" +#include "rtc_base/random.h" +#include "system_wrappers/include/sleep.h" +#include "test/gtest.h" + +namespace webrtc { + +namespace { + +class AudioProcessingImplLockTest; + +// Type of the render thread APM API call to use in the test. +enum class RenderApiImpl { + ProcessReverseStreamImpl1, + ProcessReverseStreamImpl2, + AnalyzeReverseStreamImpl +}; + +// Type of the capture thread APM API call to use in the test. +enum class CaptureApiImpl { + ProcessStreamImpl1, + ProcessStreamImpl2, + ProcessStreamImpl3 +}; + +// The runtime parameter setting scheme to use in the test. +enum class RuntimeParameterSettingScheme { + SparseStreamMetadataChangeScheme, + ExtremeStreamMetadataChangeScheme, + FixedMonoStreamMetadataScheme, + FixedStereoStreamMetadataScheme +}; + +// Variant of echo canceller settings to use in the test. +enum class AecType { + BasicWebRtcAecSettings, + AecTurnedOff, + BasicWebRtcAecSettingsWithExtentedFilter, + BasicWebRtcAecSettingsWithDelayAgnosticAec, + BasicWebRtcAecSettingsWithAecMobile +}; + +// Thread-safe random number generator wrapper. +class RandomGenerator { + public: + RandomGenerator() : rand_gen_(42U) {} + + int RandInt(int min, int max) { + rtc::CritScope cs(&crit_); + return rand_gen_.Rand(min, max); + } + + int RandInt(int max) { + rtc::CritScope cs(&crit_); + return rand_gen_.Rand(max); + } + + float RandFloat() { + rtc::CritScope cs(&crit_); + return rand_gen_.Rand<float>(); + } + + private: + rtc::CriticalSection crit_; + Random rand_gen_ RTC_GUARDED_BY(crit_); +}; + +// Variables related to the audio data and formats. +struct AudioFrameData { + explicit AudioFrameData(int max_frame_size) { + // Set up the two-dimensional arrays needed for the APM API calls. + input_framechannels.resize(2 * max_frame_size); + input_frame.resize(2); + input_frame[0] = &input_framechannels[0]; + input_frame[1] = &input_framechannels[max_frame_size]; + + output_frame_channels.resize(2 * max_frame_size); + output_frame.resize(2); + output_frame[0] = &output_frame_channels[0]; + output_frame[1] = &output_frame_channels[max_frame_size]; + } + + AudioFrame frame; + std::vector<float*> output_frame; + std::vector<float> output_frame_channels; + AudioProcessing::ChannelLayout output_channel_layout = + AudioProcessing::ChannelLayout::kMono; + int input_sample_rate_hz = 16000; + int input_number_of_channels = -1; + std::vector<float*> input_frame; + std::vector<float> input_framechannels; + AudioProcessing::ChannelLayout input_channel_layout = + AudioProcessing::ChannelLayout::kMono; + int output_sample_rate_hz = 16000; + int output_number_of_channels = -1; + StreamConfig input_stream_config; + StreamConfig output_stream_config; + int input_samples_per_channel = -1; + int output_samples_per_channel = -1; +}; + +// The configuration for the test. +struct TestConfig { + // Test case generator for the test configurations to use in the brief tests. 
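+  // These generators feed the value-parameterized test instantiated at the
+  // bottom of this file, e.g.:
+  //   INSTANTIATE_TEST_CASE_P(
+  //       AudioProcessingImplLockBrief,
+  //       AudioProcessingImplLockTest,
+  //       ::testing::ValuesIn(TestConfig::GenerateBriefTestConfigs()));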
+ static std::vector<TestConfig> GenerateBriefTestConfigs() { + std::vector<TestConfig> test_configs; + AecType aec_types[] = {AecType::BasicWebRtcAecSettingsWithDelayAgnosticAec, + AecType::BasicWebRtcAecSettingsWithAecMobile}; + for (auto aec_type : aec_types) { + TestConfig test_config; + test_config.aec_type = aec_type; + + test_config.min_number_of_calls = 300; + + // Perform tests only with the extreme runtime parameter setting scheme. + test_config.runtime_parameter_setting_scheme = + RuntimeParameterSettingScheme::ExtremeStreamMetadataChangeScheme; + + // Only test 16 kHz for this test suite. + test_config.initial_sample_rate_hz = 16000; + + // Create test config for the second processing API function set. + test_config.render_api_function = + RenderApiImpl::ProcessReverseStreamImpl2; + test_config.capture_api_function = CaptureApiImpl::ProcessStreamImpl2; + + // Create test config for the first processing API function set. + test_configs.push_back(test_config); + test_config.render_api_function = + RenderApiImpl::AnalyzeReverseStreamImpl; + test_config.capture_api_function = CaptureApiImpl::ProcessStreamImpl3; + test_configs.push_back(test_config); + } + + // Return the created test configurations. + return test_configs; + } + + // Test case generator for the test configurations to use in the extensive + // tests. + static std::vector<TestConfig> GenerateExtensiveTestConfigs() { + // Lambda functions for the test config generation. + auto add_processing_apis = [](TestConfig test_config) { + struct AllowedApiCallCombinations { + RenderApiImpl render_api; + CaptureApiImpl capture_api; + }; + + const AllowedApiCallCombinations api_calls[] = { + {RenderApiImpl::ProcessReverseStreamImpl1, + CaptureApiImpl::ProcessStreamImpl1}, + {RenderApiImpl::ProcessReverseStreamImpl2, + CaptureApiImpl::ProcessStreamImpl2}, + {RenderApiImpl::ProcessReverseStreamImpl2, + CaptureApiImpl::ProcessStreamImpl3}, + {RenderApiImpl::AnalyzeReverseStreamImpl, + CaptureApiImpl::ProcessStreamImpl2}, + {RenderApiImpl::AnalyzeReverseStreamImpl, + CaptureApiImpl::ProcessStreamImpl3}}; + std::vector<TestConfig> out; + for (auto api_call : api_calls) { + test_config.render_api_function = api_call.render_api; + test_config.capture_api_function = api_call.capture_api; + out.push_back(test_config); + } + return out; + }; + + auto add_aec_settings = [](const std::vector<TestConfig>& in) { + std::vector<TestConfig> out; + AecType aec_types[] = { + AecType::BasicWebRtcAecSettings, AecType::AecTurnedOff, + AecType::BasicWebRtcAecSettingsWithExtentedFilter, + AecType::BasicWebRtcAecSettingsWithDelayAgnosticAec, + AecType::BasicWebRtcAecSettingsWithAecMobile}; + for (auto test_config : in) { + // Due to a VisualStudio 2015 compiler issue, the internal loop + // variable here cannot override a previously defined name. + // In other words "type" cannot be named "aec_type" here. 
+ // https://connect.microsoft.com/VisualStudio/feedback/details/2291755 + for (auto type : aec_types) { + test_config.aec_type = type; + out.push_back(test_config); + } + } + return out; + }; + + auto add_settings_scheme = [](const std::vector<TestConfig>& in) { + std::vector<TestConfig> out; + RuntimeParameterSettingScheme schemes[] = { + RuntimeParameterSettingScheme::SparseStreamMetadataChangeScheme, + RuntimeParameterSettingScheme::ExtremeStreamMetadataChangeScheme, + RuntimeParameterSettingScheme::FixedMonoStreamMetadataScheme, + RuntimeParameterSettingScheme::FixedStereoStreamMetadataScheme}; + + for (auto test_config : in) { + for (auto scheme : schemes) { + test_config.runtime_parameter_setting_scheme = scheme; + out.push_back(test_config); + } + } + return out; + }; + + auto add_sample_rates = [](const std::vector<TestConfig>& in) { + const int sample_rates[] = {8000, 16000, 32000, 48000}; + + std::vector<TestConfig> out; + for (auto test_config : in) { + auto available_rates = + (test_config.aec_type == + AecType::BasicWebRtcAecSettingsWithAecMobile + ? rtc::ArrayView<const int>(sample_rates, 2) + : rtc::ArrayView<const int>(sample_rates)); + + for (auto rate : available_rates) { + test_config.initial_sample_rate_hz = rate; + out.push_back(test_config); + } + } + return out; + }; + + // Generate test configurations of the relevant combinations of the + // parameters to + // test. + TestConfig test_config; + test_config.min_number_of_calls = 10000; + return add_sample_rates(add_settings_scheme( + add_aec_settings(add_processing_apis(test_config)))); + } + + RenderApiImpl render_api_function = RenderApiImpl::ProcessReverseStreamImpl2; + CaptureApiImpl capture_api_function = CaptureApiImpl::ProcessStreamImpl2; + RuntimeParameterSettingScheme runtime_parameter_setting_scheme = + RuntimeParameterSettingScheme::ExtremeStreamMetadataChangeScheme; + int initial_sample_rate_hz = 16000; + AecType aec_type = AecType::BasicWebRtcAecSettingsWithDelayAgnosticAec; + int min_number_of_calls = 300; +}; + +// Handler for the frame counters. +class FrameCounters { + public: + void IncreaseRenderCounter() { + rtc::CritScope cs(&crit_); + render_count++; + } + + void IncreaseCaptureCounter() { + rtc::CritScope cs(&crit_); + capture_count++; + } + + int GetCaptureCounter() const { + rtc::CritScope cs(&crit_); + return capture_count; + } + + int GetRenderCounter() const { + rtc::CritScope cs(&crit_); + return render_count; + } + + int CaptureMinusRenderCounters() const { + rtc::CritScope cs(&crit_); + return capture_count - render_count; + } + + int RenderMinusCaptureCounters() const { + return -CaptureMinusRenderCounters(); + } + + bool BothCountersExceedeThreshold(int threshold) { + rtc::CritScope cs(&crit_); + return (render_count > threshold && capture_count > threshold); + } + + private: + rtc::CriticalSection crit_; + int render_count RTC_GUARDED_BY(crit_) = 0; + int capture_count RTC_GUARDED_BY(crit_) = 0; +}; + +// Class for handling the capture side processing. 
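+// The capture and render processors below keep their call counts loosely in
+// step via the two rtc::Event objects: after each API call a processor
+// Set()s its own event, and when it runs more than kMaxCallDifference calls
+// ahead it Wait()s on the other side's event. Sketch of the capture side of
+// that handshake, as implemented in CaptureProcessor::Process():
+//
+//   if (frame_counters_->CaptureMinusRenderCounters() > kMaxCallDifference)
+//     render_call_event_->Wait(rtc::Event::kForever);
+//   CallApmCaptureSide();
+//   frame_counters_->IncreaseCaptureCounter();
+//   capture_call_event_->Set();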
+class CaptureProcessor { + public: + CaptureProcessor(int max_frame_size, + RandomGenerator* rand_gen, + rtc::Event* render_call_event, + rtc::Event* capture_call_event, + FrameCounters* shared_counters_state, + AudioProcessingImplLockTest* test_framework, + TestConfig* test_config, + AudioProcessing* apm); + bool Process(); + + private: + static const int kMaxCallDifference = 10; + static const float kCaptureInputFloatLevel; + static const int kCaptureInputFixLevel = 1024; + + void PrepareFrame(); + void CallApmCaptureSide(); + void ApplyRuntimeSettingScheme(); + + RandomGenerator* const rand_gen_ = nullptr; + rtc::Event* const render_call_event_ = nullptr; + rtc::Event* const capture_call_event_ = nullptr; + FrameCounters* const frame_counters_ = nullptr; + AudioProcessingImplLockTest* const test_ = nullptr; + const TestConfig* const test_config_ = nullptr; + AudioProcessing* const apm_ = nullptr; + AudioFrameData frame_data_; +}; + +// Class for handling the stats processing. +class StatsProcessor { + public: + StatsProcessor(RandomGenerator* rand_gen, + TestConfig* test_config, + AudioProcessing* apm); + bool Process(); + + private: + RandomGenerator* rand_gen_ = nullptr; + TestConfig* test_config_ = nullptr; + AudioProcessing* apm_ = nullptr; +}; + +// Class for handling the render side processing. +class RenderProcessor { + public: + RenderProcessor(int max_frame_size, + RandomGenerator* rand_gen, + rtc::Event* render_call_event, + rtc::Event* capture_call_event, + FrameCounters* shared_counters_state, + AudioProcessingImplLockTest* test_framework, + TestConfig* test_config, + AudioProcessing* apm); + bool Process(); + + private: + static const int kMaxCallDifference = 10; + static const int kRenderInputFixLevel = 16384; + static const float kRenderInputFloatLevel; + + void PrepareFrame(); + void CallApmRenderSide(); + void ApplyRuntimeSettingScheme(); + + RandomGenerator* const rand_gen_ = nullptr; + rtc::Event* const render_call_event_ = nullptr; + rtc::Event* const capture_call_event_ = nullptr; + FrameCounters* const frame_counters_ = nullptr; + AudioProcessingImplLockTest* const test_ = nullptr; + const TestConfig* const test_config_ = nullptr; + AudioProcessing* const apm_ = nullptr; + AudioFrameData frame_data_; + bool first_render_call_ = true; +}; + +class AudioProcessingImplLockTest + : public ::testing::TestWithParam<TestConfig> { + public: + AudioProcessingImplLockTest(); + bool RunTest(); + bool MaybeEndTest(); + + private: + static const int kTestTimeOutLimit = 10 * 60 * 1000; + static const int kMaxFrameSize = 480; + + // ::testing::TestWithParam<> implementation + void SetUp() override; + void TearDown() override; + + // Thread callback for the render thread + static bool RenderProcessorThreadFunc(void* context) { + return reinterpret_cast<AudioProcessingImplLockTest*>(context) + ->render_thread_state_.Process(); + } + + // Thread callback for the capture thread + static bool CaptureProcessorThreadFunc(void* context) { + return reinterpret_cast<AudioProcessingImplLockTest*>(context) + ->capture_thread_state_.Process(); + } + + // Thread callback for the stats thread + static bool StatsProcessorThreadFunc(void* context) { + return reinterpret_cast<AudioProcessingImplLockTest*>(context) + ->stats_thread_state_.Process(); + } + + // Tests whether all the required render and capture side calls have been + // done. + bool TestDone() { + return frame_counters_.BothCountersExceedeThreshold( + test_config_.min_number_of_calls); + } + + // Start the threads used in the test. 
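+  // Note: with the rtc::PlatformThread of this revision, the *ThreadFunc
+  // callbacks above are invoked in a loop for as long as they return true;
+  // a processor's Process() returning false (after MaybeEndTest() has
+  // signaled completion) is what ends the corresponding thread.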
+  void StartThreads() {
+    render_thread_.Start();
+    render_thread_.SetPriority(rtc::kRealtimePriority);
+    capture_thread_.Start();
+    capture_thread_.SetPriority(rtc::kRealtimePriority);
+    stats_thread_.Start();
+    stats_thread_.SetPriority(rtc::kNormalPriority);
+  }
+
+  // Event handlers for the test.
+  rtc::Event test_complete_;
+  rtc::Event render_call_event_;
+  rtc::Event capture_call_event_;
+
+  // Thread related variables.
+  rtc::PlatformThread render_thread_;
+  rtc::PlatformThread capture_thread_;
+  rtc::PlatformThread stats_thread_;
+  mutable RandomGenerator rand_gen_;
+
+  std::unique_ptr<AudioProcessing> apm_;
+  TestConfig test_config_;
+  FrameCounters frame_counters_;
+  RenderProcessor render_thread_state_;
+  CaptureProcessor capture_thread_state_;
+  StatsProcessor stats_thread_state_;
+};
+
+// Sleeps a random time between 0 and max_sleep milliseconds.
+void SleepRandomMs(int max_sleep, RandomGenerator* rand_gen) {
+  int sleeptime = rand_gen->RandInt(0, max_sleep);
+  SleepMs(sleeptime);
+}
+
+// Populates a float audio frame with random data.
+void PopulateAudioFrame(float** frame,
+                        float amplitude,
+                        size_t num_channels,
+                        size_t samples_per_channel,
+                        RandomGenerator* rand_gen) {
+  for (size_t ch = 0; ch < num_channels; ch++) {
+    for (size_t k = 0; k < samples_per_channel; k++) {
+      // Store a random float number in the range [-amplitude, amplitude].
+      frame[ch][k] = amplitude * (2 * rand_gen->RandFloat() - 1);
+    }
+  }
+}
+
+// Populates an AudioFrame with random interleaved data.
+void PopulateAudioFrame(AudioFrame* frame,
+                        int16_t amplitude,
+                        RandomGenerator* rand_gen) {
+  ASSERT_GT(amplitude, 0);
+  ASSERT_LE(amplitude, 32767);
+  int16_t* frame_data = frame->mutable_data();
+  for (size_t ch = 0; ch < frame->num_channels_; ch++) {
+    for (size_t k = 0; k < frame->samples_per_channel_; k++) {
+      // Store a random 16 bit number between -(amplitude + 1) and amplitude
+      // at the interleaved sample position of channel |ch|.
+      frame_data[k * frame->num_channels_ + ch] =
+          rand_gen->RandInt(2 * amplitude + 1) - amplitude - 1;
+    }
+  }
+}
+
+AudioProcessingImplLockTest::AudioProcessingImplLockTest()
+    : test_complete_(false, false),
+      render_call_event_(false, false),
+      capture_call_event_(false, false),
+      render_thread_(RenderProcessorThreadFunc, this, "render"),
+      capture_thread_(CaptureProcessorThreadFunc, this, "capture"),
+      stats_thread_(StatsProcessorThreadFunc, this, "stats"),
+      apm_(AudioProcessingImpl::Create()),
+      render_thread_state_(kMaxFrameSize,
+                           &rand_gen_,
+                           &render_call_event_,
+                           &capture_call_event_,
+                           &frame_counters_,
+                           this,
+                           &test_config_,
+                           apm_.get()),
+      capture_thread_state_(kMaxFrameSize,
+                            &rand_gen_,
+                            &render_call_event_,
+                            &capture_call_event_,
+                            &frame_counters_,
+                            this,
+                            &test_config_,
+                            apm_.get()),
+      stats_thread_state_(&rand_gen_, &test_config_, apm_.get()) {}
+
+// Run the test with a timeout.
+bool AudioProcessingImplLockTest::RunTest() {
+  StartThreads();
+  return test_complete_.Wait(kTestTimeOutLimit);
+}
+
+bool AudioProcessingImplLockTest::MaybeEndTest() {
+  if (HasFatalFailure() || TestDone()) {
+    test_complete_.Set();
+    return true;
+  }
+  return false;
+}
+
+// Setup of test and APM.
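+// Note that SetUp() below configures the APM through the legacy
+// per-submodule pointer API (e.g. apm_->gain_control()->Enable(true))
+// together with SetExtraOptions() for the AEC variants, rather than through
+// the newer AudioProcessing::Config interface.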
+void AudioProcessingImplLockTest::SetUp() { + test_config_ = static_cast<TestConfig>(GetParam()); + + ASSERT_EQ(apm_->kNoError, apm_->level_estimator()->Enable(true)); + ASSERT_EQ(apm_->kNoError, apm_->gain_control()->Enable(true)); + + ASSERT_EQ(apm_->kNoError, + apm_->gain_control()->set_mode(GainControl::kAdaptiveDigital)); + ASSERT_EQ(apm_->kNoError, apm_->gain_control()->Enable(true)); + + ASSERT_EQ(apm_->kNoError, apm_->noise_suppression()->Enable(true)); + ASSERT_EQ(apm_->kNoError, apm_->voice_detection()->Enable(true)); + + Config config; + if (test_config_.aec_type == AecType::AecTurnedOff) { + ASSERT_EQ(apm_->kNoError, apm_->echo_control_mobile()->Enable(false)); + ASSERT_EQ(apm_->kNoError, apm_->echo_cancellation()->Enable(false)); + } else if (test_config_.aec_type == + AecType::BasicWebRtcAecSettingsWithAecMobile) { + ASSERT_EQ(apm_->kNoError, apm_->echo_control_mobile()->Enable(true)); + ASSERT_EQ(apm_->kNoError, apm_->echo_cancellation()->Enable(false)); + } else { + ASSERT_EQ(apm_->kNoError, apm_->echo_control_mobile()->Enable(false)); + ASSERT_EQ(apm_->kNoError, apm_->echo_cancellation()->Enable(true)); + ASSERT_EQ(apm_->kNoError, apm_->echo_cancellation()->enable_metrics(true)); + ASSERT_EQ(apm_->kNoError, + apm_->echo_cancellation()->enable_delay_logging(true)); + + config.Set<ExtendedFilter>( + new ExtendedFilter(test_config_.aec_type == + AecType::BasicWebRtcAecSettingsWithExtentedFilter)); + + config.Set<DelayAgnostic>( + new DelayAgnostic(test_config_.aec_type == + AecType::BasicWebRtcAecSettingsWithDelayAgnosticAec)); + + apm_->SetExtraOptions(config); + } +} + +void AudioProcessingImplLockTest::TearDown() { + render_call_event_.Set(); + capture_call_event_.Set(); + render_thread_.Stop(); + capture_thread_.Stop(); + stats_thread_.Stop(); +} + +StatsProcessor::StatsProcessor(RandomGenerator* rand_gen, + TestConfig* test_config, + AudioProcessing* apm) + : rand_gen_(rand_gen), test_config_(test_config), apm_(apm) {} + +// Implements the callback functionality for the statistics +// collection thread. +bool StatsProcessor::Process() { + SleepRandomMs(100, rand_gen_); + + EXPECT_EQ(apm_->echo_cancellation()->is_enabled(), + ((test_config_->aec_type != AecType::AecTurnedOff) && + (test_config_->aec_type != + AecType::BasicWebRtcAecSettingsWithAecMobile))); + apm_->echo_cancellation()->stream_drift_samples(); + EXPECT_EQ(apm_->echo_control_mobile()->is_enabled(), + (test_config_->aec_type != AecType::AecTurnedOff) && + (test_config_->aec_type == + AecType::BasicWebRtcAecSettingsWithAecMobile)); + EXPECT_TRUE(apm_->gain_control()->is_enabled()); + EXPECT_TRUE(apm_->noise_suppression()->is_enabled()); + + // The below return values are not testable. + apm_->noise_suppression()->speech_probability(); + apm_->voice_detection()->is_enabled(); + + return true; +} + +const float CaptureProcessor::kCaptureInputFloatLevel = 0.03125f; + +CaptureProcessor::CaptureProcessor(int max_frame_size, + RandomGenerator* rand_gen, + rtc::Event* render_call_event, + rtc::Event* capture_call_event, + FrameCounters* shared_counters_state, + AudioProcessingImplLockTest* test_framework, + TestConfig* test_config, + AudioProcessing* apm) + : rand_gen_(rand_gen), + render_call_event_(render_call_event), + capture_call_event_(capture_call_event), + frame_counters_(shared_counters_state), + test_(test_framework), + test_config_(test_config), + apm_(apm), + frame_data_(max_frame_size) {} + +// Implements the callback functionality for the capture thread. 
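+// Returning true requests another invocation; returning false (once
+// MaybeEndTest() has flagged completion) stops the capture thread.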
+bool CaptureProcessor::Process() {
+  // Sleep a random time to simulate thread jitter.
+  SleepRandomMs(3, rand_gen_);
+
+  // Check whether the test is done.
+  if (test_->MaybeEndTest()) {
+    return false;
+  }
+
+  // Ensure that the numbers of render and capture calls do not
+  // differ too much.
+  if (frame_counters_->CaptureMinusRenderCounters() > kMaxCallDifference) {
+    render_call_event_->Wait(rtc::Event::kForever);
+  }
+
+  // Apply any specified capture side APM non-processing runtime calls.
+  ApplyRuntimeSettingScheme();
+
+  // Apply the capture side processing call.
+  CallApmCaptureSide();
+
+  // Increase the number of capture-side calls.
+  frame_counters_->IncreaseCaptureCounter();
+
+  // Flag to the render thread that another capture API call has occurred
+  // by triggering this thread's call event.
+  capture_call_event_->Set();
+
+  return true;
+}
+
+// Prepares a frame with relevant audio data and metadata.
+void CaptureProcessor::PrepareFrame() {
+  // Restrict to a common fixed sample rate if the AudioFrame
+  // interface is used.
+  if (test_config_->capture_api_function ==
+      CaptureApiImpl::ProcessStreamImpl1) {
+    frame_data_.input_sample_rate_hz = test_config_->initial_sample_rate_hz;
+    frame_data_.output_sample_rate_hz = test_config_->initial_sample_rate_hz;
+  }
+
+  // Prepare the AudioFrame data and metadata.
+  frame_data_.input_samples_per_channel =
+      frame_data_.input_sample_rate_hz * AudioProcessing::kChunkSizeMs / 1000;
+  frame_data_.frame.sample_rate_hz_ = frame_data_.input_sample_rate_hz;
+  frame_data_.frame.num_channels_ = frame_data_.input_number_of_channels;
+  frame_data_.frame.samples_per_channel_ =
+      frame_data_.input_samples_per_channel;
+  PopulateAudioFrame(&frame_data_.frame, kCaptureInputFixLevel, rand_gen_);
+
+  // Prepare the float audio input data and metadata.
+  frame_data_.input_stream_config.set_sample_rate_hz(
+      frame_data_.input_sample_rate_hz);
+  frame_data_.input_stream_config.set_num_channels(
+      frame_data_.input_number_of_channels);
+  frame_data_.input_stream_config.set_has_keyboard(false);
+  PopulateAudioFrame(&frame_data_.input_frame[0], kCaptureInputFloatLevel,
+                     frame_data_.input_number_of_channels,
+                     frame_data_.input_samples_per_channel, rand_gen_);
+  frame_data_.input_channel_layout =
+      (frame_data_.input_number_of_channels == 1
+           ? AudioProcessing::ChannelLayout::kMono
+           : AudioProcessing::ChannelLayout::kStereo);
+
+  // Prepare the float audio output data and metadata.
+  frame_data_.output_samples_per_channel =
+      frame_data_.output_sample_rate_hz * AudioProcessing::kChunkSizeMs / 1000;
+  frame_data_.output_stream_config.set_sample_rate_hz(
+      frame_data_.output_sample_rate_hz);
+  frame_data_.output_stream_config.set_num_channels(
+      frame_data_.output_number_of_channels);
+  frame_data_.output_stream_config.set_has_keyboard(false);
+  frame_data_.output_channel_layout =
+      (frame_data_.output_number_of_channels == 1
+           ? AudioProcessing::ChannelLayout::kMono
+           : AudioProcessing::ChannelLayout::kStereo);
+}
+
+// Applies the capture side processing API call.
+void CaptureProcessor::CallApmCaptureSide() {
+  // Prepare a proper capture side processing API call input.
+  PrepareFrame();
+
+  // Set the stream delay.
+  apm_->set_stream_delay_ms(30);
+
+  // Set the analog level.
+  apm_->gain_control()->set_stream_analog_level(80);
+
+  // Call the specified capture side API processing method.
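+  // The three variants exercise the three ProcessStream() overloads: Impl1
+  // takes an AudioFrame*, Impl2 takes deinterleaved float data with
+  // ChannelLayouts, and Impl3 takes deinterleaved float data with
+  // StreamConfigs.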
+  int result = AudioProcessing::kNoError;
+  switch (test_config_->capture_api_function) {
+    case CaptureApiImpl::ProcessStreamImpl1:
+      result = apm_->ProcessStream(&frame_data_.frame);
+      break;
+    case CaptureApiImpl::ProcessStreamImpl2:
+      result = apm_->ProcessStream(
+          &frame_data_.input_frame[0], frame_data_.input_samples_per_channel,
+          frame_data_.input_sample_rate_hz, frame_data_.input_channel_layout,
+          frame_data_.output_sample_rate_hz, frame_data_.output_channel_layout,
+          &frame_data_.output_frame[0]);
+      break;
+    case CaptureApiImpl::ProcessStreamImpl3:
+      result = apm_->ProcessStream(
+          &frame_data_.input_frame[0], frame_data_.input_stream_config,
+          frame_data_.output_stream_config, &frame_data_.output_frame[0]);
+      break;
+    default:
+      FAIL();
+  }
+
+  // Retrieve the new analog level.
+  apm_->gain_control()->stream_analog_level();
+
+  // Check the return code for error.
+  ASSERT_EQ(AudioProcessing::kNoError, result);
+}
+
+// Applies any capture side runtime APM API calls and the audio stream
+// characteristics specified by the scheme for the test.
+void CaptureProcessor::ApplyRuntimeSettingScheme() {
+  const int capture_count_local = frame_counters_->GetCaptureCounter();
+
+  // Update the number of channels and sample rates for the input and output.
+  // Note that the frequencies at which the parameters are changed are chosen
+  // to be prime numbers, to ensure that the permutation pattern of the
+  // parameter changes keeps varying.
+  switch (test_config_->runtime_parameter_setting_scheme) {
+    case RuntimeParameterSettingScheme::SparseStreamMetadataChangeScheme:
+      if (capture_count_local == 0)
+        frame_data_.input_sample_rate_hz = 16000;
+      else if (capture_count_local % 11 == 0)
+        frame_data_.input_sample_rate_hz = 32000;
+      else if (capture_count_local % 73 == 0)
+        frame_data_.input_sample_rate_hz = 48000;
+      else if (capture_count_local % 89 == 0)
+        frame_data_.input_sample_rate_hz = 16000;
+      else if (capture_count_local % 97 == 0)
+        frame_data_.input_sample_rate_hz = 8000;
+
+      if (capture_count_local == 0)
+        frame_data_.input_number_of_channels = 1;
+      else if (capture_count_local % 4 == 0)
+        frame_data_.input_number_of_channels =
+            (frame_data_.input_number_of_channels == 1 ? 2 : 1);
+
+      if (capture_count_local == 0)
+        frame_data_.output_sample_rate_hz = 16000;
+      else if (capture_count_local % 5 == 0)
+        frame_data_.output_sample_rate_hz = 32000;
+      else if (capture_count_local % 47 == 0)
+        frame_data_.output_sample_rate_hz = 48000;
+      else if (capture_count_local % 53 == 0)
+        frame_data_.output_sample_rate_hz = 16000;
+      else if (capture_count_local % 71 == 0)
+        frame_data_.output_sample_rate_hz = 8000;
+
+      if (capture_count_local == 0)
+        frame_data_.output_number_of_channels = 1;
+      else if (capture_count_local % 8 == 0)
+        frame_data_.output_number_of_channels =
+            (frame_data_.output_number_of_channels == 1 ? 2 : 1);
+      break;
+    case RuntimeParameterSettingScheme::ExtremeStreamMetadataChangeScheme:
+      if (capture_count_local % 2 == 0) {
+        frame_data_.input_number_of_channels = 1;
+        frame_data_.input_sample_rate_hz = 16000;
+        frame_data_.output_number_of_channels = 1;
+        frame_data_.output_sample_rate_hz = 16000;
+      } else {
+        frame_data_.input_number_of_channels =
+            (frame_data_.input_number_of_channels == 1 ? 2 : 1);
+        if (frame_data_.input_sample_rate_hz == 8000)
+          frame_data_.input_sample_rate_hz = 16000;
+        else if (frame_data_.input_sample_rate_hz == 16000)
+          frame_data_.input_sample_rate_hz = 32000;
+        else if (frame_data_.input_sample_rate_hz == 32000)
+          frame_data_.input_sample_rate_hz = 48000;
+        else if (frame_data_.input_sample_rate_hz == 48000)
+          frame_data_.input_sample_rate_hz = 8000;
+
+        frame_data_.output_number_of_channels =
+            (frame_data_.output_number_of_channels == 1 ? 2 : 1);
+        if (frame_data_.output_sample_rate_hz == 8000)
+          frame_data_.output_sample_rate_hz = 16000;
+        else if (frame_data_.output_sample_rate_hz == 16000)
+          frame_data_.output_sample_rate_hz = 32000;
+        else if (frame_data_.output_sample_rate_hz == 32000)
+          frame_data_.output_sample_rate_hz = 48000;
+        else if (frame_data_.output_sample_rate_hz == 48000)
+          frame_data_.output_sample_rate_hz = 8000;
+      }
+      break;
+    case RuntimeParameterSettingScheme::FixedMonoStreamMetadataScheme:
+      if (capture_count_local == 0) {
+        frame_data_.input_sample_rate_hz = 16000;
+        frame_data_.input_number_of_channels = 1;
+        frame_data_.output_sample_rate_hz = 16000;
+        frame_data_.output_number_of_channels = 1;
+      }
+      break;
+    case RuntimeParameterSettingScheme::FixedStereoStreamMetadataScheme:
+      if (capture_count_local == 0) {
+        frame_data_.input_sample_rate_hz = 16000;
+        frame_data_.input_number_of_channels = 2;
+        frame_data_.output_sample_rate_hz = 16000;
+        frame_data_.output_number_of_channels = 2;
+      }
+      break;
+    default:
+      FAIL();
+  }
+
+  // Make any specified runtime APM setter and getter calls.
+  switch (test_config_->runtime_parameter_setting_scheme) {
+    case RuntimeParameterSettingScheme::SparseStreamMetadataChangeScheme:
+    case RuntimeParameterSettingScheme::FixedMonoStreamMetadataScheme:
+      break;
+    case RuntimeParameterSettingScheme::ExtremeStreamMetadataChangeScheme:
+    case RuntimeParameterSettingScheme::FixedStereoStreamMetadataScheme:
+      if (capture_count_local % 2 == 0) {
+        ASSERT_EQ(AudioProcessing::Error::kNoError,
+                  apm_->set_stream_delay_ms(30));
+        apm_->set_stream_key_pressed(true);
+        apm_->set_delay_offset_ms(15);
+        EXPECT_EQ(apm_->delay_offset_ms(), 15);
+      } else {
+        ASSERT_EQ(AudioProcessing::Error::kNoError,
+                  apm_->set_stream_delay_ms(50));
+        apm_->set_stream_key_pressed(false);
+        apm_->set_delay_offset_ms(20);
+        EXPECT_EQ(apm_->delay_offset_ms(), 20);
+        apm_->delay_offset_ms();
+      }
+      break;
+    default:
+      FAIL();
+  }
+
+  // Restrict the number of output channels to be at most the number of
+  // input channels.
+  frame_data_.output_number_of_channels =
+      std::min(frame_data_.output_number_of_channels,
+               frame_data_.input_number_of_channels);
+}
+
+const float RenderProcessor::kRenderInputFloatLevel = 0.5f;
+
+RenderProcessor::RenderProcessor(int max_frame_size,
+                                 RandomGenerator* rand_gen,
+                                 rtc::Event* render_call_event,
+                                 rtc::Event* capture_call_event,
+                                 FrameCounters* shared_counters_state,
+                                 AudioProcessingImplLockTest* test_framework,
+                                 TestConfig* test_config,
+                                 AudioProcessing* apm)
+    : rand_gen_(rand_gen),
+      render_call_event_(render_call_event),
+      capture_call_event_(capture_call_event),
+      frame_counters_(shared_counters_state),
+      test_(test_framework),
+      test_config_(test_config),
+      apm_(apm),
+      frame_data_(max_frame_size) {}
+
+// Implements the callback functionality for the render thread.
+bool RenderProcessor::Process() {
+  // Conditional wait to ensure that a capture call has been made
+  // before the first render call is performed (implicitly
+  // required by the APM API).
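+  // (The wait below is released by the capture thread, which Set()s
+  // capture_call_event_ after each of its capture API calls.)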
+  if (first_render_call_) {
+    capture_call_event_->Wait(rtc::Event::kForever);
+    first_render_call_ = false;
+  }
+
+  // Sleep a random time to simulate thread jitter.
+  SleepRandomMs(3, rand_gen_);
+
+  // Check whether the test is done.
+  if (test_->MaybeEndTest()) {
+    return false;
+  }
+
+  // Ensure that the numbers of render and capture calls do not
+  // differ too much.
+  if (frame_counters_->RenderMinusCaptureCounters() > kMaxCallDifference) {
+    capture_call_event_->Wait(rtc::Event::kForever);
+  }
+
+  // Apply any specified render side APM non-processing runtime calls.
+  ApplyRuntimeSettingScheme();
+
+  // Apply the render side processing call.
+  CallApmRenderSide();
+
+  // Increase the number of render-side calls.
+  frame_counters_->IncreaseRenderCounter();
+
+  // Flag to the capture thread that another render API call has occurred
+  // by triggering this thread's call event.
+  render_call_event_->Set();
+  return true;
+}
+
+// Prepares the render side frame and the accompanying metadata
+// with the appropriate information.
+void RenderProcessor::PrepareFrame() {
+  // Restrict to a common fixed sample rate if the AudioFrame interface is
+  // used, or if the mobile AEC variant is not being tested.
+  if ((test_config_->render_api_function ==
+       RenderApiImpl::ProcessReverseStreamImpl1) ||
+      (test_config_->aec_type !=
+       AecType::BasicWebRtcAecSettingsWithAecMobile)) {
+    frame_data_.input_sample_rate_hz = test_config_->initial_sample_rate_hz;
+    frame_data_.output_sample_rate_hz = test_config_->initial_sample_rate_hz;
+  }
+
+  // Prepare the AudioFrame data and metadata.
+  frame_data_.input_samples_per_channel =
+      frame_data_.input_sample_rate_hz * AudioProcessing::kChunkSizeMs / 1000;
+  frame_data_.frame.sample_rate_hz_ = frame_data_.input_sample_rate_hz;
+  frame_data_.frame.num_channels_ = frame_data_.input_number_of_channels;
+  frame_data_.frame.samples_per_channel_ =
+      frame_data_.input_samples_per_channel;
+  PopulateAudioFrame(&frame_data_.frame, kRenderInputFixLevel, rand_gen_);
+
+  // Prepare the float audio input data and metadata.
+  frame_data_.input_stream_config.set_sample_rate_hz(
+      frame_data_.input_sample_rate_hz);
+  frame_data_.input_stream_config.set_num_channels(
+      frame_data_.input_number_of_channels);
+  frame_data_.input_stream_config.set_has_keyboard(false);
+  PopulateAudioFrame(&frame_data_.input_frame[0], kRenderInputFloatLevel,
+                     frame_data_.input_number_of_channels,
+                     frame_data_.input_samples_per_channel, rand_gen_);
+  frame_data_.input_channel_layout =
+      (frame_data_.input_number_of_channels == 1
+           ? AudioProcessing::ChannelLayout::kMono
+           : AudioProcessing::ChannelLayout::kStereo);
+
+  // Prepare the float audio output data and metadata.
+  frame_data_.output_samples_per_channel =
+      frame_data_.output_sample_rate_hz * AudioProcessing::kChunkSizeMs / 1000;
+  frame_data_.output_stream_config.set_sample_rate_hz(
+      frame_data_.output_sample_rate_hz);
+  frame_data_.output_stream_config.set_num_channels(
+      frame_data_.output_number_of_channels);
+  frame_data_.output_stream_config.set_has_keyboard(false);
+  frame_data_.output_channel_layout =
+      (frame_data_.output_number_of_channels == 1
+           ? AudioProcessing::ChannelLayout::kMono
+           : AudioProcessing::ChannelLayout::kStereo);
+}
+
+// Makes the render side processing API call.
+void RenderProcessor::CallApmRenderSide() {
+  // Prepare a proper render side processing API call input.
+  PrepareFrame();
+
+  // Call the specified render side API processing method.
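+  // Impl1 exercises the AudioFrame* overload of ProcessReverseStream(),
+  // Impl2 the float/StreamConfig overload, and AnalyzeReverseStreamImpl the
+  // older ChannelLayout-based AnalyzeReverseStream() entry point that the
+  // TODO in audio_processing_impl.h slates for removal.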
+  int result = AudioProcessing::kNoError;
+  switch (test_config_->render_api_function) {
+    case RenderApiImpl::ProcessReverseStreamImpl1:
+      result = apm_->ProcessReverseStream(&frame_data_.frame);
+      break;
+    case RenderApiImpl::ProcessReverseStreamImpl2:
+      result = apm_->ProcessReverseStream(
+          &frame_data_.input_frame[0], frame_data_.input_stream_config,
+          frame_data_.output_stream_config, &frame_data_.output_frame[0]);
+      break;
+    case RenderApiImpl::AnalyzeReverseStreamImpl:
+      result = apm_->AnalyzeReverseStream(
+          &frame_data_.input_frame[0], frame_data_.input_samples_per_channel,
+          frame_data_.input_sample_rate_hz, frame_data_.input_channel_layout);
+      break;
+    default:
+      FAIL();
+  }
+
+  // Check the return code for error.
+  ASSERT_EQ(AudioProcessing::kNoError, result);
+}
+
+// Applies any render side runtime APM API calls and the audio stream
+// characteristics specified by the scheme for the test.
+void RenderProcessor::ApplyRuntimeSettingScheme() {
+  const int render_count_local = frame_counters_->GetRenderCounter();
+
+  // Update the number of channels and sample rates for the input and output.
+  // Note that the frequencies at which the parameters are changed are chosen
+  // to be prime numbers, to ensure that the permutation pattern of the
+  // parameter changes keeps varying.
+  switch (test_config_->runtime_parameter_setting_scheme) {
+    case RuntimeParameterSettingScheme::SparseStreamMetadataChangeScheme:
+      if (render_count_local == 0)
+        frame_data_.input_sample_rate_hz = 16000;
+      else if (render_count_local % 47 == 0)
+        frame_data_.input_sample_rate_hz = 32000;
+      else if (render_count_local % 71 == 0)
+        frame_data_.input_sample_rate_hz = 48000;
+      else if (render_count_local % 79 == 0)
+        frame_data_.input_sample_rate_hz = 16000;
+      else if (render_count_local % 83 == 0)
+        frame_data_.input_sample_rate_hz = 8000;
+
+      if (render_count_local == 0)
+        frame_data_.input_number_of_channels = 1;
+      else if (render_count_local % 4 == 0)
+        frame_data_.input_number_of_channels =
+            (frame_data_.input_number_of_channels == 1 ? 2 : 1);
+
+      if (render_count_local == 0)
+        frame_data_.output_sample_rate_hz = 16000;
+      else if (render_count_local % 17 == 0)
+        frame_data_.output_sample_rate_hz = 32000;
+      else if (render_count_local % 19 == 0)
+        frame_data_.output_sample_rate_hz = 48000;
+      else if (render_count_local % 29 == 0)
+        frame_data_.output_sample_rate_hz = 16000;
+      else if (render_count_local % 61 == 0)
+        frame_data_.output_sample_rate_hz = 8000;
+
+      if (render_count_local == 0)
+        frame_data_.output_number_of_channels = 1;
+      else if (render_count_local % 8 == 0)
+        frame_data_.output_number_of_channels =
+            (frame_data_.output_number_of_channels == 1 ? 2 : 1);
+      break;
+    case RuntimeParameterSettingScheme::ExtremeStreamMetadataChangeScheme:
+      if (render_count_local == 0) {
+        frame_data_.input_number_of_channels = 1;
+        frame_data_.input_sample_rate_hz = 16000;
+        frame_data_.output_number_of_channels = 1;
+        frame_data_.output_sample_rate_hz = 16000;
+      } else {
+        frame_data_.input_number_of_channels =
+            (frame_data_.input_number_of_channels == 1 ? 2 : 1);
+        if (frame_data_.input_sample_rate_hz == 8000)
+          frame_data_.input_sample_rate_hz = 16000;
+        else if (frame_data_.input_sample_rate_hz == 16000)
+          frame_data_.input_sample_rate_hz = 32000;
+        else if (frame_data_.input_sample_rate_hz == 32000)
+          frame_data_.input_sample_rate_hz = 48000;
+        else if (frame_data_.input_sample_rate_hz == 48000)
+          frame_data_.input_sample_rate_hz = 8000;
+
+        frame_data_.output_number_of_channels =
+            (frame_data_.output_number_of_channels == 1 ? 2 : 1);
+        if (frame_data_.output_sample_rate_hz == 8000)
+          frame_data_.output_sample_rate_hz = 16000;
+        else if (frame_data_.output_sample_rate_hz == 16000)
+          frame_data_.output_sample_rate_hz = 32000;
+        else if (frame_data_.output_sample_rate_hz == 32000)
+          frame_data_.output_sample_rate_hz = 48000;
+        else if (frame_data_.output_sample_rate_hz == 48000)
+          frame_data_.output_sample_rate_hz = 8000;
+      }
+      break;
+    case RuntimeParameterSettingScheme::FixedMonoStreamMetadataScheme:
+      if (render_count_local == 0) {
+        frame_data_.input_sample_rate_hz = 16000;
+        frame_data_.input_number_of_channels = 1;
+        frame_data_.output_sample_rate_hz = 16000;
+        frame_data_.output_number_of_channels = 1;
+      }
+      break;
+    case RuntimeParameterSettingScheme::FixedStereoStreamMetadataScheme:
+      if (render_count_local == 0) {
+        frame_data_.input_sample_rate_hz = 16000;
+        frame_data_.input_number_of_channels = 2;
+        frame_data_.output_sample_rate_hz = 16000;
+        frame_data_.output_number_of_channels = 2;
+      }
+      break;
+    default:
+      FAIL();
+  }
+
+  // Restrict the number of output channels to be at most the number of
+  // input channels.
+  frame_data_.output_number_of_channels =
+      std::min(frame_data_.output_number_of_channels,
+               frame_data_.input_number_of_channels);
+}
+
+}  // anonymous namespace
+
+TEST_P(AudioProcessingImplLockTest, LockTest) {
+  // Run the test and verify that it did not time out.
+  ASSERT_TRUE(RunTest());
+}
+
+// Instantiate tests from the extensive test configuration set.
+INSTANTIATE_TEST_CASE_P(
+    DISABLED_AudioProcessingImplLockExtensive,
+    AudioProcessingImplLockTest,
+    ::testing::ValuesIn(TestConfig::GenerateExtensiveTestConfigs()));
+
+// Instantiate tests from the brief test configuration set.
+INSTANTIATE_TEST_CASE_P(
+    AudioProcessingImplLockBrief,
+    AudioProcessingImplLockTest,
+    ::testing::ValuesIn(TestConfig::GenerateBriefTestConfigs()));
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/audio_processing_impl_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/audio_processing_impl_unittest.cc
new file mode 100644
index 0000000000..e152befc5c
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/audio_processing_impl_unittest.cc
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */ + +#include "modules/audio_processing/audio_processing_impl.h" + +#include "modules/audio_processing/test/test_utils.h" +#include "modules/include/module_common_types.h" +#include "test/gmock.h" +#include "test/gtest.h" + +using ::testing::Invoke; + +namespace webrtc { +namespace { + +class MockInitialize : public AudioProcessingImpl { + public: + explicit MockInitialize(const webrtc::Config& config) + : AudioProcessingImpl(config) {} + + MOCK_METHOD0(InitializeLocked, int()); + int RealInitializeLocked() RTC_NO_THREAD_SAFETY_ANALYSIS { + return AudioProcessingImpl::InitializeLocked(); + } + + MOCK_CONST_METHOD0(AddRef, void()); + MOCK_CONST_METHOD0(Release, rtc::RefCountReleaseStatus()); +}; + +} // namespace + +TEST(AudioProcessingImplTest, AudioParameterChangeTriggersInit) { + webrtc::Config config; + MockInitialize mock(config); + ON_CALL(mock, InitializeLocked()) + .WillByDefault(Invoke(&mock, &MockInitialize::RealInitializeLocked)); + + EXPECT_CALL(mock, InitializeLocked()).Times(1); + mock.Initialize(); + + AudioFrame frame; + // Call with the default parameters; there should be an init. + frame.num_channels_ = 1; + SetFrameSampleRate(&frame, 16000); + EXPECT_CALL(mock, InitializeLocked()).Times(0); + EXPECT_NOERR(mock.ProcessStream(&frame)); + EXPECT_NOERR(mock.ProcessReverseStream(&frame)); + + // New sample rate. (Only impacts ProcessStream). + SetFrameSampleRate(&frame, 32000); + EXPECT_CALL(mock, InitializeLocked()) + .Times(1); + EXPECT_NOERR(mock.ProcessStream(&frame)); + + // New number of channels. + // TODO(peah): Investigate why this causes 2 inits. + frame.num_channels_ = 2; + EXPECT_CALL(mock, InitializeLocked()) + .Times(2); + EXPECT_NOERR(mock.ProcessStream(&frame)); + // ProcessStream sets num_channels_ == num_output_channels. + frame.num_channels_ = 2; + EXPECT_NOERR(mock.ProcessReverseStream(&frame)); + + // A new sample rate passed to ProcessReverseStream should cause an init. + SetFrameSampleRate(&frame, 16000); + EXPECT_CALL(mock, InitializeLocked()).Times(1); + EXPECT_NOERR(mock.ProcessReverseStream(&frame)); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/audio_processing_neon_c_gn/moz.build b/third_party/libwebrtc/webrtc/modules/audio_processing/audio_processing_neon_c_gn/moz.build new file mode 100644 index 0000000000..27cfefabd4 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/audio_processing_neon_c_gn/moz.build @@ -0,0 +1,183 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["CHROMIUM_BUILD"] = True +DEFINES["V8_DEPRECATION_WARNINGS"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_HAS_NEON"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_RESTRICT_LOGGING"] = True + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "/ipc/chromium/src", + "/ipc/glue", + "/third_party/libwebrtc/webrtc/" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/webrtc/modules/audio_processing/ns/nsx_core_neon.c" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + DEFINES["WTF_USE_DYNAMIC_ANNOTATIONS"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION"] = "r12b" + DEFINES["DISABLE_NACL"] = True + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["NO_TCMALLOC"] = True + DEFINES["USE_OPENSSL_CERTS"] = "1" + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["__GNU_SOURCE"] = "1" + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["CR_XCODE_VERSION"] = "0120" + DEFINES["NO_TCMALLOC"] = True + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORE"] = "0" + + OS_LIBS += [ + "-framework Foundation" + ] + +if CONFIG["OS_TARGET"] == "FreeBSD": + + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + +if CONFIG["OS_TARGET"] == "NetBSD": + + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NO_TCMALLOC"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "1" + DEFINES["UNICODE"] = True + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_CRT_SECURE_NO_WARNINGS"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_USING_V110_SDK71_"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + +if not CONFIG["MOZ_DEBUG"] and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_FORTIFY_SOURCE"] = "2" + +if not 
CONFIG["MOZ_DEBUG"] and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_FORTIFY_SOURCE"] = "2" + +if not CONFIG["MOZ_DEBUG"] and CONFIG["OS_TARGET"] == "FreeBSD": + + DEFINES["_FORTIFY_SOURCE"] = "2" + +if not CONFIG["MOZ_DEBUG"] and CONFIG["OS_TARGET"] == "NetBSD": + + DEFINES["_FORTIFY_SOURCE"] = "2" + +if not CONFIG["MOZ_DEBUG"] and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_FORTIFY_SOURCE"] = "2" + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "FreeBSD": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["DISABLE_NACL"] = True + DEFINES["NO_TCMALLOC"] = True + DEFINES["WEBRTC_ARCH_ARM64"] = True + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "NetBSD": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + +Library("audio_processing_neon_c_gn") diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/audio_processing_neon_gn/moz.build b/third_party/libwebrtc/webrtc/modules/audio_processing/audio_processing_neon_gn/moz.build new file mode 100644 index 0000000000..1589324a55 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/audio_processing_neon_gn/moz.build @@ -0,0 +1,200 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. ### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["CHROMIUM_BUILD"] = True +DEFINES["V8_DEPRECATION_WARNINGS"] = True +DEFINES["WEBRTC_APM_DEBUG_DUMP"] = "1" +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_HAS_NEON"] = True +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_RESTRICT_LOGGING"] = True + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "/ipc/chromium/src", + "/ipc/glue", + "/third_party/libwebrtc/webrtc/", + "/third_party/libwebrtc/webrtc/common_audio/resampler/include/", + "/third_party/libwebrtc/webrtc/common_audio/signal_processing/include/", + "/third_party/libwebrtc/webrtc/common_audio/vad/include/" +] + +SOURCES += [ + "/third_party/libwebrtc/webrtc/modules/audio_processing/aecm/aecm_core_neon.cc" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/webrtc/modules/audio_processing/aec/aec_core_neon.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/utility/ooura_fft_neon.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + DEFINES["WTF_USE_DYNAMIC_ANNOTATIONS"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION"] = "r12b" + DEFINES["DISABLE_NACL"] = True + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["NO_TCMALLOC"] = True + DEFINES["USE_OPENSSL_CERTS"] = "1" + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["__GNU_SOURCE"] = "1" 
+ + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["CR_XCODE_VERSION"] = "0120" + DEFINES["NO_TCMALLOC"] = True + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORE"] = "0" + + OS_LIBS += [ + "-framework Foundation" + ] + +if CONFIG["OS_TARGET"] == "FreeBSD": + + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + + OS_LIBS += [ + "rt" + ] + +if CONFIG["OS_TARGET"] == "NetBSD": + + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NO_TCMALLOC"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "1" + DEFINES["UNICODE"] = True + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_CRT_SECURE_NO_WARNINGS"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_USING_V110_SDK71_"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + + OS_LIBS += [ + "winmm" + ] + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + DEFINES["WEBRTC_ARCH_ARM_V7"] = True + +if not CONFIG["MOZ_DEBUG"] and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_FORTIFY_SOURCE"] = "2" + +if not CONFIG["MOZ_DEBUG"] and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_FORTIFY_SOURCE"] = "2" + +if not CONFIG["MOZ_DEBUG"] and CONFIG["OS_TARGET"] == "FreeBSD": + + DEFINES["_FORTIFY_SOURCE"] = "2" + +if not CONFIG["MOZ_DEBUG"] and CONFIG["OS_TARGET"] == "NetBSD": + + DEFINES["_FORTIFY_SOURCE"] = "2" + +if not CONFIG["MOZ_DEBUG"] and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_FORTIFY_SOURCE"] = "2" + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Android": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "FreeBSD": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["DISABLE_NACL"] = True + DEFINES["NO_TCMALLOC"] = True + DEFINES["WEBRTC_ARCH_ARM64"] = True + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "NetBSD": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + +Library("audio_processing_neon_gn") diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/audio_processing_performance_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/audio_processing_performance_unittest.cc new file 
mode 100644 index 0000000000..ea01f24af0 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/audio_processing_performance_unittest.cc @@ -0,0 +1,713 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include "modules/audio_processing/audio_processing_impl.h" + +#include <math.h> + +#include <algorithm> +#include <memory> +#include <vector> + +#include "api/array_view.h" +#include "modules/audio_processing/test/test_utils.h" +#include "modules/include/module_common_types.h" +#include "rtc_base/atomicops.h" +#include "rtc_base/numerics/safe_conversions.h" +#include "rtc_base/platform_thread.h" +#include "rtc_base/random.h" +#include "system_wrappers/include/clock.h" +#include "system_wrappers/include/event_wrapper.h" +#include "test/gtest.h" +#include "test/testsupport/perf_test.h" + +// Check to verify that the define for the intelligibility enhancer is properly +// set. +#if !defined(WEBRTC_INTELLIGIBILITY_ENHANCER) || \ + (WEBRTC_INTELLIGIBILITY_ENHANCER != 0 && \ + WEBRTC_INTELLIGIBILITY_ENHANCER != 1) +#error "Set WEBRTC_INTELLIGIBILITY_ENHANCER to either 0 or 1" +#endif + +namespace webrtc { + +namespace { + +static const bool kPrintAllDurations = false; + +class CallSimulator; + +// Type of the render thread APM API call to use in the test. +enum class ProcessorType { kRender, kCapture }; + +// Variant of APM processing settings to use in the test. +enum class SettingsType { + kDefaultApmDesktop, + kDefaultApmMobile, + kDefaultApmDesktopAndBeamformer, + kDefaultApmDesktopAndIntelligibilityEnhancer, + kAllSubmodulesTurnedOff, + kDefaultApmDesktopWithoutDelayAgnostic, + kDefaultApmDesktopWithoutExtendedFilter +}; + +// Variables related to the audio data and formats. +struct AudioFrameData { + explicit AudioFrameData(size_t max_frame_size) { + // Set up the two-dimensional arrays needed for the APM API calls. + input_framechannels.resize(2 * max_frame_size); + input_frame.resize(2); + input_frame[0] = &input_framechannels[0]; + input_frame[1] = &input_framechannels[max_frame_size]; + + output_frame_channels.resize(2 * max_frame_size); + output_frame.resize(2); + output_frame[0] = &output_frame_channels[0]; + output_frame[1] = &output_frame_channels[max_frame_size]; + } + + std::vector<float> output_frame_channels; + std::vector<float*> output_frame; + std::vector<float> input_framechannels; + std::vector<float*> input_frame; + StreamConfig input_stream_config; + StreamConfig output_stream_config; +}; + +// The configuration for the test. 
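+// Each SimulationConfig pairs one sample rate with one SettingsType;
+// GenerateSimulationConfigs() below builds the cross product, so e.g.
+// {48000 Hz, kDefaultApmDesktop} and {16000 Hz, kDefaultApmMobile} each
+// become one simulation case.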
+struct SimulationConfig { + SimulationConfig(int sample_rate_hz, SettingsType simulation_settings) + : sample_rate_hz(sample_rate_hz), + simulation_settings(simulation_settings) {} + + static std::vector<SimulationConfig> GenerateSimulationConfigs() { + std::vector<SimulationConfig> simulation_configs; +#ifndef WEBRTC_ANDROID + const SettingsType desktop_settings[] = { + SettingsType::kDefaultApmDesktop, SettingsType::kAllSubmodulesTurnedOff, + SettingsType::kDefaultApmDesktopWithoutDelayAgnostic, + SettingsType::kDefaultApmDesktopWithoutExtendedFilter}; + + const int desktop_sample_rates[] = {8000, 16000, 32000, 48000}; + + for (auto sample_rate : desktop_sample_rates) { + for (auto settings : desktop_settings) { + simulation_configs.push_back(SimulationConfig(sample_rate, settings)); + } + } + +#if WEBRTC_INTELLIGIBILITY_ENHANCER == 1 + const SettingsType intelligibility_enhancer_settings[] = { + SettingsType::kDefaultApmDesktopAndIntelligibilityEnhancer}; + + const int intelligibility_enhancer_sample_rates[] = {8000, 16000, 32000, + 48000}; + + for (auto sample_rate : intelligibility_enhancer_sample_rates) { + for (auto settings : intelligibility_enhancer_settings) { + simulation_configs.push_back(SimulationConfig(sample_rate, settings)); + } + } +#endif + + const SettingsType beamformer_settings[] = { + SettingsType::kDefaultApmDesktopAndBeamformer}; + + const int beamformer_sample_rates[] = {8000, 16000, 32000, 48000}; + + for (auto sample_rate : beamformer_sample_rates) { + for (auto settings : beamformer_settings) { + simulation_configs.push_back(SimulationConfig(sample_rate, settings)); + } + } +#endif + + const SettingsType mobile_settings[] = {SettingsType::kDefaultApmMobile}; + + const int mobile_sample_rates[] = {8000, 16000}; + + for (auto sample_rate : mobile_sample_rates) { + for (auto settings : mobile_settings) { + simulation_configs.push_back(SimulationConfig(sample_rate, settings)); + } + } + + return simulation_configs; + } + + std::string SettingsDescription() const { + std::string description; + switch (simulation_settings) { + case SettingsType::kDefaultApmMobile: + description = "DefaultApmMobile"; + break; + case SettingsType::kDefaultApmDesktop: + description = "DefaultApmDesktop"; + break; + case SettingsType::kDefaultApmDesktopAndBeamformer: + description = "DefaultApmDesktopAndBeamformer"; + break; + case SettingsType::kDefaultApmDesktopAndIntelligibilityEnhancer: + description = "DefaultApmDesktopAndIntelligibilityEnhancer"; + break; + case SettingsType::kAllSubmodulesTurnedOff: + description = "AllSubmodulesOff"; + break; + case SettingsType::kDefaultApmDesktopWithoutDelayAgnostic: + description = "DefaultApmDesktopWithoutDelayAgnostic"; + break; + case SettingsType::kDefaultApmDesktopWithoutExtendedFilter: + description = "DefaultApmDesktopWithoutExtendedFilter"; + break; + } + return description; + } + + int sample_rate_hz = 16000; + SettingsType simulation_settings = SettingsType::kDefaultApmDesktop; +}; + +// Handler for the frame counters. +class FrameCounters { + public: + void IncreaseRenderCounter() { + rtc::AtomicOps::Increment(&render_count_); + } + + void IncreaseCaptureCounter() { + rtc::AtomicOps::Increment(&capture_count_); + } + + int CaptureMinusRenderCounters() const { + // The return value will be approximate, but that's good enough since + // by the time we return the value, it's not guaranteed to be correct + // anyway. 
+    return rtc::AtomicOps::AcquireLoad(&capture_count_) -
+           rtc::AtomicOps::AcquireLoad(&render_count_);
+  }
+
+  int RenderMinusCaptureCounters() const {
+    return -CaptureMinusRenderCounters();
+  }
+
+  bool BothCountersExceedThreshold(int threshold) const {
+    // TODO(tommi): We could use an event to signal this so that we don't need
+    // to be polling from the main thread and possibly steal cycles.
+    const int capture_count = rtc::AtomicOps::AcquireLoad(&capture_count_);
+    const int render_count = rtc::AtomicOps::AcquireLoad(&render_count_);
+    return (render_count > threshold && capture_count > threshold);
+  }
+
+ private:
+  int render_count_ = 0;
+  int capture_count_ = 0;
+};
+
+// Class that represents a flag that can only be raised.
+class LockedFlag {
+ public:
+  bool get_flag() const {
+    return rtc::AtomicOps::AcquireLoad(&flag_);
+  }
+
+  void set_flag() {
+    if (!get_flag())  // read-only operation to avoid affecting the cache-line.
+      rtc::AtomicOps::CompareAndSwap(&flag_, 0, 1);
+  }
+
+ private:
+  int flag_ = 0;
+};
+
+// Class implementing the timed render/capture thread processors.
+class TimedThreadApiProcessor {
+ public:
+  TimedThreadApiProcessor(ProcessorType processor_type,
+                          Random* rand_gen,
+                          FrameCounters* shared_counters_state,
+                          LockedFlag* capture_call_checker,
+                          CallSimulator* test_framework,
+                          const SimulationConfig* simulation_config,
+                          AudioProcessing* apm,
+                          int num_durations_to_store,
+                          float input_level,
+                          int num_channels)
+      : rand_gen_(rand_gen),
+        frame_counters_(shared_counters_state),
+        capture_call_checker_(capture_call_checker),
+        test_(test_framework),
+        simulation_config_(simulation_config),
+        apm_(apm),
+        frame_data_(kMaxFrameSize),
+        clock_(webrtc::Clock::GetRealTimeClock()),
+        num_durations_to_store_(num_durations_to_store),
+        input_level_(input_level),
+        processor_type_(processor_type),
+        num_channels_(num_channels) {
+    api_call_durations_.reserve(num_durations_to_store_);
+  }
+
+  // Implements the callback functionality for the threads.
+  bool Process();
+
+  // Method for printing out the simulation statistics.
+  void print_processor_statistics(const std::string& processor_name) const {
+    const std::string modifier = "_api_call_duration";
+
+    const std::string sample_rate_name =
+        "_" + std::to_string(simulation_config_->sample_rate_hz) + "Hz";
+
+    webrtc::test::PrintResultMeanAndError(
+        "apm_timing", sample_rate_name, processor_name,
+        GetDurationAverage(), GetDurationStandardDeviation(),
+        "us", false);
+
+    if (kPrintAllDurations) {
+      webrtc::test::PrintResultList("apm_call_durations", sample_rate_name,
+                                    processor_name, api_call_durations_, "us",
+                                    false);
+    }
+  }
+
+  void AddDuration(int64_t duration) {
+    if (api_call_durations_.size() < num_durations_to_store_) {
+      api_call_durations_.push_back(duration);
+    }
+  }
+
+ private:
+  static const int kMaxCallDifference = 10;
+  static const int kMaxFrameSize = 480;
+  static const int kNumInitializationFrames = 5;
+
+  int64_t GetDurationStandardDeviation() const {
+    double variance = 0;
+    const int64_t average_duration = GetDurationAverage();
+    for (size_t k = kNumInitializationFrames; k < api_call_durations_.size();
+         k++) {
+      int64_t tmp = api_call_durations_[k] - average_duration;
+      variance += static_cast<double>(tmp * tmp);
+    }
+    const int denominator = rtc::checked_cast<int>(api_call_durations_.size()) -
+                            kNumInitializationFrames;
+    return (denominator > 0
+                ? rtc::checked_cast<int64_t>(sqrt(variance / denominator))
+                : -1);
+  }
+
+  int64_t GetDurationAverage() const {
+    int64_t average_duration = 0;
+    for (size_t k = kNumInitializationFrames; k < api_call_durations_.size();
+         k++) {
+      average_duration += api_call_durations_[k];
+    }
+    const int denominator = rtc::checked_cast<int>(api_call_durations_.size()) -
+                            kNumInitializationFrames;
+    return (denominator > 0 ? average_duration / denominator : -1);
+  }
+
+  int ProcessCapture() {
+    // Set the stream delay.
+    apm_->set_stream_delay_ms(30);
+
+    // Call and time the specified capture side API processing method.
+    const int64_t start_time = clock_->TimeInMicroseconds();
+    const int result = apm_->ProcessStream(
+        &frame_data_.input_frame[0], frame_data_.input_stream_config,
+        frame_data_.output_stream_config, &frame_data_.output_frame[0]);
+    const int64_t end_time = clock_->TimeInMicroseconds();
+
+    frame_counters_->IncreaseCaptureCounter();
+
+    AddDuration(end_time - start_time);
+
+    if (first_process_call_) {
+      // Flag that the capture side has been called at least once. This is
+      // needed to ensure that a capture call is made before the first render
+      // call, as implicitly required by the APM API.
+      capture_call_checker_->set_flag();
+      first_process_call_ = false;
+    }
+    return result;
+  }
+
+  bool ReadyToProcessCapture() {
+    return (frame_counters_->CaptureMinusRenderCounters() <=
+            kMaxCallDifference);
+  }
+
+  int ProcessRender() {
+    // Call and time the specified render side API processing method.
+    const int64_t start_time = clock_->TimeInMicroseconds();
+    const int result = apm_->ProcessReverseStream(
+        &frame_data_.input_frame[0], frame_data_.input_stream_config,
+        frame_data_.output_stream_config, &frame_data_.output_frame[0]);
+    const int64_t end_time = clock_->TimeInMicroseconds();
+    frame_counters_->IncreaseRenderCounter();
+
+    AddDuration(end_time - start_time);
+
+    return result;
+  }
+
+  bool ReadyToProcessRender() {
+    // Do not process until at least one capture call has been done
+    // (implicitly required by the APM API).
+    if (first_process_call_ && !capture_call_checker_->get_flag()) {
+      return false;
+    }
+
+    // Ensure that the number of render and capture calls do not differ too
+    // much.
+    if (frame_counters_->RenderMinusCaptureCounters() > kMaxCallDifference) {
+      return false;
+    }
+
+    first_process_call_ = false;
+    return true;
+  }
+
+  void PrepareFrame() {
+    // Lambda function for populating a float multichannel audio frame
+    // with random data.
+    auto populate_audio_frame = [](float amplitude, size_t num_channels,
+                                   size_t samples_per_channel, Random* rand_gen,
+                                   float** frame) {
+      for (size_t ch = 0; ch < num_channels; ch++) {
+        for (size_t k = 0; k < samples_per_channel; k++) {
+          // Store a random float with a value in [-amplitude, amplitude].
+          frame[ch][k] = amplitude * (2 * rand_gen->Rand<float>() - 1);
+        }
+      }
+    };
+
+    // Prepare the audio input data and metadata.
+    frame_data_.input_stream_config.set_sample_rate_hz(
+        simulation_config_->sample_rate_hz);
+    frame_data_.input_stream_config.set_num_channels(num_channels_);
+    frame_data_.input_stream_config.set_has_keyboard(false);
+    populate_audio_frame(input_level_, num_channels_,
+                         (simulation_config_->sample_rate_hz *
+                          AudioProcessing::kChunkSizeMs / 1000),
+                         rand_gen_, &frame_data_.input_frame[0]);
+
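As a sanity check on the samples-per-channel expression just above, sample_rate_hz * AudioProcessing::kChunkSizeMs / 1000, a standalone sketch of the arithmetic; the constant 10 here mirrors AudioProcessing::kChunkSizeMs rather than pulling in the APM headers:

#include <cstdio>

int main() {
  const int kChunkSizeMs = 10;  // assumed value of AudioProcessing::kChunkSizeMs
  for (int rate : {8000, 16000, 32000, 48000}) {
    // One processing call consumes 80/160/320/480 samples per channel.
    std::printf("%d Hz -> %d samples per channel\n", rate,
                rate * kChunkSizeMs / 1000);
  }
  return 0;
}

+    // Prepare the float audio output data and metadata.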
+    frame_data_.output_stream_config.set_sample_rate_hz(
+        simulation_config_->sample_rate_hz);
+    frame_data_.output_stream_config.set_num_channels(1);
+    frame_data_.output_stream_config.set_has_keyboard(false);
+  }
+
+  bool ReadyToProcess() {
+    switch (processor_type_) {
+      case ProcessorType::kRender:
+        return ReadyToProcessRender();
+
+      case ProcessorType::kCapture:
+        return ReadyToProcessCapture();
+    }
+
+    // Should not be reached, but the return statement is needed for the code
+    // to build successfully on Android.
+    RTC_NOTREACHED();
+    return false;
+  }
+
+  Random* rand_gen_ = nullptr;
+  FrameCounters* frame_counters_ = nullptr;
+  LockedFlag* capture_call_checker_ = nullptr;
+  CallSimulator* test_ = nullptr;
+  const SimulationConfig* const simulation_config_ = nullptr;
+  AudioProcessing* apm_ = nullptr;
+  AudioFrameData frame_data_;
+  webrtc::Clock* clock_;
+  const size_t num_durations_to_store_;
+  std::vector<double> api_call_durations_;
+  const float input_level_;
+  bool first_process_call_ = true;
+  const ProcessorType processor_type_;
+  const int num_channels_ = 1;
+};
+
+// Class for managing the test simulation.
+class CallSimulator : public ::testing::TestWithParam<SimulationConfig> {
+ public:
+  CallSimulator()
+      : test_complete_(EventWrapper::Create()),
+        render_thread_(
+            new rtc::PlatformThread(RenderProcessorThreadFunc, this, "render")),
+        capture_thread_(new rtc::PlatformThread(CaptureProcessorThreadFunc,
+                                                this,
+                                                "capture")),
+        rand_gen_(42U),
+        simulation_config_(static_cast<SimulationConfig>(GetParam())) {}
+
+  // Run the call simulation with a timeout.
+  EventTypeWrapper Run() {
+    StartThreads();
+
+    EventTypeWrapper result = test_complete_->Wait(kTestTimeout);
+
+    StopThreads();
+
+    render_thread_state_->print_processor_statistics(
+        simulation_config_.SettingsDescription() + "_render");
+    capture_thread_state_->print_processor_statistics(
+        simulation_config_.SettingsDescription() + "_capture");
+
+    return result;
+  }
+
+  // Tests whether all the required render and capture side calls have been
+  // done.
+  bool MaybeEndTest() {
+    if (frame_counters_.BothCountersExceedThreshold(kMinNumFramesToProcess)) {
+      test_complete_->Set();
+      return true;
+    }
+    return false;
+  }
+
+ private:
+  static const float kCaptureInputFloatLevel;
+  static const float kRenderInputFloatLevel;
+  static const int kMinNumFramesToProcess = 150;
+  // Timeout in milliseconds: 3x headroom over the nominal 10 ms per frame.
+  static const int32_t kTestTimeout = 3 * 10 * kMinNumFramesToProcess;
+
+  // ::testing::TestWithParam<> implementation.
+  void TearDown() override { StopThreads(); }
+
+  // Stop all running threads.
+  void StopThreads() {
+    render_thread_->Stop();
+    capture_thread_->Stop();
+  }
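The TODO(tommi) comments in this file twice suggest replacing busy polling (in MaybeEndTest() and in the spin loop of Process() further down) with signaling. A hedged sketch of that idea using only std::condition_variable; the real test uses EventWrapper and rtc::PlatformThread, so this is an illustration of the approach, not a drop-in replacement:

#include <chrono>
#include <condition_variable>
#include <mutex>
#include <thread>

// A one-shot completion flag: worker threads call Signal() once their frame
// quota is reached; the main thread blocks in Wait() instead of polling.
class Completion {
 public:
  void Signal() {
    std::lock_guard<std::mutex> lock(mutex_);
    done_ = true;
    cv_.notify_all();
  }
  // Returns true if signaled before the timeout elapsed.
  bool Wait(std::chrono::milliseconds timeout) {
    std::unique_lock<std::mutex> lock(mutex_);
    return cv_.wait_for(lock, timeout, [this] { return done_; });
  }

 private:
  std::mutex mutex_;
  std::condition_variable cv_;
  bool done_ = false;
};

int main() {
  Completion test_complete;
  std::thread worker([&] { test_complete.Signal(); });  // e.g. from MaybeEndTest()
  const bool ok = test_complete.Wait(std::chrono::milliseconds(4500));
  worker.join();
  return ok ? 0 : 1;
}

+
+  // Simulator and APM setup.
+  void SetUp() override {
+    // Lambda function for setting the default APM runtime settings for desktop.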
+ auto set_default_desktop_apm_runtime_settings = [](AudioProcessing* apm) { + ASSERT_EQ(apm->kNoError, apm->level_estimator()->Enable(true)); + ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true)); + ASSERT_EQ(apm->kNoError, + apm->gain_control()->set_mode(GainControl::kAdaptiveDigital)); + ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true)); + ASSERT_EQ(apm->kNoError, apm->noise_suppression()->Enable(true)); + ASSERT_EQ(apm->kNoError, apm->voice_detection()->Enable(true)); + ASSERT_EQ(apm->kNoError, apm->echo_control_mobile()->Enable(false)); + ASSERT_EQ(apm->kNoError, apm->echo_cancellation()->Enable(true)); + ASSERT_EQ(apm->kNoError, apm->echo_cancellation()->enable_metrics(true)); + ASSERT_EQ(apm->kNoError, + apm->echo_cancellation()->enable_delay_logging(true)); + }; + + // Lambda function for setting the default APM runtime settings for mobile. + auto set_default_mobile_apm_runtime_settings = [](AudioProcessing* apm) { + ASSERT_EQ(apm->kNoError, apm->level_estimator()->Enable(true)); + ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true)); + ASSERT_EQ(apm->kNoError, + apm->gain_control()->set_mode(GainControl::kAdaptiveDigital)); + ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true)); + ASSERT_EQ(apm->kNoError, apm->noise_suppression()->Enable(true)); + ASSERT_EQ(apm->kNoError, apm->voice_detection()->Enable(true)); + ASSERT_EQ(apm->kNoError, apm->echo_control_mobile()->Enable(true)); + ASSERT_EQ(apm->kNoError, apm->echo_cancellation()->Enable(false)); + }; + + // Lambda function for turning off all of the APM runtime settings + // submodules. + auto turn_off_default_apm_runtime_settings = [](AudioProcessing* apm) { + ASSERT_EQ(apm->kNoError, apm->level_estimator()->Enable(false)); + ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(false)); + ASSERT_EQ(apm->kNoError, + apm->gain_control()->set_mode(GainControl::kAdaptiveDigital)); + ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(false)); + ASSERT_EQ(apm->kNoError, apm->noise_suppression()->Enable(false)); + ASSERT_EQ(apm->kNoError, apm->voice_detection()->Enable(false)); + ASSERT_EQ(apm->kNoError, apm->echo_control_mobile()->Enable(false)); + ASSERT_EQ(apm->kNoError, apm->echo_cancellation()->Enable(false)); + ASSERT_EQ(apm->kNoError, apm->echo_cancellation()->enable_metrics(false)); + ASSERT_EQ(apm->kNoError, + apm->echo_cancellation()->enable_delay_logging(false)); + }; + + // Lambda function for adding default desktop APM settings to a config. + auto add_default_desktop_config = [](Config* config) { + config->Set<ExtendedFilter>(new ExtendedFilter(true)); + config->Set<DelayAgnostic>(new DelayAgnostic(true)); + }; + + // Lambda function for adding beamformer settings to a config. 
+ auto add_beamformer_config = [](Config* config) { + const size_t num_mics = 2; + const std::vector<Point> array_geometry = + ParseArrayGeometry("0 0 0 0.05 0 0", num_mics); + RTC_CHECK_EQ(array_geometry.size(), num_mics); + + config->Set<Beamforming>( + new Beamforming(true, array_geometry, + SphericalPointf(DegreesToRadians(90), 0.f, 1.f))); + }; + + int num_capture_channels = 1; + switch (simulation_config_.simulation_settings) { + case SettingsType::kDefaultApmMobile: { + apm_.reset(AudioProcessingImpl::Create()); + ASSERT_TRUE(!!apm_); + set_default_mobile_apm_runtime_settings(apm_.get()); + break; + } + case SettingsType::kDefaultApmDesktop: { + Config config; + add_default_desktop_config(&config); + apm_.reset(AudioProcessingImpl::Create(config)); + ASSERT_TRUE(!!apm_); + set_default_desktop_apm_runtime_settings(apm_.get()); + apm_->SetExtraOptions(config); + break; + } + case SettingsType::kDefaultApmDesktopAndBeamformer: { + Config config; + add_beamformer_config(&config); + add_default_desktop_config(&config); + apm_.reset(AudioProcessingImpl::Create(config)); + ASSERT_TRUE(!!apm_); + set_default_desktop_apm_runtime_settings(apm_.get()); + apm_->SetExtraOptions(config); + num_capture_channels = 2; + break; + } + case SettingsType::kDefaultApmDesktopAndIntelligibilityEnhancer: { + Config config; + config.Set<Intelligibility>(new Intelligibility(true)); + add_default_desktop_config(&config); + apm_.reset(AudioProcessingImpl::Create(config)); + ASSERT_TRUE(!!apm_); + set_default_desktop_apm_runtime_settings(apm_.get()); + apm_->SetExtraOptions(config); + break; + } + case SettingsType::kAllSubmodulesTurnedOff: { + apm_.reset(AudioProcessingImpl::Create()); + ASSERT_TRUE(!!apm_); + turn_off_default_apm_runtime_settings(apm_.get()); + break; + } + case SettingsType::kDefaultApmDesktopWithoutDelayAgnostic: { + Config config; + config.Set<ExtendedFilter>(new ExtendedFilter(true)); + config.Set<DelayAgnostic>(new DelayAgnostic(false)); + apm_.reset(AudioProcessingImpl::Create(config)); + ASSERT_TRUE(!!apm_); + set_default_desktop_apm_runtime_settings(apm_.get()); + apm_->SetExtraOptions(config); + break; + } + case SettingsType::kDefaultApmDesktopWithoutExtendedFilter: { + Config config; + config.Set<ExtendedFilter>(new ExtendedFilter(false)); + config.Set<DelayAgnostic>(new DelayAgnostic(true)); + apm_.reset(AudioProcessingImpl::Create(config)); + ASSERT_TRUE(!!apm_); + set_default_desktop_apm_runtime_settings(apm_.get()); + apm_->SetExtraOptions(config); + break; + } + } + + render_thread_state_.reset(new TimedThreadApiProcessor( + ProcessorType::kRender, &rand_gen_, &frame_counters_, + &capture_call_checker_, this, &simulation_config_, apm_.get(), + kMinNumFramesToProcess, kRenderInputFloatLevel, 1)); + capture_thread_state_.reset(new TimedThreadApiProcessor( + ProcessorType::kCapture, &rand_gen_, &frame_counters_, + &capture_call_checker_, this, &simulation_config_, apm_.get(), + kMinNumFramesToProcess, kCaptureInputFloatLevel, num_capture_channels)); + } + + // Thread callback for the render thread. + static bool RenderProcessorThreadFunc(void* context) { + return reinterpret_cast<CallSimulator*>(context) + ->render_thread_state_->Process(); + } + + // Thread callback for the capture thread. + static bool CaptureProcessorThreadFunc(void* context) { + return reinterpret_cast<CallSimulator*>(context) + ->capture_thread_state_->Process(); + } + + // Start the threads used in the test. 
+ void StartThreads() { + ASSERT_NO_FATAL_FAILURE(render_thread_->Start()); + render_thread_->SetPriority(rtc::kRealtimePriority); + ASSERT_NO_FATAL_FAILURE(capture_thread_->Start()); + capture_thread_->SetPriority(rtc::kRealtimePriority); + } + + // Event handler for the test. + const std::unique_ptr<EventWrapper> test_complete_; + + // Thread related variables. + std::unique_ptr<rtc::PlatformThread> render_thread_; + std::unique_ptr<rtc::PlatformThread> capture_thread_; + Random rand_gen_; + + std::unique_ptr<AudioProcessing> apm_; + const SimulationConfig simulation_config_; + FrameCounters frame_counters_; + LockedFlag capture_call_checker_; + std::unique_ptr<TimedThreadApiProcessor> render_thread_state_; + std::unique_ptr<TimedThreadApiProcessor> capture_thread_state_; +}; + +// Implements the callback functionality for the threads. +bool TimedThreadApiProcessor::Process() { + PrepareFrame(); + + // Wait in a spinlock manner until it is ok to start processing. + // Note that SleepMs is not applicable since it only allows sleeping + // on a millisecond basis which is too long. + // TODO(tommi): This loop may affect the performance of the test that it's + // meant to measure. See if we could use events instead to signal readiness. + while (!ReadyToProcess()) { + } + + int result = AudioProcessing::kNoError; + switch (processor_type_) { + case ProcessorType::kRender: + result = ProcessRender(); + break; + case ProcessorType::kCapture: + result = ProcessCapture(); + break; + } + + EXPECT_EQ(result, AudioProcessing::kNoError); + + return !test_->MaybeEndTest(); +} + +const float CallSimulator::kRenderInputFloatLevel = 0.5f; +const float CallSimulator::kCaptureInputFloatLevel = 0.03125f; +} // anonymous namespace + +// TODO(peah): Reactivate once issue 7712 has been resolved. +TEST_P(CallSimulator, DISABLED_ApiCallDurationTest) { + // Run test and verify that it did not time out. + EXPECT_EQ(kEventSignaled, Run()); +} + +INSTANTIATE_TEST_CASE_P( + AudioProcessingPerformanceTest, + CallSimulator, + ::testing::ValuesIn(SimulationConfig::GenerateSimulationConfigs())); + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/audio_processing_sse2_gn/moz.build b/third_party/libwebrtc/webrtc/modules/audio_processing/audio_processing_sse2_gn/moz.build new file mode 100644 index 0000000000..f69cd941e5 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/audio_processing_sse2_gn/moz.build @@ -0,0 +1,218 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["CHROMIUM_BUILD"] = True +DEFINES["V8_DEPRECATION_WARNINGS"] = True +DEFINES["WEBRTC_APM_DEBUG_DUMP"] = "1" +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_RESTRICT_LOGGING"] = True + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "/ipc/chromium/src", + "/ipc/glue", + "/third_party/libwebrtc/webrtc/" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/webrtc/modules/audio_processing/aec/aec_core_sse2.cc", + "/third_party/libwebrtc/webrtc/modules/audio_processing/utility/ooura_fft_sse2.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + DEFINES["WTF_USE_DYNAMIC_ANNOTATIONS"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION"] = "r12b" + DEFINES["DISABLE_NACL"] = True + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["NO_TCMALLOC"] = True + DEFINES["USE_OPENSSL_CERTS"] = "1" + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["__GNU_SOURCE"] = "1" + +if CONFIG["OS_TARGET"] == "Darwin": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["CR_XCODE_VERSION"] = "0920" + DEFINES["NO_TCMALLOC"] = True + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORE"] = "0" + +if CONFIG["OS_TARGET"] == "DragonFly": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + +if CONFIG["OS_TARGET"] == "FreeBSD": + + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + +if CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + +if CONFIG["OS_TARGET"] == "NetBSD": + + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NO_TCMALLOC"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "1" + DEFINES["UNICODE"] = True + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_CRT_SECURE_NO_WARNINGS"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_USING_V110_SDK71_"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if not CONFIG["MOZ_DEBUG"] and CONFIG["OS_TARGET"] == 
"Android": + + DEFINES["_FORTIFY_SOURCE"] = "2" + +if not CONFIG["MOZ_DEBUG"] and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_FORTIFY_SOURCE"] = "2" + +if not CONFIG["MOZ_DEBUG"] and CONFIG["OS_TARGET"] == "DragonFly": + + DEFINES["_FORTIFY_SOURCE"] = "2" + +if not CONFIG["MOZ_DEBUG"] and CONFIG["OS_TARGET"] == "FreeBSD": + + DEFINES["_FORTIFY_SOURCE"] = "2" + +if not CONFIG["MOZ_DEBUG"] and CONFIG["OS_TARGET"] == "NetBSD": + + DEFINES["_FORTIFY_SOURCE"] = "2" + +if not CONFIG["MOZ_DEBUG"] and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_FORTIFY_SOURCE"] = "2" + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2", + "-msse2" + ] + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "FreeBSD": + + CXXFLAGS += [ + "-msse2", + "-msse2" + ] + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "FreeBSD": + + CXXFLAGS += [ + "-msse2" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "NetBSD": + + CXXFLAGS += [ + "-msse2", + "-msse2" + ] + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "NetBSD": + + CXXFLAGS += [ + "-msse2" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "OpenBSD": + + CXXFLAGS += [ + "-msse2", + "-msse2" + ] + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "OpenBSD": + + CXXFLAGS += [ + "-msse2" + ] + +Library("audio_processing_sse2_gn") diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/audio_processing_statistics_gn/moz.build b/third_party/libwebrtc/webrtc/modules/audio_processing/audio_processing_statistics_gn/moz.build new file mode 100644 index 0000000000..848aff350d --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/audio_processing_statistics_gn/moz.build @@ -0,0 +1,217 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + + ### This moz.build was AUTOMATICALLY GENERATED from a GN config, ### + ### DO NOT edit it by hand. 
### + +COMPILE_FLAGS["OS_INCLUDES"] = [] +AllowCompilerWarnings() + +DEFINES["CHROMIUM_BUILD"] = True +DEFINES["V8_DEPRECATION_WARNINGS"] = True +DEFINES["WEBRTC_ENABLE_PROTOBUF"] = "0" +DEFINES["WEBRTC_MOZILLA_BUILD"] = True +DEFINES["WEBRTC_NON_STATIC_TRACE_EVENT_HANDLERS"] = "0" +DEFINES["WEBRTC_RESTRICT_LOGGING"] = True + +FINAL_LIBRARY = "webrtc" + + +LOCAL_INCLUDES += [ + "!/ipc/ipdl/_ipdlheaders", + "/ipc/chromium/src", + "/ipc/glue", + "/third_party/libwebrtc/webrtc/" +] + +UNIFIED_SOURCES += [ + "/third_party/libwebrtc/webrtc/modules/audio_processing/include/audio_processing_statistics.cc" +] + +if not CONFIG["MOZ_DEBUG"]: + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "0" + DEFINES["NDEBUG"] = True + DEFINES["NVALGRIND"] = True + +if CONFIG["MOZ_DEBUG"] == "1": + + DEFINES["DYNAMIC_ANNOTATIONS_ENABLED"] = "1" + DEFINES["WTF_USE_DYNAMIC_ANNOTATIONS"] = "1" + +if CONFIG["OS_TARGET"] == "Android": + + DEFINES["ANDROID"] = True + DEFINES["ANDROID_NDK_VERSION"] = "r12b" + DEFINES["DISABLE_NACL"] = True + DEFINES["HAVE_SYS_UIO_H"] = True + DEFINES["NO_TCMALLOC"] = True + DEFINES["USE_OPENSSL_CERTS"] = "1" + DEFINES["WEBRTC_ANDROID"] = True + DEFINES["WEBRTC_ANDROID_OPENSLES"] = True + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + DEFINES["__GNU_SOURCE"] = "1" + + OS_LIBS += [ + "log" + ] + +if CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["NO_TCMALLOC"] = True + DEFINES["WEBRTC_MAC"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["__ASSERT_MACROS_DEFINE_VERSIONS_WITHOUT_UNDERSCORE"] = "0" + + OS_LIBS += [ + "-framework Foundation" + ] + +if CONFIG["OS_TARGET"] == "DragonFly": + + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + +if CONFIG["OS_TARGET"] == "FreeBSD": + + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + +if CONFIG["OS_TARGET"] == "Linux": + + DEFINES["USE_NSS_CERTS"] = "1" + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_LINUX"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + +if CONFIG["OS_TARGET"] == "NetBSD": + + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + +if CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["USE_X11"] = "1" + DEFINES["WEBRTC_BSD"] = True + DEFINES["WEBRTC_POSIX"] = True + DEFINES["_FILE_OFFSET_BITS"] = "64" + +if CONFIG["OS_TARGET"] == "WINNT": + + DEFINES["CERT_CHAIN_PARA_HAS_EXTRA_FIELDS"] = True + DEFINES["NOMINMAX"] = True + DEFINES["NO_TCMALLOC"] = True + DEFINES["NTDDI_VERSION"] = "0x0A000000" + DEFINES["PSAPI_VERSION"] = "1" + DEFINES["UNICODE"] = True + DEFINES["WEBRTC_WIN"] = True + DEFINES["WIN32"] = True + DEFINES["WIN32_LEAN_AND_MEAN"] = True + DEFINES["WINVER"] = "0x0A00" + DEFINES["_ATL_NO_OPENGL"] = True + DEFINES["_CRT_RAND_S"] = True + DEFINES["_CRT_SECURE_NO_DEPRECATE"] = True + DEFINES["_CRT_SECURE_NO_WARNINGS"] = True + DEFINES["_HAS_EXCEPTIONS"] = "0" + DEFINES["_SCL_SECURE_NO_DEPRECATE"] = True + DEFINES["_SECURE_ATL"] = True + DEFINES["_UNICODE"] = True + DEFINES["_USING_V110_SDK71_"] = True + DEFINES["_WIN32_WINNT"] = "0x0A00" + DEFINES["_WINDOWS"] = True + DEFINES["__STD_C"] = True + +if CONFIG["CPU_ARCH"] == "aarch64": + + DEFINES["WEBRTC_ARCH_ARM64"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if CONFIG["CPU_ARCH"] == "arm": + + CXXFLAGS += [ + "-mfpu=neon" + ] + + DEFINES["WEBRTC_ARCH_ARM"] = True + 
DEFINES["WEBRTC_ARCH_ARM_V7"] = True + DEFINES["WEBRTC_HAS_NEON"] = True + +if not CONFIG["MOZ_DEBUG"] and CONFIG["OS_TARGET"] == "Android": + + DEFINES["_FORTIFY_SOURCE"] = "2" + +if not CONFIG["MOZ_DEBUG"] and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["_FORTIFY_SOURCE"] = "2" + +if not CONFIG["MOZ_DEBUG"] and CONFIG["OS_TARGET"] == "DragonFly": + + DEFINES["_FORTIFY_SOURCE"] = "2" + +if not CONFIG["MOZ_DEBUG"] and CONFIG["OS_TARGET"] == "FreeBSD": + + DEFINES["_FORTIFY_SOURCE"] = "2" + +if not CONFIG["MOZ_DEBUG"] and CONFIG["OS_TARGET"] == "NetBSD": + + DEFINES["_FORTIFY_SOURCE"] = "2" + +if not CONFIG["MOZ_DEBUG"] and CONFIG["OS_TARGET"] == "OpenBSD": + + DEFINES["_FORTIFY_SOURCE"] = "2" + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Android": + + CXXFLAGS += [ + "-msse2" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["CR_XCODE_VERSION"] = "0120" + +if CONFIG["CPU_ARCH"] == "x86_64" and CONFIG["OS_TARGET"] == "Darwin": + + DEFINES["CR_XCODE_VERSION"] = "0920" + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "FreeBSD": + + CXXFLAGS += [ + "-msse2" + ] + +if CONFIG["CPU_ARCH"] == "aarch64" and CONFIG["OS_TARGET"] == "Linux": + + DEFINES["DISABLE_NACL"] = True + DEFINES["NO_TCMALLOC"] = True + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "Linux": + + CXXFLAGS += [ + "-msse2" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "NetBSD": + + CXXFLAGS += [ + "-msse2" + ] + +if CONFIG["CPU_ARCH"] == "x86" and CONFIG["OS_TARGET"] == "OpenBSD": + + CXXFLAGS += [ + "-msse2" + ] + +Library("audio_processing_statistics_gn") diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/audio_processing_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/audio_processing_unittest.cc new file mode 100644 index 0000000000..4da0621abf --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/audio_processing_unittest.cc @@ -0,0 +1,3117 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ +#include <math.h> +#include <stdio.h> + +#include <algorithm> +#include <limits> +#include <memory> +#include <queue> + +#include "common_audio/include/audio_util.h" +#include "common_audio/resampler/include/push_resampler.h" +#include "common_audio/resampler/push_sinc_resampler.h" +#include "common_audio/signal_processing/include/signal_processing_library.h" +#include "modules/audio_processing/aec_dump/aec_dump_factory.h" +#include "modules/audio_processing/audio_processing_impl.h" +#include "modules/audio_processing/beamformer/mock_nonlinear_beamformer.h" +#include "modules/audio_processing/common.h" +#include "modules/audio_processing/include/audio_processing.h" +#include "modules/audio_processing/include/mock_audio_processing.h" +#include "modules/audio_processing/level_controller/level_controller_constants.h" +#include "modules/audio_processing/test/protobuf_utils.h" +#include "modules/audio_processing/test/test_utils.h" +#include "modules/include/module_common_types.h" +#include "rtc_base/arraysize.h" +#include "rtc_base/checks.h" +#include "rtc_base/gtest_prod_util.h" +#include "rtc_base/ignore_wundef.h" +#include "rtc_base/numerics/safe_minmax.h" +#include "rtc_base/protobuf_utils.h" +#include "rtc_base/refcountedobject.h" +#include "rtc_base/task_queue.h" +#include "rtc_base/thread.h" +#include "system_wrappers/include/event_wrapper.h" +#include "test/gtest.h" +#include "test/testsupport/fileutils.h" + +RTC_PUSH_IGNORING_WUNDEF() +#ifdef WEBRTC_ANDROID_PLATFORM_BUILD +#include "external/webrtc/webrtc/modules/audio_processing/test/unittest.pb.h" +#else +#include "modules/audio_processing/test/unittest.pb.h" +#endif +RTC_POP_IGNORING_WUNDEF() + +namespace webrtc { +namespace { + +// TODO(ekmeyerson): Switch to using StreamConfig and ProcessingConfig where +// applicable. + +// TODO(bjornv): This is not feasible until the functionality has been +// re-implemented; see comment at the bottom of this file. For now, the user has +// to hard code the |write_ref_data| value. +// When false, this will compare the output data with the results stored to +// file. This is the typical case. When the file should be updated, it can +// be set to true with the command-line switch --write_ref_data. +bool write_ref_data = false; +const int32_t kChannels[] = {1, 2}; +const int kSampleRates[] = {8000, 16000, 32000, 48000}; + +#if defined(WEBRTC_AUDIOPROC_FIXED_PROFILE) +// Android doesn't support 48kHz. +const int kProcessSampleRates[] = {8000, 16000, 32000}; +#elif defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE) +const int kProcessSampleRates[] = {8000, 16000, 32000, 48000}; +#endif + +enum StreamDirection { kForward = 0, kReverse }; + +void ConvertToFloat(const int16_t* int_data, ChannelBuffer<float>* cb) { + ChannelBuffer<int16_t> cb_int(cb->num_frames(), + cb->num_channels()); + Deinterleave(int_data, + cb->num_frames(), + cb->num_channels(), + cb_int.channels()); + for (size_t i = 0; i < cb->num_channels(); ++i) { + S16ToFloat(cb_int.channels()[i], + cb->num_frames(), + cb->channels()[i]); + } +} + +void ConvertToFloat(const AudioFrame& frame, ChannelBuffer<float>* cb) { + ConvertToFloat(frame.data(), cb); +} + +// Number of channels including the keyboard channel. 
+size_t TotalChannelsFromLayout(AudioProcessing::ChannelLayout layout) { + switch (layout) { + case AudioProcessing::kMono: + return 1; + case AudioProcessing::kMonoAndKeyboard: + case AudioProcessing::kStereo: + return 2; + case AudioProcessing::kStereoAndKeyboard: + return 3; + } + RTC_NOTREACHED(); + return 0; +} + +int TruncateToMultipleOf10(int value) { + return (value / 10) * 10; +} + +void MixStereoToMono(const float* stereo, float* mono, + size_t samples_per_channel) { + for (size_t i = 0; i < samples_per_channel; ++i) + mono[i] = (stereo[i * 2] + stereo[i * 2 + 1]) / 2; +} + +void MixStereoToMono(const int16_t* stereo, int16_t* mono, + size_t samples_per_channel) { + for (size_t i = 0; i < samples_per_channel; ++i) + mono[i] = (stereo[i * 2] + stereo[i * 2 + 1]) >> 1; +} + +void CopyLeftToRightChannel(int16_t* stereo, size_t samples_per_channel) { + for (size_t i = 0; i < samples_per_channel; i++) { + stereo[i * 2 + 1] = stereo[i * 2]; + } +} + +void VerifyChannelsAreEqual(const int16_t* stereo, size_t samples_per_channel) { + for (size_t i = 0; i < samples_per_channel; i++) { + EXPECT_EQ(stereo[i * 2 + 1], stereo[i * 2]); + } +} + +void SetFrameTo(AudioFrame* frame, int16_t value) { + int16_t* frame_data = frame->mutable_data(); + for (size_t i = 0; i < frame->samples_per_channel_ * frame->num_channels_; + ++i) { + frame_data[i] = value; + } +} + +void SetFrameTo(AudioFrame* frame, int16_t left, int16_t right) { + ASSERT_EQ(2u, frame->num_channels_); + int16_t* frame_data = frame->mutable_data(); + for (size_t i = 0; i < frame->samples_per_channel_ * 2; i += 2) { + frame_data[i] = left; + frame_data[i + 1] = right; + } +} + +void ScaleFrame(AudioFrame* frame, float scale) { + int16_t* frame_data = frame->mutable_data(); + for (size_t i = 0; i < frame->samples_per_channel_ * frame->num_channels_; + ++i) { + frame_data[i] = FloatS16ToS16(frame_data[i] * scale); + } +} + +bool FrameDataAreEqual(const AudioFrame& frame1, const AudioFrame& frame2) { + if (frame1.samples_per_channel_ != frame2.samples_per_channel_) { + return false; + } + if (frame1.num_channels_ != frame2.num_channels_) { + return false; + } + if (memcmp(frame1.data(), frame2.data(), + frame1.samples_per_channel_ * frame1.num_channels_ * + sizeof(int16_t))) { + return false; + } + return true; +} + +void EnableAllAPComponents(AudioProcessing* ap) { +#if defined(WEBRTC_AUDIOPROC_FIXED_PROFILE) + EXPECT_NOERR(ap->echo_control_mobile()->Enable(true)); + + EXPECT_NOERR(ap->gain_control()->set_mode(GainControl::kAdaptiveDigital)); + EXPECT_NOERR(ap->gain_control()->Enable(true)); +#elif defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE) + EXPECT_NOERR(ap->echo_cancellation()->enable_drift_compensation(true)); + EXPECT_NOERR(ap->echo_cancellation()->enable_metrics(true)); + EXPECT_NOERR(ap->echo_cancellation()->enable_delay_logging(true)); + EXPECT_NOERR(ap->echo_cancellation()->Enable(true)); + + EXPECT_NOERR(ap->gain_control()->set_mode(GainControl::kAdaptiveAnalog)); + EXPECT_NOERR(ap->gain_control()->set_analog_level_limits(0, 255)); + EXPECT_NOERR(ap->gain_control()->Enable(true)); +#endif + + AudioProcessing::Config apm_config; + apm_config.high_pass_filter.enabled = true; + ap->ApplyConfig(apm_config); + + EXPECT_NOERR(ap->level_estimator()->Enable(true)); + EXPECT_NOERR(ap->noise_suppression()->Enable(true)); + + EXPECT_NOERR(ap->voice_detection()->Enable(true)); +} + +// These functions are only used by ApmTest.Process. +template <class T> +T AbsValue(T a) { + return a > 0 ? 
a : -a;
+}
+
+int16_t MaxAudioFrame(const AudioFrame& frame) {
+  const size_t length = frame.samples_per_channel_ * frame.num_channels_;
+  const int16_t* frame_data = frame.data();
+  int16_t max_data = AbsValue(frame_data[0]);
+  for (size_t i = 1; i < length; i++) {
+    max_data = std::max(max_data, AbsValue(frame_data[i]));
+  }
+
+  return max_data;
+}
+
+#if defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE)
+void TestStats(const AudioProcessing::Statistic& test,
+               const audioproc::Test::Statistic& reference) {
+  EXPECT_EQ(reference.instant(), test.instant);
+  EXPECT_EQ(reference.average(), test.average);
+  EXPECT_EQ(reference.maximum(), test.maximum);
+  EXPECT_EQ(reference.minimum(), test.minimum);
+}
+
+void WriteStatsMessage(const AudioProcessing::Statistic& output,
+                       audioproc::Test::Statistic* msg) {
+  msg->set_instant(output.instant);
+  msg->set_average(output.average);
+  msg->set_maximum(output.maximum);
+  msg->set_minimum(output.minimum);
+}
+#endif
+
+void OpenFileAndWriteMessage(const std::string& filename,
+                             const MessageLite& msg) {
+  FILE* file = fopen(filename.c_str(), "wb");
+  ASSERT_TRUE(file != NULL);
+
+  int32_t size = msg.ByteSize();
+  ASSERT_GT(size, 0);
+  std::unique_ptr<uint8_t[]> array(new uint8_t[size]);
+  ASSERT_TRUE(msg.SerializeToArray(array.get(), size));
+
+  ASSERT_EQ(1u, fwrite(&size, sizeof(size), 1, file));
+  ASSERT_EQ(static_cast<size_t>(size),
+            fwrite(array.get(), sizeof(array[0]), size, file));
+  fclose(file);
+}
+
+std::string ResourceFilePath(const std::string& name, int sample_rate_hz) {
+  std::ostringstream ss;
+  // Resource files are all stereo.
+  ss << name << sample_rate_hz / 1000 << "_stereo";
+  return test::ResourcePath(ss.str(), "pcm");
+}
+
+// Temporary filenames unique to this process. They make it possible to run
+// these tests in parallel: each process must run in isolation, so the
+// processes cannot compete for the same filenames.
+std::map<std::string, std::string> temp_filenames;
+
+std::string OutputFilePath(const std::string& name,
+                           int input_rate,
+                           int output_rate,
+                           int reverse_input_rate,
+                           int reverse_output_rate,
+                           size_t num_input_channels,
+                           size_t num_output_channels,
+                           size_t num_reverse_input_channels,
+                           size_t num_reverse_output_channels,
+                           StreamDirection file_direction) {
+  std::ostringstream ss;
+  ss << name << "_i" << num_input_channels << "_" << input_rate / 1000 << "_ir"
+     << num_reverse_input_channels << "_" << reverse_input_rate / 1000 << "_";
+  if (num_output_channels == 1) {
+    ss << "mono";
+  } else if (num_output_channels == 2) {
+    ss << "stereo";
+  } else {
+    RTC_NOTREACHED();
+  }
+  ss << output_rate / 1000;
+  if (num_reverse_output_channels == 1) {
+    ss << "_rmono";
+  } else if (num_reverse_output_channels == 2) {
+    ss << "_rstereo";
+  } else {
+    RTC_NOTREACHED();
+  }
+  ss << reverse_output_rate / 1000;
+  ss << "_d" << file_direction << "_pcm";
+
+  std::string filename = ss.str();
+  if (temp_filenames[filename].empty())
+    temp_filenames[filename] = test::TempFilename(test::OutputPath(), filename);
+  return temp_filenames[filename];
+}
+
+void ClearTempFiles() {
+  for (auto& kv : temp_filenames)
+    remove(kv.second.c_str());
+}
+
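To make OutputFilePath()'s name mangling concrete, a small standalone sketch that reproduces the scheme for one plausible parameter set (name "out", 32 kHz on all streams, stereo forward and reverse, forward direction, i.e. kForward == 0); the chosen values are illustrative:

#include <iostream>
#include <sstream>

int main() {
  std::ostringstream ss;
  // name, num_input_channels, input_rate, num_reverse_input_channels,
  // reverse_input_rate, output layout and rate, reverse output layout and
  // rate, StreamDirection.
  ss << "out" << "_i" << 2 << "_" << 32000 / 1000 << "_ir" << 2 << "_"
     << 32000 / 1000 << "_" << "stereo" << 32000 / 1000 << "_rstereo"
     << 32000 / 1000 << "_d" << 0 << "_pcm";
  std::cout << ss.str() << "\n";  // out_i2_32_ir2_32_stereo32_rstereo32_d0_pcm
  return 0;
}

+// Only remove "out" files. Keep "ref" files.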
+void ClearTempOutFiles() { + for (auto it = temp_filenames.begin(); it != temp_filenames.end();) { + const std::string& filename = it->first; + if (filename.substr(0, 3).compare("out") == 0) { + remove(it->second.c_str()); + temp_filenames.erase(it++); + } else { + it++; + } + } +} + +void OpenFileAndReadMessage(const std::string& filename, MessageLite* msg) { + FILE* file = fopen(filename.c_str(), "rb"); + ASSERT_TRUE(file != NULL); + ReadMessageFromFile(file, msg); + fclose(file); +} + +// Reads a 10 ms chunk of int16 interleaved audio from the given (assumed +// stereo) file, converts to deinterleaved float (optionally downmixing) and +// returns the result in |cb|. Returns false if the file ended (or on error) and +// true otherwise. +// +// |int_data| and |float_data| are just temporary space that must be +// sufficiently large to hold the 10 ms chunk. +bool ReadChunk(FILE* file, int16_t* int_data, float* float_data, + ChannelBuffer<float>* cb) { + // The files always contain stereo audio. + size_t frame_size = cb->num_frames() * 2; + size_t read_count = fread(int_data, sizeof(int16_t), frame_size, file); + if (read_count != frame_size) { + // Check that the file really ended. + RTC_DCHECK(feof(file)); + return false; // This is expected. + } + + S16ToFloat(int_data, frame_size, float_data); + if (cb->num_channels() == 1) { + MixStereoToMono(float_data, cb->channels()[0], cb->num_frames()); + } else { + Deinterleave(float_data, cb->num_frames(), 2, + cb->channels()); + } + + return true; +} + +class ApmTest : public ::testing::Test { + protected: + ApmTest(); + virtual void SetUp(); + virtual void TearDown(); + + static void SetUpTestCase() { + } + + static void TearDownTestCase() { + ClearTempFiles(); + } + + // Used to select between int and float interface tests. 
+ enum Format { + kIntFormat, + kFloatFormat + }; + + void Init(int sample_rate_hz, + int output_sample_rate_hz, + int reverse_sample_rate_hz, + size_t num_input_channels, + size_t num_output_channels, + size_t num_reverse_channels, + bool open_output_file); + void Init(AudioProcessing* ap); + void EnableAllComponents(); + bool ReadFrame(FILE* file, AudioFrame* frame); + bool ReadFrame(FILE* file, AudioFrame* frame, ChannelBuffer<float>* cb); + void ReadFrameWithRewind(FILE* file, AudioFrame* frame); + void ReadFrameWithRewind(FILE* file, AudioFrame* frame, + ChannelBuffer<float>* cb); + void ProcessWithDefaultStreamParameters(AudioFrame* frame); + void ProcessDelayVerificationTest(int delay_ms, int system_delay_ms, + int delay_min, int delay_max); + void TestChangingChannelsInt16Interface( + size_t num_channels, + AudioProcessing::Error expected_return); + void TestChangingForwardChannels(size_t num_in_channels, + size_t num_out_channels, + AudioProcessing::Error expected_return); + void TestChangingReverseChannels(size_t num_rev_channels, + AudioProcessing::Error expected_return); + void RunQuantizedVolumeDoesNotGetStuckTest(int sample_rate); + void RunManualVolumeChangeIsPossibleTest(int sample_rate); + void StreamParametersTest(Format format); + int ProcessStreamChooser(Format format); + int AnalyzeReverseStreamChooser(Format format); + void ProcessDebugDump(const std::string& in_filename, + const std::string& out_filename, + Format format, + int max_size_bytes); + void VerifyDebugDumpTest(Format format); + + const std::string output_path_; + const std::string ref_filename_; + std::unique_ptr<AudioProcessing> apm_; + AudioFrame* frame_; + AudioFrame* revframe_; + std::unique_ptr<ChannelBuffer<float> > float_cb_; + std::unique_ptr<ChannelBuffer<float> > revfloat_cb_; + int output_sample_rate_hz_; + size_t num_output_channels_; + FILE* far_file_; + FILE* near_file_; + FILE* out_file_; +}; + +ApmTest::ApmTest() + : output_path_(test::OutputPath()), +#if defined(WEBRTC_AUDIOPROC_FIXED_PROFILE) + ref_filename_(test::ResourcePath("audio_processing/output_data_fixed", + "pb")), +#elif defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE) +#if defined(WEBRTC_MAC) + // A different file for Mac is needed because on this platform the AEC + // constant |kFixedDelayMs| value is 20 and not 50 as it is on the rest. 
+ ref_filename_(test::ResourcePath("audio_processing/output_data_mac", + "pb")), +#else + ref_filename_(test::ResourcePath("audio_processing/output_data_float", + "pb")), +#endif +#endif + frame_(NULL), + revframe_(NULL), + output_sample_rate_hz_(0), + num_output_channels_(0), + far_file_(NULL), + near_file_(NULL), + out_file_(NULL) { + Config config; + config.Set<ExperimentalAgc>(new ExperimentalAgc(false)); + apm_.reset(AudioProcessing::Create(config)); +} + +void ApmTest::SetUp() { + ASSERT_TRUE(apm_.get() != NULL); + + frame_ = new AudioFrame(); + revframe_ = new AudioFrame(); + + Init(32000, 32000, 32000, 2, 2, 2, false); +} + +void ApmTest::TearDown() { + if (frame_) { + delete frame_; + } + frame_ = NULL; + + if (revframe_) { + delete revframe_; + } + revframe_ = NULL; + + if (far_file_) { + ASSERT_EQ(0, fclose(far_file_)); + } + far_file_ = NULL; + + if (near_file_) { + ASSERT_EQ(0, fclose(near_file_)); + } + near_file_ = NULL; + + if (out_file_) { + ASSERT_EQ(0, fclose(out_file_)); + } + out_file_ = NULL; +} + +void ApmTest::Init(AudioProcessing* ap) { + ASSERT_EQ(kNoErr, + ap->Initialize( + {{{frame_->sample_rate_hz_, frame_->num_channels_}, + {output_sample_rate_hz_, num_output_channels_}, + {revframe_->sample_rate_hz_, revframe_->num_channels_}, + {revframe_->sample_rate_hz_, revframe_->num_channels_}}})); +} + +void ApmTest::Init(int sample_rate_hz, + int output_sample_rate_hz, + int reverse_sample_rate_hz, + size_t num_input_channels, + size_t num_output_channels, + size_t num_reverse_channels, + bool open_output_file) { + SetContainerFormat(sample_rate_hz, num_input_channels, frame_, &float_cb_); + output_sample_rate_hz_ = output_sample_rate_hz; + num_output_channels_ = num_output_channels; + + SetContainerFormat(reverse_sample_rate_hz, num_reverse_channels, revframe_, + &revfloat_cb_); + Init(apm_.get()); + + if (far_file_) { + ASSERT_EQ(0, fclose(far_file_)); + } + std::string filename = ResourceFilePath("far", sample_rate_hz); + far_file_ = fopen(filename.c_str(), "rb"); + ASSERT_TRUE(far_file_ != NULL) << "Could not open file " << + filename << "\n"; + + if (near_file_) { + ASSERT_EQ(0, fclose(near_file_)); + } + filename = ResourceFilePath("near", sample_rate_hz); + near_file_ = fopen(filename.c_str(), "rb"); + ASSERT_TRUE(near_file_ != NULL) << "Could not open file " << + filename << "\n"; + + if (open_output_file) { + if (out_file_) { + ASSERT_EQ(0, fclose(out_file_)); + } + filename = OutputFilePath( + "out", sample_rate_hz, output_sample_rate_hz, reverse_sample_rate_hz, + reverse_sample_rate_hz, num_input_channels, num_output_channels, + num_reverse_channels, num_reverse_channels, kForward); + out_file_ = fopen(filename.c_str(), "wb"); + ASSERT_TRUE(out_file_ != NULL) << "Could not open file " << + filename << "\n"; + } +} + +void ApmTest::EnableAllComponents() { + EnableAllAPComponents(apm_.get()); +} + +bool ApmTest::ReadFrame(FILE* file, AudioFrame* frame, + ChannelBuffer<float>* cb) { + // The files always contain stereo audio. + size_t frame_size = frame->samples_per_channel_ * 2; + size_t read_count = fread(frame->mutable_data(), + sizeof(int16_t), + frame_size, + file); + if (read_count != frame_size) { + // Check that the file really ended. + EXPECT_NE(0, feof(file)); + return false; // This is expected. 
+  }
+
+  if (frame->num_channels_ == 1) {
+    MixStereoToMono(frame->data(), frame->mutable_data(),
+                    frame->samples_per_channel_);
+  }
+
+  if (cb) {
+    ConvertToFloat(*frame, cb);
+  }
+  return true;
+}
+
+bool ApmTest::ReadFrame(FILE* file, AudioFrame* frame) {
+  return ReadFrame(file, frame, NULL);
+}
+
+// If the end of the file has been reached, rewind it and attempt to read the
+// frame again.
+void ApmTest::ReadFrameWithRewind(FILE* file, AudioFrame* frame,
+                                  ChannelBuffer<float>* cb) {
+  if (!ReadFrame(file, frame, cb)) {
+    rewind(file);
+    ASSERT_TRUE(ReadFrame(file, frame, cb));
+  }
+}
+
+void ApmTest::ReadFrameWithRewind(FILE* file, AudioFrame* frame) {
+  ReadFrameWithRewind(file, frame, NULL);
+}
+
+void ApmTest::ProcessWithDefaultStreamParameters(AudioFrame* frame) {
+  EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(0));
+  apm_->echo_cancellation()->set_stream_drift_samples(0);
+  EXPECT_EQ(apm_->kNoError,
+            apm_->gain_control()->set_stream_analog_level(127));
+  EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame));
+}
+
+int ApmTest::ProcessStreamChooser(Format format) {
+  if (format == kIntFormat) {
+    return apm_->ProcessStream(frame_);
+  }
+  return apm_->ProcessStream(float_cb_->channels(),
+                             frame_->samples_per_channel_,
+                             frame_->sample_rate_hz_,
+                             LayoutFromChannels(frame_->num_channels_),
+                             output_sample_rate_hz_,
+                             LayoutFromChannels(num_output_channels_),
+                             float_cb_->channels());
+}
+
+int ApmTest::AnalyzeReverseStreamChooser(Format format) {
+  if (format == kIntFormat) {
+    return apm_->ProcessReverseStream(revframe_);
+  }
+  return apm_->AnalyzeReverseStream(
+      revfloat_cb_->channels(),
+      revframe_->samples_per_channel_,
+      revframe_->sample_rate_hz_,
+      LayoutFromChannels(revframe_->num_channels_));
+}
+
+void ApmTest::ProcessDelayVerificationTest(int delay_ms, int system_delay_ms,
+                                           int delay_min, int delay_max) {
+  // The |revframe_| and |frame_| should include the proper frame information,
+  // and hence can be used for extracting the frame format.
+  AudioFrame tmp_frame;
+  std::queue<AudioFrame*> frame_queue;
+  bool causal = true;
+
+  tmp_frame.CopyFrom(*revframe_);
+  SetFrameTo(&tmp_frame, 0);
+
+  EXPECT_EQ(apm_->kNoError, apm_->Initialize());
+  // Initialize the |frame_queue| with empty frames.
+  int frame_delay = delay_ms / 10;
+  while (frame_delay < 0) {
+    AudioFrame* frame = new AudioFrame();
+    frame->CopyFrom(tmp_frame);
+    frame_queue.push(frame);
+    frame_delay++;
+    causal = false;
+  }
+  while (frame_delay > 0) {
+    AudioFrame* frame = new AudioFrame();
+    frame->CopyFrom(tmp_frame);
+    frame_queue.push(frame);
+    frame_delay--;
+  }
+  // Run for 4.5 seconds, skipping statistics from the first 2.5 seconds. We
+  // need enough frames with audio to have reliable estimates, but as few as
+  // possible to keep processing time down. 4.5 seconds seemed to be a good
+  // compromise for this recording.
+  for (int frame_count = 0; frame_count < 450; ++frame_count) {
+    AudioFrame* frame = new AudioFrame();
+    frame->CopyFrom(tmp_frame);
+    // Use the near end recording, since that has more speech in it.
+    ASSERT_TRUE(ReadFrame(near_file_, frame));
+    frame_queue.push(frame);
+    AudioFrame* reverse_frame = frame;
+    AudioFrame* process_frame = frame_queue.front();
+    if (!causal) {
+      reverse_frame = frame_queue.front();
+      // When we call ProcessStream() the frame is modified, so we can't use
+      // the pointer directly when things are non-causal. Use an intermediate
+      // frame and copy the data.
+ process_frame = &tmp_frame; + process_frame->CopyFrom(*frame); + } + EXPECT_EQ(apm_->kNoError, apm_->ProcessReverseStream(reverse_frame)); + EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(system_delay_ms)); + EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(process_frame)); + frame = frame_queue.front(); + frame_queue.pop(); + delete frame; + + if (frame_count == 250) { + int median; + int std; + float poor_fraction; + // Discard the first delay metrics to avoid convergence effects. + EXPECT_EQ(apm_->kNoError, + apm_->echo_cancellation()->GetDelayMetrics(&median, &std, + &poor_fraction)); + } + } + + rewind(near_file_); + while (!frame_queue.empty()) { + AudioFrame* frame = frame_queue.front(); + frame_queue.pop(); + delete frame; + } + // Calculate expected delay estimate and acceptable regions. Further, + // limit them w.r.t. AEC delay estimation support. + const size_t samples_per_ms = + rtc::SafeMin<size_t>(16u, frame_->samples_per_channel_ / 10); + const int expected_median = + rtc::SafeClamp<int>(delay_ms - system_delay_ms, delay_min, delay_max); + const int expected_median_high = rtc::SafeClamp<int>( + expected_median + rtc::dchecked_cast<int>(96 / samples_per_ms), delay_min, + delay_max); + const int expected_median_low = rtc::SafeClamp<int>( + expected_median - rtc::dchecked_cast<int>(96 / samples_per_ms), delay_min, + delay_max); + // Verify delay metrics. + int median; + int std; + float poor_fraction; + EXPECT_EQ(apm_->kNoError, + apm_->echo_cancellation()->GetDelayMetrics(&median, &std, + &poor_fraction)); + EXPECT_GE(expected_median_high, median); + EXPECT_LE(expected_median_low, median); +} + +void ApmTest::StreamParametersTest(Format format) { + // No errors when the components are disabled. + EXPECT_EQ(apm_->kNoError, ProcessStreamChooser(format)); + + // -- Missing AGC level -- + EXPECT_EQ(apm_->kNoError, apm_->gain_control()->Enable(true)); + EXPECT_EQ(apm_->kStreamParameterNotSetError, + ProcessStreamChooser(format)); + + // Resets after successful ProcessStream(). + EXPECT_EQ(apm_->kNoError, + apm_->gain_control()->set_stream_analog_level(127)); + EXPECT_EQ(apm_->kNoError, ProcessStreamChooser(format)); + EXPECT_EQ(apm_->kStreamParameterNotSetError, + ProcessStreamChooser(format)); + + // Other stream parameters set correctly. + EXPECT_EQ(apm_->kNoError, apm_->echo_cancellation()->Enable(true)); + EXPECT_EQ(apm_->kNoError, + apm_->echo_cancellation()->enable_drift_compensation(true)); + EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(100)); + apm_->echo_cancellation()->set_stream_drift_samples(0); + EXPECT_EQ(apm_->kStreamParameterNotSetError, + ProcessStreamChooser(format)); + EXPECT_EQ(apm_->kNoError, apm_->gain_control()->Enable(false)); + EXPECT_EQ(apm_->kNoError, + apm_->echo_cancellation()->enable_drift_compensation(false)); + + // -- Missing delay -- + EXPECT_EQ(apm_->kNoError, apm_->echo_cancellation()->Enable(true)); + EXPECT_EQ(apm_->kNoError, ProcessStreamChooser(format)); + EXPECT_EQ(apm_->kStreamParameterNotSetError, + ProcessStreamChooser(format)); + + // Resets after successful ProcessStream(). + EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(100)); + EXPECT_EQ(apm_->kNoError, ProcessStreamChooser(format)); + EXPECT_EQ(apm_->kStreamParameterNotSetError, + ProcessStreamChooser(format)); + + // Other stream parameters set correctly. 
+ EXPECT_EQ(apm_->kNoError, apm_->gain_control()->Enable(true)); + EXPECT_EQ(apm_->kNoError, + apm_->echo_cancellation()->enable_drift_compensation(true)); + apm_->echo_cancellation()->set_stream_drift_samples(0); + EXPECT_EQ(apm_->kNoError, + apm_->gain_control()->set_stream_analog_level(127)); + EXPECT_EQ(apm_->kStreamParameterNotSetError, + ProcessStreamChooser(format)); + EXPECT_EQ(apm_->kNoError, apm_->gain_control()->Enable(false)); + + // -- Missing drift -- + EXPECT_EQ(apm_->kStreamParameterNotSetError, + ProcessStreamChooser(format)); + + // Resets after successful ProcessStream(). + EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(100)); + apm_->echo_cancellation()->set_stream_drift_samples(0); + EXPECT_EQ(apm_->kNoError, ProcessStreamChooser(format)); + EXPECT_EQ(apm_->kStreamParameterNotSetError, + ProcessStreamChooser(format)); + + // Other stream parameters set correctly. + EXPECT_EQ(apm_->kNoError, apm_->gain_control()->Enable(true)); + EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(100)); + EXPECT_EQ(apm_->kNoError, + apm_->gain_control()->set_stream_analog_level(127)); + EXPECT_EQ(apm_->kStreamParameterNotSetError, + ProcessStreamChooser(format)); + + // -- No stream parameters -- + EXPECT_EQ(apm_->kNoError, + AnalyzeReverseStreamChooser(format)); + EXPECT_EQ(apm_->kStreamParameterNotSetError, + ProcessStreamChooser(format)); + + // -- All there -- + EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(100)); + apm_->echo_cancellation()->set_stream_drift_samples(0); + EXPECT_EQ(apm_->kNoError, + apm_->gain_control()->set_stream_analog_level(127)); + EXPECT_EQ(apm_->kNoError, ProcessStreamChooser(format)); +} + +TEST_F(ApmTest, StreamParametersInt) { + StreamParametersTest(kIntFormat); +} + +TEST_F(ApmTest, StreamParametersFloat) { + StreamParametersTest(kFloatFormat); +} + +TEST_F(ApmTest, DefaultDelayOffsetIsZero) { + EXPECT_EQ(0, apm_->delay_offset_ms()); + EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(50)); + EXPECT_EQ(50, apm_->stream_delay_ms()); +} + +TEST_F(ApmTest, DelayOffsetWithLimitsIsSetProperly) { + // High limit of 500 ms. + apm_->set_delay_offset_ms(100); + EXPECT_EQ(100, apm_->delay_offset_ms()); + EXPECT_EQ(apm_->kBadStreamParameterWarning, apm_->set_stream_delay_ms(450)); + EXPECT_EQ(500, apm_->stream_delay_ms()); + EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(100)); + EXPECT_EQ(200, apm_->stream_delay_ms()); + + // Low limit of 0 ms. 
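+  // A worked reading of the assertions that follow: with a -50 ms offset, a
+  // reported delay of 20 ms clamps to max(0, 20 - 50) = 0 ms and yields
+  // kBadStreamParameterWarning, while 100 ms maps cleanly to 100 - 50 = 50 ms.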
+  apm_->set_delay_offset_ms(-50);
+  EXPECT_EQ(-50, apm_->delay_offset_ms());
+  EXPECT_EQ(apm_->kBadStreamParameterWarning, apm_->set_stream_delay_ms(20));
+  EXPECT_EQ(0, apm_->stream_delay_ms());
+  EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(100));
+  EXPECT_EQ(50, apm_->stream_delay_ms());
+}
+
+void ApmTest::TestChangingChannelsInt16Interface(
+    size_t num_channels,
+    AudioProcessing::Error expected_return) {
+  frame_->num_channels_ = num_channels;
+  EXPECT_EQ(expected_return, apm_->ProcessStream(frame_));
+  EXPECT_EQ(expected_return, apm_->ProcessReverseStream(frame_));
+}
+
+void ApmTest::TestChangingForwardChannels(
+    size_t num_in_channels,
+    size_t num_out_channels,
+    AudioProcessing::Error expected_return) {
+  const StreamConfig input_stream = {frame_->sample_rate_hz_, num_in_channels};
+  const StreamConfig output_stream = {output_sample_rate_hz_,
+                                      num_out_channels};
+
+  EXPECT_EQ(expected_return,
+            apm_->ProcessStream(float_cb_->channels(), input_stream,
+                                output_stream, float_cb_->channels()));
+}
+
+void ApmTest::TestChangingReverseChannels(
+    size_t num_rev_channels,
+    AudioProcessing::Error expected_return) {
+  const ProcessingConfig processing_config = {
+      {{frame_->sample_rate_hz_, apm_->num_input_channels()},
+       {output_sample_rate_hz_, apm_->num_output_channels()},
+       {frame_->sample_rate_hz_, num_rev_channels},
+       {frame_->sample_rate_hz_, num_rev_channels}}};
+
+  EXPECT_EQ(
+      expected_return,
+      apm_->ProcessReverseStream(
+          float_cb_->channels(), processing_config.reverse_input_stream(),
+          processing_config.reverse_output_stream(), float_cb_->channels()));
+}
+
+TEST_F(ApmTest, ChannelsInt16Interface) {
+  // Testing number of invalid and valid channels.
+  Init(16000, 16000, 16000, 4, 4, 4, false);
+
+  TestChangingChannelsInt16Interface(0, apm_->kBadNumberChannelsError);
+
+  for (size_t i = 1; i < 4; i++) {
+    TestChangingChannelsInt16Interface(i, kNoErr);
+    EXPECT_EQ(i, apm_->num_input_channels());
+  }
+}
+
+TEST_F(ApmTest, Channels) {
+  // Testing number of invalid and valid channels.
+  Init(16000, 16000, 16000, 4, 4, 4, false);
+
+  TestChangingForwardChannels(0, 1, apm_->kBadNumberChannelsError);
+  TestChangingReverseChannels(0, apm_->kBadNumberChannelsError);
+
+  for (size_t i = 1; i < 4; ++i) {
+    for (size_t j = 0; j < 4; ++j) {
+      // Output channels must be one or match input channels.
+      if (j == 1 || i == j) {
+        TestChangingForwardChannels(i, j, kNoErr);
+        TestChangingReverseChannels(i, kNoErr);
+
+        EXPECT_EQ(i, apm_->num_input_channels());
+        EXPECT_EQ(j, apm_->num_output_channels());
+        // The number of reverse channels used for processing is always 1.
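+        // (As I understand the implementation, the reverse stream is mixed
+        // down to a single channel before analysis, which is what the 1u
+        // below asserts.)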
+ EXPECT_EQ(1u, apm_->num_reverse_channels()); + } else { + TestChangingForwardChannels(i, j, + AudioProcessing::kBadNumberChannelsError); + } + } + } +} + +TEST_F(ApmTest, SampleRatesInt) { + // Testing invalid sample rates + SetContainerFormat(10000, 2, frame_, &float_cb_); + EXPECT_EQ(apm_->kBadSampleRateError, ProcessStreamChooser(kIntFormat)); + // Testing valid sample rates + int fs[] = {8000, 16000, 32000, 48000}; + for (size_t i = 0; i < arraysize(fs); i++) { + SetContainerFormat(fs[i], 2, frame_, &float_cb_); + EXPECT_NOERR(ProcessStreamChooser(kIntFormat)); + } +} + +TEST_F(ApmTest, EchoCancellation) { + EXPECT_EQ(apm_->kNoError, + apm_->echo_cancellation()->enable_drift_compensation(true)); + EXPECT_TRUE(apm_->echo_cancellation()->is_drift_compensation_enabled()); + EXPECT_EQ(apm_->kNoError, + apm_->echo_cancellation()->enable_drift_compensation(false)); + EXPECT_FALSE(apm_->echo_cancellation()->is_drift_compensation_enabled()); + + EchoCancellation::SuppressionLevel level[] = { + EchoCancellation::kLowSuppression, + EchoCancellation::kModerateSuppression, + EchoCancellation::kHighSuppression, + }; + for (size_t i = 0; i < arraysize(level); i++) { + EXPECT_EQ(apm_->kNoError, + apm_->echo_cancellation()->set_suppression_level(level[i])); + EXPECT_EQ(level[i], + apm_->echo_cancellation()->suppression_level()); + } + + EchoCancellation::Metrics metrics; + EXPECT_EQ(apm_->kNotEnabledError, + apm_->echo_cancellation()->GetMetrics(&metrics)); + + EXPECT_EQ(apm_->kNoError, apm_->echo_cancellation()->Enable(true)); + EXPECT_TRUE(apm_->echo_cancellation()->is_enabled()); + + EXPECT_EQ(apm_->kNoError, + apm_->echo_cancellation()->enable_metrics(true)); + EXPECT_TRUE(apm_->echo_cancellation()->are_metrics_enabled()); + EXPECT_EQ(apm_->kNoError, + apm_->echo_cancellation()->enable_metrics(false)); + EXPECT_FALSE(apm_->echo_cancellation()->are_metrics_enabled()); + + EXPECT_EQ(apm_->kNoError, + apm_->echo_cancellation()->enable_delay_logging(true)); + EXPECT_TRUE(apm_->echo_cancellation()->is_delay_logging_enabled()); + EXPECT_EQ(apm_->kNoError, + apm_->echo_cancellation()->enable_delay_logging(false)); + EXPECT_FALSE(apm_->echo_cancellation()->is_delay_logging_enabled()); + + EXPECT_EQ(apm_->kNoError, apm_->echo_cancellation()->Enable(false)); + EXPECT_FALSE(apm_->echo_cancellation()->is_enabled()); + + int median = 0; + int std = 0; + float poor_fraction = 0; + EXPECT_EQ(apm_->kNotEnabledError, apm_->echo_cancellation()->GetDelayMetrics( + &median, &std, &poor_fraction)); + + EXPECT_EQ(apm_->kNoError, apm_->echo_cancellation()->Enable(true)); + EXPECT_TRUE(apm_->echo_cancellation()->is_enabled()); + EXPECT_EQ(apm_->kNoError, apm_->echo_cancellation()->Enable(false)); + EXPECT_FALSE(apm_->echo_cancellation()->is_enabled()); + + EXPECT_EQ(apm_->kNoError, apm_->echo_cancellation()->Enable(true)); + EXPECT_TRUE(apm_->echo_cancellation()->is_enabled()); + EXPECT_TRUE(apm_->echo_cancellation()->aec_core() != NULL); + EXPECT_EQ(apm_->kNoError, apm_->echo_cancellation()->Enable(false)); + EXPECT_FALSE(apm_->echo_cancellation()->is_enabled()); + EXPECT_FALSE(apm_->echo_cancellation()->aec_core() != NULL); +} + +TEST_F(ApmTest, DISABLED_EchoCancellationReportsCorrectDelays) { + // TODO(bjornv): Fix this test to work with DA-AEC. + // Enable AEC only. 
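+  // (DA-AEC refers to the delay-agnostic AEC mode; the Config below turns it
+  // off explicitly, presumably because the reported delay metrics are only
+  // meaningful when the AEC trusts the reported system delays.)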
+  EXPECT_EQ(apm_->kNoError,
+            apm_->echo_cancellation()->enable_drift_compensation(false));
+  EXPECT_EQ(apm_->kNoError,
+            apm_->echo_cancellation()->enable_metrics(false));
+  EXPECT_EQ(apm_->kNoError,
+            apm_->echo_cancellation()->enable_delay_logging(true));
+  EXPECT_EQ(apm_->kNoError, apm_->echo_cancellation()->Enable(true));
+  Config config;
+  config.Set<DelayAgnostic>(new DelayAgnostic(false));
+  apm_->SetExtraOptions(config);
+
+  // Internally in the AEC the amount of lookahead the delay estimation can
+  // handle is 15 blocks and the maximum delay is set to 60 blocks.
+  const int kLookaheadBlocks = 15;
+  const int kMaxDelayBlocks = 60;
+  // The AEC has a startup time before it actually starts to process. This
+  // procedure can flush the internal far-end buffer, which of course affects
+  // the delay estimation. Therefore, we set a system_delay high enough to
+  // avoid that. The smallest system_delay you can report without flushing the
+  // buffer is 66 ms at 8 kHz.
+  //
+  // It is known that for 16 kHz (and 32 kHz) sampling frequency there is an
+  // additional stuffing of 8 ms on the fly, but it seems to have no impact on
+  // delay estimation. This should be noted though. In case of test failure,
+  // this could be the cause.
+  const int kSystemDelayMs = 66;
+  // Test a couple of corner cases and verify that the estimated delay is
+  // within a valid region (set to +-1.5 blocks). Note that these cases are
+  // sampling frequency dependent.
+  for (size_t i = 0; i < arraysize(kProcessSampleRates); i++) {
+    Init(kProcessSampleRates[i],
+         kProcessSampleRates[i],
+         kProcessSampleRates[i],
+         2,
+         2,
+         2,
+         false);
+    // Sampling frequency dependent variables.
+    const int num_ms_per_block =
+        std::max(4, static_cast<int>(640 / frame_->samples_per_channel_));
+    const int delay_min_ms = -kLookaheadBlocks * num_ms_per_block;
+    const int delay_max_ms = (kMaxDelayBlocks - 1) * num_ms_per_block;
+
+    // 1) Verify correct delay estimate at lookahead boundary.
+    int delay_ms = TruncateToMultipleOf10(kSystemDelayMs + delay_min_ms);
+    ProcessDelayVerificationTest(delay_ms, kSystemDelayMs, delay_min_ms,
+                                 delay_max_ms);
+    // 2) A delay less than the maximum lookahead should give a delay estimate
+    // at the boundary (= -kLookaheadBlocks * num_ms_per_block).
+    delay_ms -= 20;
+    ProcessDelayVerificationTest(delay_ms, kSystemDelayMs, delay_min_ms,
+                                 delay_max_ms);
+    // 3) Three values around zero delay. Note that we need to compensate for
+    // the fake system_delay.
+    delay_ms = TruncateToMultipleOf10(kSystemDelayMs - 10);
+    ProcessDelayVerificationTest(delay_ms, kSystemDelayMs, delay_min_ms,
+                                 delay_max_ms);
+    delay_ms = TruncateToMultipleOf10(kSystemDelayMs);
+    ProcessDelayVerificationTest(delay_ms, kSystemDelayMs, delay_min_ms,
+                                 delay_max_ms);
+    delay_ms = TruncateToMultipleOf10(kSystemDelayMs + 10);
+    ProcessDelayVerificationTest(delay_ms, kSystemDelayMs, delay_min_ms,
+                                 delay_max_ms);
+    // 4) Verify correct delay estimate at maximum delay boundary.
+    delay_ms = TruncateToMultipleOf10(kSystemDelayMs + delay_max_ms);
+    ProcessDelayVerificationTest(delay_ms, kSystemDelayMs, delay_min_ms,
+                                 delay_max_ms);
+    // 5) A delay above the maximum delay should give an estimate at the
+    // boundary (= (kMaxDelayBlocks - 1) * num_ms_per_block).
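+    // A worked pass for illustration, assuming the 16 kHz iteration:
+    // samples_per_channel_ = 160, so num_ms_per_block = max(4, 640 / 160) = 4,
+    // delay_min_ms = -15 * 4 = -60 and delay_max_ms = 59 * 4 = 236. Case 5)
+    // then feeds in TruncateToMultipleOf10(66 + 236) + 20 = 320 ms and still
+    // expects the estimate to saturate at the 236 ms boundary.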
+ delay_ms += 20; + ProcessDelayVerificationTest(delay_ms, kSystemDelayMs, delay_min_ms, + delay_max_ms); + } +} + +TEST_F(ApmTest, EchoControlMobile) { + // Turn AECM on (and AEC off) + Init(16000, 16000, 16000, 2, 2, 2, false); + EXPECT_EQ(apm_->kNoError, apm_->echo_control_mobile()->Enable(true)); + EXPECT_TRUE(apm_->echo_control_mobile()->is_enabled()); + + // Toggle routing modes + EchoControlMobile::RoutingMode mode[] = { + EchoControlMobile::kQuietEarpieceOrHeadset, + EchoControlMobile::kEarpiece, + EchoControlMobile::kLoudEarpiece, + EchoControlMobile::kSpeakerphone, + EchoControlMobile::kLoudSpeakerphone, + }; + for (size_t i = 0; i < arraysize(mode); i++) { + EXPECT_EQ(apm_->kNoError, + apm_->echo_control_mobile()->set_routing_mode(mode[i])); + EXPECT_EQ(mode[i], + apm_->echo_control_mobile()->routing_mode()); + } + // Turn comfort noise off/on + EXPECT_EQ(apm_->kNoError, + apm_->echo_control_mobile()->enable_comfort_noise(false)); + EXPECT_FALSE(apm_->echo_control_mobile()->is_comfort_noise_enabled()); + EXPECT_EQ(apm_->kNoError, + apm_->echo_control_mobile()->enable_comfort_noise(true)); + EXPECT_TRUE(apm_->echo_control_mobile()->is_comfort_noise_enabled()); + // Set and get echo path + const size_t echo_path_size = + apm_->echo_control_mobile()->echo_path_size_bytes(); + std::unique_ptr<char[]> echo_path_in(new char[echo_path_size]); + std::unique_ptr<char[]> echo_path_out(new char[echo_path_size]); + EXPECT_EQ(apm_->kNullPointerError, + apm_->echo_control_mobile()->SetEchoPath(NULL, echo_path_size)); + EXPECT_EQ(apm_->kNullPointerError, + apm_->echo_control_mobile()->GetEchoPath(NULL, echo_path_size)); + EXPECT_EQ(apm_->kBadParameterError, + apm_->echo_control_mobile()->GetEchoPath(echo_path_out.get(), 1)); + EXPECT_EQ(apm_->kNoError, + apm_->echo_control_mobile()->GetEchoPath(echo_path_out.get(), + echo_path_size)); + for (size_t i = 0; i < echo_path_size; i++) { + echo_path_in[i] = echo_path_out[i] + 1; + } + EXPECT_EQ(apm_->kBadParameterError, + apm_->echo_control_mobile()->SetEchoPath(echo_path_in.get(), 1)); + EXPECT_EQ(apm_->kNoError, + apm_->echo_control_mobile()->SetEchoPath(echo_path_in.get(), + echo_path_size)); + EXPECT_EQ(apm_->kNoError, + apm_->echo_control_mobile()->GetEchoPath(echo_path_out.get(), + echo_path_size)); + for (size_t i = 0; i < echo_path_size; i++) { + EXPECT_EQ(echo_path_in[i], echo_path_out[i]); + } + + // Process a few frames with NS in the default disabled state. This exercises + // a different codepath than with it enabled. 
+  EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(0));
+  EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
+  EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(0));
+  EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
+
+  // Turn AECM off
+  EXPECT_EQ(apm_->kNoError, apm_->echo_control_mobile()->Enable(false));
+  EXPECT_FALSE(apm_->echo_control_mobile()->is_enabled());
+}
+
+TEST_F(ApmTest, GainControl) {
+  // Testing gain modes
+  EXPECT_EQ(apm_->kNoError,
+            apm_->gain_control()->set_mode(
+                apm_->gain_control()->mode()));
+
+  GainControl::Mode mode[] = {
+    GainControl::kAdaptiveAnalog,
+    GainControl::kAdaptiveDigital,
+    GainControl::kFixedDigital
+  };
+  for (size_t i = 0; i < arraysize(mode); i++) {
+    EXPECT_EQ(apm_->kNoError,
+              apm_->gain_control()->set_mode(mode[i]));
+    EXPECT_EQ(mode[i], apm_->gain_control()->mode());
+  }
+  // Testing invalid target levels
+  EXPECT_EQ(apm_->kBadParameterError,
+            apm_->gain_control()->set_target_level_dbfs(-3));
+  EXPECT_EQ(apm_->kBadParameterError,
+            apm_->gain_control()->set_target_level_dbfs(-40));
+  // Testing valid target levels
+  EXPECT_EQ(apm_->kNoError,
+            apm_->gain_control()->set_target_level_dbfs(
+                apm_->gain_control()->target_level_dbfs()));
+
+  int level_dbfs[] = {0, 6, 31};
+  for (size_t i = 0; i < arraysize(level_dbfs); i++) {
+    EXPECT_EQ(apm_->kNoError,
+              apm_->gain_control()->set_target_level_dbfs(level_dbfs[i]));
+    EXPECT_EQ(level_dbfs[i], apm_->gain_control()->target_level_dbfs());
+  }
+
+  // Testing invalid compression gains
+  EXPECT_EQ(apm_->kBadParameterError,
+            apm_->gain_control()->set_compression_gain_db(-1));
+  EXPECT_EQ(apm_->kBadParameterError,
+            apm_->gain_control()->set_compression_gain_db(100));
+
+  // Testing valid compression gains
+  EXPECT_EQ(apm_->kNoError,
+            apm_->gain_control()->set_compression_gain_db(
+                apm_->gain_control()->compression_gain_db()));
+
+  int gain_db[] = {0, 10, 90};
+  for (size_t i = 0; i < arraysize(gain_db); i++) {
+    EXPECT_EQ(apm_->kNoError,
+              apm_->gain_control()->set_compression_gain_db(gain_db[i]));
+    EXPECT_EQ(gain_db[i], apm_->gain_control()->compression_gain_db());
+  }
+
+  // Testing limiter off/on
+  EXPECT_EQ(apm_->kNoError, apm_->gain_control()->enable_limiter(false));
+  EXPECT_FALSE(apm_->gain_control()->is_limiter_enabled());
+  EXPECT_EQ(apm_->kNoError, apm_->gain_control()->enable_limiter(true));
+  EXPECT_TRUE(apm_->gain_control()->is_limiter_enabled());
+
+  // Testing invalid level limits
+  EXPECT_EQ(apm_->kBadParameterError,
+            apm_->gain_control()->set_analog_level_limits(-1, 512));
+  EXPECT_EQ(apm_->kBadParameterError,
+            apm_->gain_control()->set_analog_level_limits(100000, 512));
+  EXPECT_EQ(apm_->kBadParameterError,
+            apm_->gain_control()->set_analog_level_limits(512, -1));
+  EXPECT_EQ(apm_->kBadParameterError,
+            apm_->gain_control()->set_analog_level_limits(512, 100000));
+  EXPECT_EQ(apm_->kBadParameterError,
+            apm_->gain_control()->set_analog_level_limits(512, 255));
+
+  // Testing valid level limits
+  EXPECT_EQ(apm_->kNoError,
+            apm_->gain_control()->set_analog_level_limits(
+                apm_->gain_control()->analog_level_minimum(),
+                apm_->gain_control()->analog_level_maximum()));
+
+  int min_level[] = {0, 255, 1024};
+  for (size_t i = 0; i < arraysize(min_level); i++) {
+    EXPECT_EQ(apm_->kNoError,
+              apm_->gain_control()->set_analog_level_limits(min_level[i],
+                                                            1024));
+    EXPECT_EQ(min_level[i], apm_->gain_control()->analog_level_minimum());
+  }
+
+  int max_level[] = {0, 1024, 65535};
+  for (size_t i = 0; i < arraysize(max_level); i++) {
+    EXPECT_EQ(apm_->kNoError,
+
apm_->gain_control()->set_analog_level_limits(0, max_level[i])); + EXPECT_EQ(max_level[i], apm_->gain_control()->analog_level_maximum()); + } + + // TODO(ajm): stream_is_saturated() and stream_analog_level() + + // Turn AGC off + EXPECT_EQ(apm_->kNoError, apm_->gain_control()->Enable(false)); + EXPECT_FALSE(apm_->gain_control()->is_enabled()); +} + +void ApmTest::RunQuantizedVolumeDoesNotGetStuckTest(int sample_rate) { + Init(sample_rate, sample_rate, sample_rate, 2, 2, 2, false); + EXPECT_EQ(apm_->kNoError, + apm_->gain_control()->set_mode(GainControl::kAdaptiveAnalog)); + EXPECT_EQ(apm_->kNoError, apm_->gain_control()->Enable(true)); + + int out_analog_level = 0; + for (int i = 0; i < 2000; ++i) { + ReadFrameWithRewind(near_file_, frame_); + // Ensure the audio is at a low level, so the AGC will try to increase it. + ScaleFrame(frame_, 0.25); + + // Always pass in the same volume. + EXPECT_EQ(apm_->kNoError, + apm_->gain_control()->set_stream_analog_level(100)); + EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); + out_analog_level = apm_->gain_control()->stream_analog_level(); + } + + // Ensure the AGC is still able to reach the maximum. + EXPECT_EQ(255, out_analog_level); +} + +// Verifies that despite volume slider quantization, the AGC can continue to +// increase its volume. +TEST_F(ApmTest, QuantizedVolumeDoesNotGetStuck) { + for (size_t i = 0; i < arraysize(kSampleRates); ++i) { + RunQuantizedVolumeDoesNotGetStuckTest(kSampleRates[i]); + } +} + +void ApmTest::RunManualVolumeChangeIsPossibleTest(int sample_rate) { + Init(sample_rate, sample_rate, sample_rate, 2, 2, 2, false); + EXPECT_EQ(apm_->kNoError, + apm_->gain_control()->set_mode(GainControl::kAdaptiveAnalog)); + EXPECT_EQ(apm_->kNoError, apm_->gain_control()->Enable(true)); + + int out_analog_level = 100; + for (int i = 0; i < 1000; ++i) { + ReadFrameWithRewind(near_file_, frame_); + // Ensure the audio is at a low level, so the AGC will try to increase it. + ScaleFrame(frame_, 0.25); + + EXPECT_EQ(apm_->kNoError, + apm_->gain_control()->set_stream_analog_level(out_analog_level)); + EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); + out_analog_level = apm_->gain_control()->stream_analog_level(); + } + + // Ensure the volume was raised. + EXPECT_GT(out_analog_level, 100); + int highest_level_reached = out_analog_level; + // Simulate a user manual volume change. + out_analog_level = 100; + + for (int i = 0; i < 300; ++i) { + ReadFrameWithRewind(near_file_, frame_); + ScaleFrame(frame_, 0.25); + + EXPECT_EQ(apm_->kNoError, + apm_->gain_control()->set_stream_analog_level(out_analog_level)); + EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); + out_analog_level = apm_->gain_control()->stream_analog_level(); + // Check that AGC respected the manually adjusted volume. + EXPECT_LT(out_analog_level, highest_level_reached); + } + // Check that the volume was still raised. 
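+  // (That is, raised above the simulated user setting of 100 while still
+  // respecting |highest_level_reached|, as asserted inside the loop above.)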
+ EXPECT_GT(out_analog_level, 100); +} + +TEST_F(ApmTest, ManualVolumeChangeIsPossible) { + for (size_t i = 0; i < arraysize(kSampleRates); ++i) { + RunManualVolumeChangeIsPossibleTest(kSampleRates[i]); + } +} + +#if !defined(WEBRTC_ANDROID) && !defined(WEBRTC_IOS) +TEST_F(ApmTest, AgcOnlyAdaptsWhenTargetSignalIsPresent) { + const int kSampleRateHz = 16000; + const size_t kSamplesPerChannel = + static_cast<size_t>(AudioProcessing::kChunkSizeMs * kSampleRateHz / 1000); + const size_t kNumInputChannels = 2; + const size_t kNumOutputChannels = 1; + const size_t kNumChunks = 700; + const float kScaleFactor = 0.25f; + Config config; + std::vector<webrtc::Point> geometry; + geometry.push_back(webrtc::Point(0.f, 0.f, 0.f)); + geometry.push_back(webrtc::Point(0.05f, 0.f, 0.f)); + config.Set<Beamforming>(new Beamforming(true, geometry)); + testing::NiceMock<MockNonlinearBeamformer>* beamformer = + new testing::NiceMock<MockNonlinearBeamformer>(geometry, 1u); + std::unique_ptr<AudioProcessing> apm( + AudioProcessing::Create(config, nullptr, nullptr, beamformer)); + EXPECT_EQ(kNoErr, apm->gain_control()->Enable(true)); + ChannelBuffer<float> src_buf(kSamplesPerChannel, kNumInputChannels); + ChannelBuffer<float> dest_buf(kSamplesPerChannel, kNumOutputChannels); + const size_t max_length = kSamplesPerChannel * std::max(kNumInputChannels, + kNumOutputChannels); + std::unique_ptr<int16_t[]> int_data(new int16_t[max_length]); + std::unique_ptr<float[]> float_data(new float[max_length]); + std::string filename = ResourceFilePath("far", kSampleRateHz); + FILE* far_file = fopen(filename.c_str(), "rb"); + ASSERT_TRUE(far_file != NULL) << "Could not open file " << filename << "\n"; + const int kDefaultVolume = apm->gain_control()->stream_analog_level(); + const int kDefaultCompressionGain = + apm->gain_control()->compression_gain_db(); + bool is_target = false; + EXPECT_CALL(*beamformer, is_target_present()) + .WillRepeatedly(testing::ReturnPointee(&is_target)); + for (size_t i = 0; i < kNumChunks; ++i) { + ASSERT_TRUE(ReadChunk(far_file, + int_data.get(), + float_data.get(), + &src_buf)); + for (size_t j = 0; j < kNumInputChannels; ++j) { + for (size_t k = 0; k < kSamplesPerChannel; ++k) { + src_buf.channels()[j][k] *= kScaleFactor; + } + } + EXPECT_EQ(kNoErr, + apm->ProcessStream(src_buf.channels(), + src_buf.num_frames(), + kSampleRateHz, + LayoutFromChannels(src_buf.num_channels()), + kSampleRateHz, + LayoutFromChannels(dest_buf.num_channels()), + dest_buf.channels())); + } + EXPECT_EQ(kDefaultVolume, + apm->gain_control()->stream_analog_level()); + EXPECT_EQ(kDefaultCompressionGain, + apm->gain_control()->compression_gain_db()); + rewind(far_file); + is_target = true; + for (size_t i = 0; i < kNumChunks; ++i) { + ASSERT_TRUE(ReadChunk(far_file, + int_data.get(), + float_data.get(), + &src_buf)); + for (size_t j = 0; j < kNumInputChannels; ++j) { + for (size_t k = 0; k < kSamplesPerChannel; ++k) { + src_buf.channels()[j][k] *= kScaleFactor; + } + } + EXPECT_EQ(kNoErr, + apm->ProcessStream(src_buf.channels(), + src_buf.num_frames(), + kSampleRateHz, + LayoutFromChannels(src_buf.num_channels()), + kSampleRateHz, + LayoutFromChannels(dest_buf.num_channels()), + dest_buf.channels())); + } + EXPECT_LT(kDefaultVolume, + apm->gain_control()->stream_analog_level()); + EXPECT_LT(kDefaultCompressionGain, + apm->gain_control()->compression_gain_db()); + ASSERT_EQ(0, fclose(far_file)); +} +#endif + +TEST_F(ApmTest, NoiseSuppression) { + // Test valid suppression levels. 
+ NoiseSuppression::Level level[] = { + NoiseSuppression::kLow, + NoiseSuppression::kModerate, + NoiseSuppression::kHigh, + NoiseSuppression::kVeryHigh + }; + for (size_t i = 0; i < arraysize(level); i++) { + EXPECT_EQ(apm_->kNoError, + apm_->noise_suppression()->set_level(level[i])); + EXPECT_EQ(level[i], apm_->noise_suppression()->level()); + } + + // Turn NS on/off + EXPECT_EQ(apm_->kNoError, apm_->noise_suppression()->Enable(true)); + EXPECT_TRUE(apm_->noise_suppression()->is_enabled()); + EXPECT_EQ(apm_->kNoError, apm_->noise_suppression()->Enable(false)); + EXPECT_FALSE(apm_->noise_suppression()->is_enabled()); +} + +TEST_F(ApmTest, HighPassFilter) { + // Turn HP filter on/off + AudioProcessing::Config apm_config; + apm_config.high_pass_filter.enabled = true; + apm_->ApplyConfig(apm_config); + apm_config.high_pass_filter.enabled = false; + apm_->ApplyConfig(apm_config); +} + +TEST_F(ApmTest, LevelEstimator) { + // Turn level estimator on/off + EXPECT_EQ(apm_->kNoError, apm_->level_estimator()->Enable(false)); + EXPECT_FALSE(apm_->level_estimator()->is_enabled()); + + EXPECT_EQ(apm_->kNotEnabledError, apm_->level_estimator()->RMS()); + + EXPECT_EQ(apm_->kNoError, apm_->level_estimator()->Enable(true)); + EXPECT_TRUE(apm_->level_estimator()->is_enabled()); + + // Run this test in wideband; in super-wb, the splitting filter distorts the + // audio enough to cause deviation from the expectation for small values. + frame_->samples_per_channel_ = 160; + frame_->num_channels_ = 2; + frame_->sample_rate_hz_ = 16000; + + // Min value if no frames have been processed. + EXPECT_EQ(127, apm_->level_estimator()->RMS()); + + // Min value on zero frames. + SetFrameTo(frame_, 0); + EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); + EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); + EXPECT_EQ(127, apm_->level_estimator()->RMS()); + + // Try a few RMS values. + // (These also test that the value resets after retrieving it.) + SetFrameTo(frame_, 32767); + EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); + EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); + EXPECT_EQ(0, apm_->level_estimator()->RMS()); + + SetFrameTo(frame_, 30000); + EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); + EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); + EXPECT_EQ(1, apm_->level_estimator()->RMS()); + + SetFrameTo(frame_, 10000); + EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); + EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); + EXPECT_EQ(10, apm_->level_estimator()->RMS()); + + SetFrameTo(frame_, 10); + EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); + EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); + EXPECT_EQ(70, apm_->level_estimator()->RMS()); + + // Verify reset after enable/disable. + SetFrameTo(frame_, 32767); + EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); + EXPECT_EQ(apm_->kNoError, apm_->level_estimator()->Enable(false)); + EXPECT_EQ(apm_->kNoError, apm_->level_estimator()->Enable(true)); + SetFrameTo(frame_, 1); + EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); + EXPECT_EQ(90, apm_->level_estimator()->RMS()); + + // Verify reset after initialize. 
+ SetFrameTo(frame_, 32767); + EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); + EXPECT_EQ(apm_->kNoError, apm_->Initialize()); + SetFrameTo(frame_, 1); + EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); + EXPECT_EQ(90, apm_->level_estimator()->RMS()); +} + +TEST_F(ApmTest, VoiceDetection) { + // Test external VAD + EXPECT_EQ(apm_->kNoError, + apm_->voice_detection()->set_stream_has_voice(true)); + EXPECT_TRUE(apm_->voice_detection()->stream_has_voice()); + EXPECT_EQ(apm_->kNoError, + apm_->voice_detection()->set_stream_has_voice(false)); + EXPECT_FALSE(apm_->voice_detection()->stream_has_voice()); + + // Test valid likelihoods + VoiceDetection::Likelihood likelihood[] = { + VoiceDetection::kVeryLowLikelihood, + VoiceDetection::kLowLikelihood, + VoiceDetection::kModerateLikelihood, + VoiceDetection::kHighLikelihood + }; + for (size_t i = 0; i < arraysize(likelihood); i++) { + EXPECT_EQ(apm_->kNoError, + apm_->voice_detection()->set_likelihood(likelihood[i])); + EXPECT_EQ(likelihood[i], apm_->voice_detection()->likelihood()); + } + + /* TODO(bjornv): Enable once VAD supports other frame lengths than 10 ms + // Test invalid frame sizes + EXPECT_EQ(apm_->kBadParameterError, + apm_->voice_detection()->set_frame_size_ms(12)); + + // Test valid frame sizes + for (int i = 10; i <= 30; i += 10) { + EXPECT_EQ(apm_->kNoError, + apm_->voice_detection()->set_frame_size_ms(i)); + EXPECT_EQ(i, apm_->voice_detection()->frame_size_ms()); + } + */ + + // Turn VAD on/off + EXPECT_EQ(apm_->kNoError, apm_->voice_detection()->Enable(true)); + EXPECT_TRUE(apm_->voice_detection()->is_enabled()); + EXPECT_EQ(apm_->kNoError, apm_->voice_detection()->Enable(false)); + EXPECT_FALSE(apm_->voice_detection()->is_enabled()); + + // Test that AudioFrame activity is maintained when VAD is disabled. + EXPECT_EQ(apm_->kNoError, apm_->voice_detection()->Enable(false)); + AudioFrame::VADActivity activity[] = { + AudioFrame::kVadActive, + AudioFrame::kVadPassive, + AudioFrame::kVadUnknown + }; + for (size_t i = 0; i < arraysize(activity); i++) { + frame_->vad_activity_ = activity[i]; + EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); + EXPECT_EQ(activity[i], frame_->vad_activity_); + } + + // Test that AudioFrame activity is set when VAD is enabled. 
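+  // (kVadUnknown going in must come out as either kVadActive or kVadPassive;
+  // the EXPECT_NE below checks exactly that.)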
+ EXPECT_EQ(apm_->kNoError, apm_->voice_detection()->Enable(true)); + frame_->vad_activity_ = AudioFrame::kVadUnknown; + EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); + EXPECT_NE(AudioFrame::kVadUnknown, frame_->vad_activity_); + + // TODO(bjornv): Add tests for streamed voice; stream_has_voice() +} + +TEST_F(ApmTest, AllProcessingDisabledByDefault) { + EXPECT_FALSE(apm_->echo_cancellation()->is_enabled()); + EXPECT_FALSE(apm_->echo_control_mobile()->is_enabled()); + EXPECT_FALSE(apm_->gain_control()->is_enabled()); + EXPECT_FALSE(apm_->high_pass_filter()->is_enabled()); + EXPECT_FALSE(apm_->level_estimator()->is_enabled()); + EXPECT_FALSE(apm_->noise_suppression()->is_enabled()); + EXPECT_FALSE(apm_->voice_detection()->is_enabled()); +} + +TEST_F(ApmTest, NoProcessingWhenAllComponentsDisabled) { + for (size_t i = 0; i < arraysize(kSampleRates); i++) { + Init(kSampleRates[i], kSampleRates[i], kSampleRates[i], 2, 2, 2, false); + SetFrameTo(frame_, 1000, 2000); + AudioFrame frame_copy; + frame_copy.CopyFrom(*frame_); + for (int j = 0; j < 1000; j++) { + EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); + EXPECT_TRUE(FrameDataAreEqual(*frame_, frame_copy)); + EXPECT_EQ(apm_->kNoError, apm_->ProcessReverseStream(frame_)); + EXPECT_TRUE(FrameDataAreEqual(*frame_, frame_copy)); + } + } +} + +TEST_F(ApmTest, NoProcessingWhenAllComponentsDisabledFloat) { + // Test that ProcessStream copies input to output even with no processing. + const size_t kSamples = 80; + const int sample_rate = 8000; + const float src[kSamples] = { + -1.0f, 0.0f, 1.0f + }; + float dest[kSamples] = {}; + + auto src_channels = &src[0]; + auto dest_channels = &dest[0]; + + apm_.reset(AudioProcessing::Create()); + EXPECT_NOERR(apm_->ProcessStream( + &src_channels, kSamples, sample_rate, LayoutFromChannels(1), + sample_rate, LayoutFromChannels(1), &dest_channels)); + + for (size_t i = 0; i < kSamples; ++i) { + EXPECT_EQ(src[i], dest[i]); + } + + // Same for ProcessReverseStream. 
+ float rev_dest[kSamples] = {}; + auto rev_dest_channels = &rev_dest[0]; + + StreamConfig input_stream = {sample_rate, 1}; + StreamConfig output_stream = {sample_rate, 1}; + EXPECT_NOERR(apm_->ProcessReverseStream(&src_channels, input_stream, + output_stream, &rev_dest_channels)); + + for (size_t i = 0; i < kSamples; ++i) { + EXPECT_EQ(src[i], rev_dest[i]); + } +} + +TEST_F(ApmTest, IdenticalInputChannelsResultInIdenticalOutputChannels) { + EnableAllComponents(); + + for (size_t i = 0; i < arraysize(kProcessSampleRates); i++) { + Init(kProcessSampleRates[i], + kProcessSampleRates[i], + kProcessSampleRates[i], + 2, + 2, + 2, + false); + int analog_level = 127; + ASSERT_EQ(0, feof(far_file_)); + ASSERT_EQ(0, feof(near_file_)); + while (ReadFrame(far_file_, revframe_) && ReadFrame(near_file_, frame_)) { + CopyLeftToRightChannel(revframe_->mutable_data(), + revframe_->samples_per_channel_); + + ASSERT_EQ(kNoErr, apm_->ProcessReverseStream(revframe_)); + + CopyLeftToRightChannel(frame_->mutable_data(), + frame_->samples_per_channel_); + frame_->vad_activity_ = AudioFrame::kVadUnknown; + + ASSERT_EQ(kNoErr, apm_->set_stream_delay_ms(0)); + apm_->echo_cancellation()->set_stream_drift_samples(0); + ASSERT_EQ(kNoErr, + apm_->gain_control()->set_stream_analog_level(analog_level)); + ASSERT_EQ(kNoErr, apm_->ProcessStream(frame_)); + analog_level = apm_->gain_control()->stream_analog_level(); + + VerifyChannelsAreEqual(frame_->data(), frame_->samples_per_channel_); + } + rewind(far_file_); + rewind(near_file_); + } +} + +TEST_F(ApmTest, SplittingFilter) { + // Verify the filter is not active through undistorted audio when: + // 1. No components are enabled... + SetFrameTo(frame_, 1000); + AudioFrame frame_copy; + frame_copy.CopyFrom(*frame_); + EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); + EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); + EXPECT_TRUE(FrameDataAreEqual(*frame_, frame_copy)); + + // 2. Only the level estimator is enabled... + SetFrameTo(frame_, 1000); + frame_copy.CopyFrom(*frame_); + EXPECT_EQ(apm_->kNoError, apm_->level_estimator()->Enable(true)); + EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); + EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); + EXPECT_TRUE(FrameDataAreEqual(*frame_, frame_copy)); + EXPECT_EQ(apm_->kNoError, apm_->level_estimator()->Enable(false)); + + // 3. Only VAD is enabled... + SetFrameTo(frame_, 1000); + frame_copy.CopyFrom(*frame_); + EXPECT_EQ(apm_->kNoError, apm_->voice_detection()->Enable(true)); + EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); + EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); + EXPECT_TRUE(FrameDataAreEqual(*frame_, frame_copy)); + EXPECT_EQ(apm_->kNoError, apm_->voice_detection()->Enable(false)); + + // 4. Both VAD and the level estimator are enabled... + SetFrameTo(frame_, 1000); + frame_copy.CopyFrom(*frame_); + EXPECT_EQ(apm_->kNoError, apm_->level_estimator()->Enable(true)); + EXPECT_EQ(apm_->kNoError, apm_->voice_detection()->Enable(true)); + EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); + EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); + EXPECT_TRUE(FrameDataAreEqual(*frame_, frame_copy)); + EXPECT_EQ(apm_->kNoError, apm_->level_estimator()->Enable(false)); + EXPECT_EQ(apm_->kNoError, apm_->voice_detection()->Enable(false)); + + // 5. Not using super-wb. + frame_->samples_per_channel_ = 160; + frame_->num_channels_ = 2; + frame_->sample_rate_hz_ = 16000; + // Enable AEC, which would require the filter in super-wb. 
We rely on the
+  // first few frames of data being unaffected by the AEC.
+  // TODO(andrew): This test, and the one below, rely rather tenuously on the
+  // behavior of the AEC. Think of something more robust.
+  EXPECT_EQ(apm_->kNoError, apm_->echo_cancellation()->Enable(true));
+  // Make sure the extended filter is enabled; it ensures nothing is touched
+  // until we have a far-end frame.
+  Config config;
+  config.Set<ExtendedFilter>(new ExtendedFilter(true));
+  apm_->SetExtraOptions(config);
+  SetFrameTo(frame_, 1000);
+  frame_copy.CopyFrom(*frame_);
+  EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(0));
+  apm_->echo_cancellation()->set_stream_drift_samples(0);
+  EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
+  EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(0));
+  apm_->echo_cancellation()->set_stream_drift_samples(0);
+  EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
+  EXPECT_TRUE(FrameDataAreEqual(*frame_, frame_copy));
+
+  // Check that the test is valid: in super-wb, enabling the AEC engages the
+  // splitting filter, which should distort the audio (the AEC itself still
+  // leaves these first frames untouched).
+  frame_->samples_per_channel_ = 320;
+  frame_->num_channels_ = 2;
+  frame_->sample_rate_hz_ = 32000;
+  SetFrameTo(frame_, 1000);
+  frame_copy.CopyFrom(*frame_);
+  EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(0));
+  apm_->echo_cancellation()->set_stream_drift_samples(0);
+  EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
+  EXPECT_FALSE(FrameDataAreEqual(*frame_, frame_copy));
+}
+
+#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
+void ApmTest::ProcessDebugDump(const std::string& in_filename,
+                               const std::string& out_filename,
+                               Format format,
+                               int max_size_bytes) {
+  rtc::TaskQueue worker_queue("ApmTest_worker_queue");
+  FILE* in_file = fopen(in_filename.c_str(), "rb");
+  ASSERT_TRUE(in_file != NULL);
+  audioproc::Event event_msg;
+  bool first_init = true;
+
+  while (ReadMessageFromFile(in_file, &event_msg)) {
+    if (event_msg.type() == audioproc::Event::INIT) {
+      const audioproc::Init msg = event_msg.init();
+      int reverse_sample_rate = msg.sample_rate();
+      if (msg.has_reverse_sample_rate()) {
+        reverse_sample_rate = msg.reverse_sample_rate();
+      }
+      int output_sample_rate = msg.sample_rate();
+      if (msg.has_output_sample_rate()) {
+        output_sample_rate = msg.output_sample_rate();
+      }
+
+      Init(msg.sample_rate(),
+           output_sample_rate,
+           reverse_sample_rate,
+           msg.num_input_channels(),
+           msg.num_output_channels(),
+           msg.num_reverse_channels(),
+           false);
+      if (first_init) {
+        // AttachAecDump() writes an additional init message. Don't start
+        // recording until after the first init to avoid the extra message.
+        auto aec_dump =
+            AecDumpFactory::Create(out_filename, max_size_bytes, &worker_queue);
+        EXPECT_TRUE(aec_dump);
+        apm_->AttachAecDump(std::move(aec_dump));
+        first_init = false;
+      }
+
+    } else if (event_msg.type() == audioproc::Event::REVERSE_STREAM) {
+      const audioproc::ReverseStream msg = event_msg.reverse_stream();
+
+      if (msg.channel_size() > 0) {
+        ASSERT_EQ(revframe_->num_channels_,
+                  static_cast<size_t>(msg.channel_size()));
+        for (int i = 0; i < msg.channel_size(); ++i) {
+          memcpy(revfloat_cb_->channels()[i],
+                 msg.channel(i).data(),
+                 msg.channel(i).size());
+        }
+      } else {
+        memcpy(revframe_->mutable_data(), msg.data().data(), msg.data().size());
+        if (format == kFloatFormat) {
+          // We're using an int16 input file; convert to float.
+ ConvertToFloat(*revframe_, revfloat_cb_.get()); + } + } + AnalyzeReverseStreamChooser(format); + + } else if (event_msg.type() == audioproc::Event::STREAM) { + const audioproc::Stream msg = event_msg.stream(); + // ProcessStream could have changed this for the output frame. + frame_->num_channels_ = apm_->num_input_channels(); + + EXPECT_NOERR(apm_->gain_control()->set_stream_analog_level(msg.level())); + EXPECT_NOERR(apm_->set_stream_delay_ms(msg.delay())); + apm_->echo_cancellation()->set_stream_drift_samples(msg.drift()); + if (msg.has_keypress()) { + apm_->set_stream_key_pressed(msg.keypress()); + } else { + apm_->set_stream_key_pressed(true); + } + + if (msg.input_channel_size() > 0) { + ASSERT_EQ(frame_->num_channels_, + static_cast<size_t>(msg.input_channel_size())); + for (int i = 0; i < msg.input_channel_size(); ++i) { + memcpy(float_cb_->channels()[i], + msg.input_channel(i).data(), + msg.input_channel(i).size()); + } + } else { + memcpy(frame_->mutable_data(), msg.input_data().data(), + msg.input_data().size()); + if (format == kFloatFormat) { + // We're using an int16 input file; convert to float. + ConvertToFloat(*frame_, float_cb_.get()); + } + } + ProcessStreamChooser(format); + } + } + apm_->DetachAecDump(); + fclose(in_file); +} + +void ApmTest::VerifyDebugDumpTest(Format format) { + const std::string in_filename = test::ResourcePath("ref03", "aecdump"); + std::string format_string; + switch (format) { + case kIntFormat: + format_string = "_int"; + break; + case kFloatFormat: + format_string = "_float"; + break; + } + const std::string ref_filename = test::TempFilename( + test::OutputPath(), std::string("ref") + format_string + "_aecdump"); + const std::string out_filename = test::TempFilename( + test::OutputPath(), std::string("out") + format_string + "_aecdump"); + const std::string limited_filename = test::TempFilename( + test::OutputPath(), std::string("limited") + format_string + "_aecdump"); + const size_t logging_limit_bytes = 100000; + // We expect at least this many bytes in the created logfile. 
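+  // (logging_limit_bytes above is 100000; the 5000-byte slack presumably
+  // absorbs whatever trailing message no longer fits under the cap. The
+  // EXPECT_GT/EXPECT_LE pair at the end pins the limited dump to
+  // (95000, 100000] bytes.)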
+ const size_t logging_expected_bytes = 95000; + EnableAllComponents(); + ProcessDebugDump(in_filename, ref_filename, format, -1); + ProcessDebugDump(ref_filename, out_filename, format, -1); + ProcessDebugDump(ref_filename, limited_filename, format, logging_limit_bytes); + + FILE* ref_file = fopen(ref_filename.c_str(), "rb"); + FILE* out_file = fopen(out_filename.c_str(), "rb"); + FILE* limited_file = fopen(limited_filename.c_str(), "rb"); + ASSERT_TRUE(ref_file != NULL); + ASSERT_TRUE(out_file != NULL); + ASSERT_TRUE(limited_file != NULL); + std::unique_ptr<uint8_t[]> ref_bytes; + std::unique_ptr<uint8_t[]> out_bytes; + std::unique_ptr<uint8_t[]> limited_bytes; + + size_t ref_size = ReadMessageBytesFromFile(ref_file, &ref_bytes); + size_t out_size = ReadMessageBytesFromFile(out_file, &out_bytes); + size_t limited_size = ReadMessageBytesFromFile(limited_file, &limited_bytes); + size_t bytes_read = 0; + size_t bytes_read_limited = 0; + while (ref_size > 0 && out_size > 0) { + bytes_read += ref_size; + bytes_read_limited += limited_size; + EXPECT_EQ(ref_size, out_size); + EXPECT_GE(ref_size, limited_size); + EXPECT_EQ(0, memcmp(ref_bytes.get(), out_bytes.get(), ref_size)); + EXPECT_EQ(0, memcmp(ref_bytes.get(), limited_bytes.get(), limited_size)); + ref_size = ReadMessageBytesFromFile(ref_file, &ref_bytes); + out_size = ReadMessageBytesFromFile(out_file, &out_bytes); + limited_size = ReadMessageBytesFromFile(limited_file, &limited_bytes); + } + EXPECT_GT(bytes_read, 0u); + EXPECT_GT(bytes_read_limited, logging_expected_bytes); + EXPECT_LE(bytes_read_limited, logging_limit_bytes); + EXPECT_NE(0, feof(ref_file)); + EXPECT_NE(0, feof(out_file)); + EXPECT_NE(0, feof(limited_file)); + ASSERT_EQ(0, fclose(ref_file)); + ASSERT_EQ(0, fclose(out_file)); + ASSERT_EQ(0, fclose(limited_file)); + remove(ref_filename.c_str()); + remove(out_filename.c_str()); + remove(limited_filename.c_str()); +} + +TEST_F(ApmTest, VerifyDebugDumpInt) { + VerifyDebugDumpTest(kIntFormat); +} + +TEST_F(ApmTest, VerifyDebugDumpFloat) { + VerifyDebugDumpTest(kFloatFormat); +} +#endif + +// TODO(andrew): expand test to verify output. +TEST_F(ApmTest, DebugDump) { + rtc::TaskQueue worker_queue("ApmTest_worker_queue"); + const std::string filename = + test::TempFilename(test::OutputPath(), "debug_aec"); + { + auto aec_dump = AecDumpFactory::Create("", -1, &worker_queue); + EXPECT_FALSE(aec_dump); + } + +#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP + // Stopping without having started should be OK. + apm_->DetachAecDump(); + + auto aec_dump = AecDumpFactory::Create(filename, -1, &worker_queue); + EXPECT_TRUE(aec_dump); + apm_->AttachAecDump(std::move(aec_dump)); + EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); + EXPECT_EQ(apm_->kNoError, apm_->ProcessReverseStream(revframe_)); + apm_->DetachAecDump(); + + // Verify the file has been written. + FILE* fid = fopen(filename.c_str(), "r"); + ASSERT_TRUE(fid != NULL); + + // Clean it up. + ASSERT_EQ(0, fclose(fid)); + ASSERT_EQ(0, remove(filename.c_str())); +#else + // Verify the file has NOT been written. + ASSERT_TRUE(fopen(filename.c_str(), "r") == NULL); +#endif // WEBRTC_AUDIOPROC_DEBUG_DUMP +} + +// TODO(andrew): expand test to verify output. +TEST_F(ApmTest, DebugDumpFromFileHandle) { + rtc::TaskQueue worker_queue("ApmTest_worker_queue"); + + const std::string filename = + test::TempFilename(test::OutputPath(), "debug_aec"); + FILE* fid = fopen(filename.c_str(), "w"); + ASSERT_TRUE(fid); + +#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP + // Stopping without having started should be OK. 
+ apm_->DetachAecDump(); + + auto aec_dump = AecDumpFactory::Create(fid, -1, &worker_queue); + EXPECT_TRUE(aec_dump); + apm_->AttachAecDump(std::move(aec_dump)); + EXPECT_EQ(apm_->kNoError, apm_->ProcessReverseStream(revframe_)); + EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); + apm_->DetachAecDump(); + + // Verify the file has been written. + fid = fopen(filename.c_str(), "r"); + ASSERT_TRUE(fid != NULL); + + // Clean it up. + ASSERT_EQ(0, fclose(fid)); + ASSERT_EQ(0, remove(filename.c_str())); +#else + ASSERT_EQ(0, fclose(fid)); +#endif // WEBRTC_AUDIOPROC_DEBUG_DUMP +} + +TEST_F(ApmTest, FloatAndIntInterfacesGiveSimilarResults) { + audioproc::OutputData ref_data; + OpenFileAndReadMessage(ref_filename_, &ref_data); + + Config config; + config.Set<ExperimentalAgc>(new ExperimentalAgc(false)); + std::unique_ptr<AudioProcessing> fapm(AudioProcessing::Create(config)); + EnableAllComponents(); + EnableAllAPComponents(fapm.get()); + for (int i = 0; i < ref_data.test_size(); i++) { + printf("Running test %d of %d...\n", i + 1, ref_data.test_size()); + + audioproc::Test* test = ref_data.mutable_test(i); + // TODO(ajm): Restore downmixing test cases. + if (test->num_input_channels() != test->num_output_channels()) + continue; + + const size_t num_render_channels = + static_cast<size_t>(test->num_reverse_channels()); + const size_t num_input_channels = + static_cast<size_t>(test->num_input_channels()); + const size_t num_output_channels = + static_cast<size_t>(test->num_output_channels()); + const size_t samples_per_channel = static_cast<size_t>( + test->sample_rate() * AudioProcessing::kChunkSizeMs / 1000); + + Init(test->sample_rate(), test->sample_rate(), test->sample_rate(), + num_input_channels, num_output_channels, num_render_channels, true); + Init(fapm.get()); + + ChannelBuffer<int16_t> output_cb(samples_per_channel, num_input_channels); + ChannelBuffer<int16_t> output_int16(samples_per_channel, + num_input_channels); + + int analog_level = 127; + size_t num_bad_chunks = 0; + while (ReadFrame(far_file_, revframe_, revfloat_cb_.get()) && + ReadFrame(near_file_, frame_, float_cb_.get())) { + frame_->vad_activity_ = AudioFrame::kVadUnknown; + + EXPECT_NOERR(apm_->ProcessReverseStream(revframe_)); + EXPECT_NOERR(fapm->AnalyzeReverseStream( + revfloat_cb_->channels(), + samples_per_channel, + test->sample_rate(), + LayoutFromChannels(num_render_channels))); + + EXPECT_NOERR(apm_->set_stream_delay_ms(0)); + EXPECT_NOERR(fapm->set_stream_delay_ms(0)); + apm_->echo_cancellation()->set_stream_drift_samples(0); + fapm->echo_cancellation()->set_stream_drift_samples(0); + EXPECT_NOERR(apm_->gain_control()->set_stream_analog_level(analog_level)); + EXPECT_NOERR(fapm->gain_control()->set_stream_analog_level(analog_level)); + + EXPECT_NOERR(apm_->ProcessStream(frame_)); + Deinterleave(frame_->data(), samples_per_channel, num_output_channels, + output_int16.channels()); + + EXPECT_NOERR(fapm->ProcessStream( + float_cb_->channels(), + samples_per_channel, + test->sample_rate(), + LayoutFromChannels(num_input_channels), + test->sample_rate(), + LayoutFromChannels(num_output_channels), + float_cb_->channels())); + for (size_t j = 0; j < num_output_channels; ++j) { + FloatToS16(float_cb_->channels()[j], + samples_per_channel, + output_cb.channels()[j]); + float variance = 0; + float snr = ComputeSNR(output_int16.channels()[j], + output_cb.channels()[j], + samples_per_channel, &variance); + + const float kVarianceThreshold = 20; + const float kSNRThreshold = 20; + + // Skip frames with low energy. 
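+        // (That is, a chunk only counts as bad when it both carries enough
+        // energy, sqrt(variance) > 20, and the int16 and float outputs
+        // disagree, snr < 20. Near-silent chunks are ignored, presumably
+        // because their SNR is dominated by quantization noise.)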
+ if (sqrt(variance) > kVarianceThreshold && snr < kSNRThreshold) { + ++num_bad_chunks; + } + } + + analog_level = fapm->gain_control()->stream_analog_level(); + EXPECT_EQ(apm_->gain_control()->stream_analog_level(), + fapm->gain_control()->stream_analog_level()); + EXPECT_EQ(apm_->echo_cancellation()->stream_has_echo(), + fapm->echo_cancellation()->stream_has_echo()); + EXPECT_NEAR(apm_->noise_suppression()->speech_probability(), + fapm->noise_suppression()->speech_probability(), + 0.01); + + // Reset in case of downmixing. + frame_->num_channels_ = static_cast<size_t>(test->num_input_channels()); + } + +#if defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE) + const size_t kMaxNumBadChunks = 0; +#elif defined(WEBRTC_AUDIOPROC_FIXED_PROFILE) + // There are a few chunks in the fixed-point profile that give low SNR. + // Listening confirmed the difference is acceptable. + const size_t kMaxNumBadChunks = 60; +#endif + EXPECT_LE(num_bad_chunks, kMaxNumBadChunks); + + rewind(far_file_); + rewind(near_file_); + } +} + +// TODO(andrew): Add a test to process a few frames with different combinations +// of enabled components. + +TEST_F(ApmTest, Process) { + GOOGLE_PROTOBUF_VERIFY_VERSION; + audioproc::OutputData ref_data; + + if (!write_ref_data) { + OpenFileAndReadMessage(ref_filename_, &ref_data); + } else { + // Write the desired tests to the protobuf reference file. + for (size_t i = 0; i < arraysize(kChannels); i++) { + for (size_t j = 0; j < arraysize(kChannels); j++) { + for (size_t l = 0; l < arraysize(kProcessSampleRates); l++) { + audioproc::Test* test = ref_data.add_test(); + test->set_num_reverse_channels(kChannels[i]); + test->set_num_input_channels(kChannels[j]); + test->set_num_output_channels(kChannels[j]); + test->set_sample_rate(kProcessSampleRates[l]); + test->set_use_aec_extended_filter(false); + } + } + } +#if defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE) + // To test the extended filter mode. + audioproc::Test* test = ref_data.add_test(); + test->set_num_reverse_channels(2); + test->set_num_input_channels(2); + test->set_num_output_channels(2); + test->set_sample_rate(AudioProcessing::kSampleRate32kHz); + test->set_use_aec_extended_filter(true); +#endif + } + + for (int i = 0; i < ref_data.test_size(); i++) { + printf("Running test %d of %d...\n", i + 1, ref_data.test_size()); + + audioproc::Test* test = ref_data.mutable_test(i); + // TODO(ajm): We no longer allow different input and output channels. Skip + // these tests for now, but they should be removed from the set. 
+ if (test->num_input_channels() != test->num_output_channels()) + continue; + + Config config; + config.Set<ExperimentalAgc>(new ExperimentalAgc(false)); + config.Set<ExtendedFilter>( + new ExtendedFilter(test->use_aec_extended_filter())); + apm_.reset(AudioProcessing::Create(config)); + + EnableAllComponents(); + + Init(test->sample_rate(), + test->sample_rate(), + test->sample_rate(), + static_cast<size_t>(test->num_input_channels()), + static_cast<size_t>(test->num_output_channels()), + static_cast<size_t>(test->num_reverse_channels()), + true); + + int frame_count = 0; + int has_echo_count = 0; + int has_voice_count = 0; + int is_saturated_count = 0; + int analog_level = 127; + int analog_level_average = 0; + int max_output_average = 0; + float ns_speech_prob_average = 0.0f; +#if defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE) + int stats_index = 0; +#endif + + while (ReadFrame(far_file_, revframe_) && ReadFrame(near_file_, frame_)) { + EXPECT_EQ(apm_->kNoError, apm_->ProcessReverseStream(revframe_)); + + frame_->vad_activity_ = AudioFrame::kVadUnknown; + + EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(0)); + apm_->echo_cancellation()->set_stream_drift_samples(0); + EXPECT_EQ(apm_->kNoError, + apm_->gain_control()->set_stream_analog_level(analog_level)); + + EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); + + // Ensure the frame was downmixed properly. + EXPECT_EQ(static_cast<size_t>(test->num_output_channels()), + frame_->num_channels_); + + max_output_average += MaxAudioFrame(*frame_); + + if (apm_->echo_cancellation()->stream_has_echo()) { + has_echo_count++; + } + + analog_level = apm_->gain_control()->stream_analog_level(); + analog_level_average += analog_level; + if (apm_->gain_control()->stream_is_saturated()) { + is_saturated_count++; + } + if (apm_->voice_detection()->stream_has_voice()) { + has_voice_count++; + EXPECT_EQ(AudioFrame::kVadActive, frame_->vad_activity_); + } else { + EXPECT_EQ(AudioFrame::kVadPassive, frame_->vad_activity_); + } + + ns_speech_prob_average += apm_->noise_suppression()->speech_probability(); + + size_t frame_size = frame_->samples_per_channel_ * frame_->num_channels_; + size_t write_count = fwrite(frame_->data(), + sizeof(int16_t), + frame_size, + out_file_); + ASSERT_EQ(frame_size, write_count); + + // Reset in case of downmixing. + frame_->num_channels_ = static_cast<size_t>(test->num_input_channels()); + frame_count++; + +#if defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE) + const int kStatsAggregationFrameNum = 100; // 1 second. + if (frame_count % kStatsAggregationFrameNum == 0) { + // Get echo metrics. + EchoCancellation::Metrics echo_metrics; + EXPECT_EQ(apm_->kNoError, + apm_->echo_cancellation()->GetMetrics(&echo_metrics)); + + // Get delay metrics. + int median = 0; + int std = 0; + float fraction_poor_delays = 0; + EXPECT_EQ(apm_->kNoError, + apm_->echo_cancellation()->GetDelayMetrics( + &median, &std, &fraction_poor_delays)); + + // Get RMS. 
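+        // (As I understand it, RMS() reports the level as a negated dBFS
+        // value, so 0 corresponds to a full-scale signal and 127 to the
+        // measurement floor; the two EXPECTs below pin it to that range.)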
+        int rms_level = apm_->level_estimator()->RMS();
+        EXPECT_LE(0, rms_level);
+        EXPECT_GE(127, rms_level);
+
+        if (!write_ref_data) {
+          const audioproc::Test::EchoMetrics& reference =
+              test->echo_metrics(stats_index);
+          TestStats(echo_metrics.residual_echo_return_loss,
+                    reference.residual_echo_return_loss());
+          TestStats(echo_metrics.echo_return_loss,
+                    reference.echo_return_loss());
+          TestStats(echo_metrics.echo_return_loss_enhancement,
+                    reference.echo_return_loss_enhancement());
+          TestStats(echo_metrics.a_nlp,
+                    reference.a_nlp());
+          EXPECT_EQ(echo_metrics.divergent_filter_fraction,
+                    reference.divergent_filter_fraction());
+
+          const audioproc::Test::DelayMetrics& reference_delay =
+              test->delay_metrics(stats_index);
+          EXPECT_EQ(reference_delay.median(), median);
+          EXPECT_EQ(reference_delay.std(), std);
+          EXPECT_EQ(reference_delay.fraction_poor_delays(),
+                    fraction_poor_delays);
+
+          EXPECT_EQ(test->rms_level(stats_index), rms_level);
+
+          ++stats_index;
+        } else {
+          audioproc::Test::EchoMetrics* message =
+              test->add_echo_metrics();
+          WriteStatsMessage(echo_metrics.residual_echo_return_loss,
+                            message->mutable_residual_echo_return_loss());
+          WriteStatsMessage(echo_metrics.echo_return_loss,
+                            message->mutable_echo_return_loss());
+          WriteStatsMessage(echo_metrics.echo_return_loss_enhancement,
+                            message->mutable_echo_return_loss_enhancement());
+          WriteStatsMessage(echo_metrics.a_nlp,
+                            message->mutable_a_nlp());
+          message->set_divergent_filter_fraction(
+              echo_metrics.divergent_filter_fraction);
+
+          audioproc::Test::DelayMetrics* message_delay =
+              test->add_delay_metrics();
+          message_delay->set_median(median);
+          message_delay->set_std(std);
+          message_delay->set_fraction_poor_delays(fraction_poor_delays);
+
+          test->add_rms_level(rms_level);
+        }
+      }
+#endif  // defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE).
+    }
+    max_output_average /= frame_count;
+    analog_level_average /= frame_count;
+    ns_speech_prob_average /= frame_count;
+
+    if (!write_ref_data) {
+      const int kIntNear = 1;
+      // When running the test on an N7 we get a {2, 6} difference in
+      // |has_voice_count|, and |max_output_average| is up to 18 higher.
+      // All numbers are consistently higher on the N7 compared to ref_data.
+      // TODO(bjornv): If we start getting more of these offsets on Android we
+      // should consider a different approach. Either using one slack for all,
+      // or generate a separate Android reference.
+#if defined(WEBRTC_ANDROID)
+      const int kHasVoiceCountOffset = 3;
+      const int kHasVoiceCountNear = 4;
+      const int kMaxOutputAverageOffset = 9;
+      const int kMaxOutputAverageNear = 9;
+#else
+      const int kHasVoiceCountOffset = 0;
+      const int kHasVoiceCountNear = kIntNear;
+      const int kMaxOutputAverageOffset = 0;
+      const int kMaxOutputAverageNear = kIntNear;
+#endif
+      EXPECT_NEAR(test->has_echo_count(), has_echo_count, kIntNear);
+      EXPECT_NEAR(test->has_voice_count(),
+                  has_voice_count - kHasVoiceCountOffset,
+                  kHasVoiceCountNear);
+      EXPECT_NEAR(test->is_saturated_count(), is_saturated_count, kIntNear);
+
+      EXPECT_NEAR(test->analog_level_average(), analog_level_average,
+                  kIntNear);
+      EXPECT_NEAR(test->max_output_average(),
+                  max_output_average - kMaxOutputAverageOffset,
+                  kMaxOutputAverageNear);
+#if defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE)
+      const double kFloatNear = 0.0005;
+      EXPECT_NEAR(test->ns_speech_probability_average(),
+                  ns_speech_prob_average,
+                  kFloatNear);
+#endif
+    } else {
+      test->set_has_echo_count(has_echo_count);
+      test->set_has_voice_count(has_voice_count);
+      test->set_is_saturated_count(is_saturated_count);
+
+      test->set_analog_level_average(analog_level_average);
+      test->set_max_output_average(max_output_average);
+
+#if defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE)
+      EXPECT_LE(0.0f, ns_speech_prob_average);
+      EXPECT_GE(1.0f, ns_speech_prob_average);
+      test->set_ns_speech_probability_average(ns_speech_prob_average);
+#endif
+    }
+
+    rewind(far_file_);
+    rewind(near_file_);
+  }
+
+  if (write_ref_data) {
+    OpenFileAndWriteMessage(ref_filename_, ref_data);
+  }
+}
+
+TEST_F(ApmTest, NoErrorsWithKeyboardChannel) {
+  struct ChannelFormat {
+    AudioProcessing::ChannelLayout in_layout;
+    AudioProcessing::ChannelLayout out_layout;
+  };
+  ChannelFormat cf[] = {
+      {AudioProcessing::kMonoAndKeyboard, AudioProcessing::kMono},
+      {AudioProcessing::kStereoAndKeyboard, AudioProcessing::kMono},
+      {AudioProcessing::kStereoAndKeyboard, AudioProcessing::kStereo},
+  };
+
+  std::unique_ptr<AudioProcessing> ap(AudioProcessing::Create());
+  // Enable one component just to ensure some processing takes place.
+  ap->noise_suppression()->Enable(true);
+  for (size_t i = 0; i < arraysize(cf); ++i) {
+    const int in_rate = 44100;
+    const int out_rate = 48000;
+    ChannelBuffer<float> in_cb(SamplesFromRate(in_rate),
+                               TotalChannelsFromLayout(cf[i].in_layout));
+    ChannelBuffer<float> out_cb(SamplesFromRate(out_rate),
+                                ChannelsFromLayout(cf[i].out_layout));
+
+    // Run over a few chunks.
+    for (int j = 0; j < 10; ++j) {
+      EXPECT_NOERR(ap->ProcessStream(
+          in_cb.channels(),
+          in_cb.num_frames(),
+          in_rate,
+          cf[i].in_layout,
+          out_rate,
+          cf[i].out_layout,
+          out_cb.channels()));
+    }
+  }
+}
+
+// Compares the reference and test arrays over a region around the expected
+// delay. Finds the highest SNR in that region and adds the variance and
+// squared error results to the supplied accumulators.
+void UpdateBestSNR(const float* ref,
+                   const float* test,
+                   size_t length,
+                   int expected_delay,
+                   double* variance_acc,
+                   double* sq_error_acc) {
+  double best_snr = std::numeric_limits<double>::lowest();
+  double best_variance = 0;
+  double best_sq_error = 0;
+  // Search over a region of +/- 4 samples (nine candidate delays) around the
+  // expected delay.
+ for (int delay = std::max(expected_delay - 4, 0); delay <= expected_delay + 4; + ++delay) { + double sq_error = 0; + double variance = 0; + for (size_t i = 0; i < length - delay; ++i) { + double error = test[i + delay] - ref[i]; + sq_error += error * error; + variance += ref[i] * ref[i]; + } + + if (sq_error == 0) { + *variance_acc += variance; + return; + } + double snr = variance / sq_error; + if (snr > best_snr) { + best_snr = snr; + best_variance = variance; + best_sq_error = sq_error; + } + } + + *variance_acc += best_variance; + *sq_error_acc += best_sq_error; +} + +// Used to test a multitude of sample rate and channel combinations. It works +// by first producing a set of reference files (in SetUpTestCase) that are +// assumed to be correct, as the used parameters are verified by other tests +// in this collection. Primarily the reference files are all produced at +// "native" rates which do not involve any resampling. + +// Each test pass produces an output file with a particular format. The output +// is matched against the reference file closest to its internal processing +// format. If necessary the output is resampled back to its process format. +// Due to the resampling distortion, we don't expect identical results, but +// enforce SNR thresholds which vary depending on the format. 0 is a special +// case SNR which corresponds to inf, or zero error. +typedef std::tuple<int, int, int, int, double, double> AudioProcessingTestData; +class AudioProcessingTest + : public testing::TestWithParam<AudioProcessingTestData> { + public: + AudioProcessingTest() + : input_rate_(std::get<0>(GetParam())), + output_rate_(std::get<1>(GetParam())), + reverse_input_rate_(std::get<2>(GetParam())), + reverse_output_rate_(std::get<3>(GetParam())), + expected_snr_(std::get<4>(GetParam())), + expected_reverse_snr_(std::get<5>(GetParam())) {} + + virtual ~AudioProcessingTest() {} + + static void SetUpTestCase() { + // Create all needed output reference files. + const int kNativeRates[] = {8000, 16000, 32000, 48000}; + const size_t kNumChannels[] = {1, 2}; + for (size_t i = 0; i < arraysize(kNativeRates); ++i) { + for (size_t j = 0; j < arraysize(kNumChannels); ++j) { + for (size_t k = 0; k < arraysize(kNumChannels); ++k) { + // The reference files always have matching input and output channels. + ProcessFormat(kNativeRates[i], kNativeRates[i], kNativeRates[i], + kNativeRates[i], kNumChannels[j], kNumChannels[j], + kNumChannels[k], kNumChannels[k], "ref"); + } + } + } + } + + void TearDown() { + // Remove "out" files after each test. + ClearTempOutFiles(); + } + + static void TearDownTestCase() { + ClearTempFiles(); + } + + // Runs a process pass on files with the given parameters and dumps the output + // to a file specified with |output_file_prefix|. Both forward and reverse + // output streams are dumped. 
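+ // The gain control's analog level is fed back from each processed chunk
+ // into the next, so the AGC loop is exercised across the whole file.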
+ static void ProcessFormat(int input_rate, + int output_rate, + int reverse_input_rate, + int reverse_output_rate, + size_t num_input_channels, + size_t num_output_channels, + size_t num_reverse_input_channels, + size_t num_reverse_output_channels, + const std::string& output_file_prefix) { + Config config; + config.Set<ExperimentalAgc>(new ExperimentalAgc(false)); + std::unique_ptr<AudioProcessing> ap(AudioProcessing::Create(config)); + EnableAllAPComponents(ap.get()); + + ProcessingConfig processing_config = { + {{input_rate, num_input_channels}, + {output_rate, num_output_channels}, + {reverse_input_rate, num_reverse_input_channels}, + {reverse_output_rate, num_reverse_output_channels}}}; + ap->Initialize(processing_config); + + FILE* far_file = + fopen(ResourceFilePath("far", reverse_input_rate).c_str(), "rb"); + FILE* near_file = fopen(ResourceFilePath("near", input_rate).c_str(), "rb"); + FILE* out_file = + fopen(OutputFilePath(output_file_prefix, input_rate, output_rate, + reverse_input_rate, reverse_output_rate, + num_input_channels, num_output_channels, + num_reverse_input_channels, + num_reverse_output_channels, kForward).c_str(), + "wb"); + FILE* rev_out_file = + fopen(OutputFilePath(output_file_prefix, input_rate, output_rate, + reverse_input_rate, reverse_output_rate, + num_input_channels, num_output_channels, + num_reverse_input_channels, + num_reverse_output_channels, kReverse).c_str(), + "wb"); + ASSERT_TRUE(far_file != NULL); + ASSERT_TRUE(near_file != NULL); + ASSERT_TRUE(out_file != NULL); + ASSERT_TRUE(rev_out_file != NULL); + + ChannelBuffer<float> fwd_cb(SamplesFromRate(input_rate), + num_input_channels); + ChannelBuffer<float> rev_cb(SamplesFromRate(reverse_input_rate), + num_reverse_input_channels); + ChannelBuffer<float> out_cb(SamplesFromRate(output_rate), + num_output_channels); + ChannelBuffer<float> rev_out_cb(SamplesFromRate(reverse_output_rate), + num_reverse_output_channels); + + // Temporary buffers. + const int max_length = + 2 * std::max(std::max(out_cb.num_frames(), rev_out_cb.num_frames()), + std::max(fwd_cb.num_frames(), rev_cb.num_frames())); + std::unique_ptr<float[]> float_data(new float[max_length]); + std::unique_ptr<int16_t[]> int_data(new int16_t[max_length]); + + int analog_level = 127; + while (ReadChunk(far_file, int_data.get(), float_data.get(), &rev_cb) && + ReadChunk(near_file, int_data.get(), float_data.get(), &fwd_cb)) { + EXPECT_NOERR(ap->ProcessReverseStream( + rev_cb.channels(), processing_config.reverse_input_stream(), + processing_config.reverse_output_stream(), rev_out_cb.channels())); + + EXPECT_NOERR(ap->set_stream_delay_ms(0)); + ap->echo_cancellation()->set_stream_drift_samples(0); + EXPECT_NOERR(ap->gain_control()->set_stream_analog_level(analog_level)); + + EXPECT_NOERR(ap->ProcessStream( + fwd_cb.channels(), + fwd_cb.num_frames(), + input_rate, + LayoutFromChannels(num_input_channels), + output_rate, + LayoutFromChannels(num_output_channels), + out_cb.channels())); + + // Dump forward output to file. + Interleave(out_cb.channels(), out_cb.num_frames(), out_cb.num_channels(), + float_data.get()); + size_t out_length = out_cb.num_channels() * out_cb.num_frames(); + + ASSERT_EQ(out_length, + fwrite(float_data.get(), sizeof(float_data[0]), + out_length, out_file)); + + // Dump reverse output to file. 
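+ // As with the forward stream above, the per-channel buffers are
+ // interleaved into |float_data| before being written to disk.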
+ Interleave(rev_out_cb.channels(), rev_out_cb.num_frames(), + rev_out_cb.num_channels(), float_data.get()); + size_t rev_out_length = + rev_out_cb.num_channels() * rev_out_cb.num_frames(); + + ASSERT_EQ(rev_out_length, + fwrite(float_data.get(), sizeof(float_data[0]), rev_out_length, + rev_out_file)); + + analog_level = ap->gain_control()->stream_analog_level(); + } + fclose(far_file); + fclose(near_file); + fclose(out_file); + fclose(rev_out_file); + } + + protected: + int input_rate_; + int output_rate_; + int reverse_input_rate_; + int reverse_output_rate_; + double expected_snr_; + double expected_reverse_snr_; +}; + +TEST_P(AudioProcessingTest, Formats) { + struct ChannelFormat { + int num_input; + int num_output; + int num_reverse_input; + int num_reverse_output; + }; + ChannelFormat cf[] = { + {1, 1, 1, 1}, + {1, 1, 2, 1}, + {2, 1, 1, 1}, + {2, 1, 2, 1}, + {2, 2, 1, 1}, + {2, 2, 2, 2}, + }; + + for (size_t i = 0; i < arraysize(cf); ++i) { + ProcessFormat(input_rate_, output_rate_, reverse_input_rate_, + reverse_output_rate_, cf[i].num_input, cf[i].num_output, + cf[i].num_reverse_input, cf[i].num_reverse_output, "out"); + + // Verify output for both directions. + std::vector<StreamDirection> stream_directions; + stream_directions.push_back(kForward); + stream_directions.push_back(kReverse); + for (StreamDirection file_direction : stream_directions) { + const int in_rate = file_direction ? reverse_input_rate_ : input_rate_; + const int out_rate = file_direction ? reverse_output_rate_ : output_rate_; + const int out_num = + file_direction ? cf[i].num_reverse_output : cf[i].num_output; + const double expected_snr = + file_direction ? expected_reverse_snr_ : expected_snr_; + + const int min_ref_rate = std::min(in_rate, out_rate); + int ref_rate; + + if (min_ref_rate > 32000) { + ref_rate = 48000; + } else if (min_ref_rate > 16000) { + ref_rate = 32000; + } else if (min_ref_rate > 8000) { + ref_rate = 16000; + } else { + ref_rate = 8000; + } +#ifdef WEBRTC_ARCH_ARM_FAMILY + if (file_direction == kForward) { + ref_rate = std::min(ref_rate, 32000); + } +#endif + FILE* out_file = fopen( + OutputFilePath("out", input_rate_, output_rate_, reverse_input_rate_, + reverse_output_rate_, cf[i].num_input, + cf[i].num_output, cf[i].num_reverse_input, + cf[i].num_reverse_output, file_direction).c_str(), + "rb"); + // The reference files always have matching input and output channels. + FILE* ref_file = fopen( + OutputFilePath("ref", ref_rate, ref_rate, ref_rate, ref_rate, + cf[i].num_output, cf[i].num_output, + cf[i].num_reverse_output, cf[i].num_reverse_output, + file_direction).c_str(), + "rb"); + ASSERT_TRUE(out_file != NULL); + ASSERT_TRUE(ref_file != NULL); + + const size_t ref_length = SamplesFromRate(ref_rate) * out_num; + const size_t out_length = SamplesFromRate(out_rate) * out_num; + // Data from the reference file. + std::unique_ptr<float[]> ref_data(new float[ref_length]); + // Data from the output file. + std::unique_ptr<float[]> out_data(new float[out_length]); + // Data from the resampled output, in case the reference and output rates + // don't match. + std::unique_ptr<float[]> cmp_data(new float[ref_length]); + + PushResampler<float> resampler; + resampler.InitializeIfNeeded(out_rate, ref_rate, out_num); + + // Compute the resampling delay of the output relative to the reference, + // to find the region over which we should search for the best SNR. + float expected_delay_sec = 0; + if (in_rate != ref_rate) { + // Input resampling delay. 
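+ // When the input rate differs from the reference rate, one stage of
+ // sinc-resampler latency (evaluated at the input rate) is incurred.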
+ expected_delay_sec +=
+ PushSincResampler::AlgorithmicDelaySeconds(in_rate);
+ }
+ if (out_rate != ref_rate) {
+ // Output resampling delay.
+ expected_delay_sec +=
+ PushSincResampler::AlgorithmicDelaySeconds(ref_rate);
+ // Delay of converting the output back to its processing rate for
+ // testing.
+ expected_delay_sec +=
+ PushSincResampler::AlgorithmicDelaySeconds(out_rate);
+ }
+ int expected_delay =
+ floor(expected_delay_sec * ref_rate + 0.5f) * out_num;
+
+ double variance = 0;
+ double sq_error = 0;
+ while (fread(out_data.get(), sizeof(out_data[0]), out_length, out_file) &&
+ fread(ref_data.get(), sizeof(ref_data[0]), ref_length, ref_file)) {
+ float* out_ptr = out_data.get();
+ if (out_rate != ref_rate) {
+ // Resample the output back to its internal processing rate if
+ // necessary.
+ ASSERT_EQ(ref_length,
+ static_cast<size_t>(resampler.Resample(
+ out_ptr, out_length, cmp_data.get(), ref_length)));
+ out_ptr = cmp_data.get();
+ }
+
+ // Update the |sq_error| and |variance| accumulators with the highest
+ // SNR of reference vs output.
+ UpdateBestSNR(ref_data.get(), out_ptr, ref_length, expected_delay,
+ &variance, &sq_error);
+ }
+
+ std::cout << "(" << input_rate_ << ", " << output_rate_ << ", "
+ << reverse_input_rate_ << ", " << reverse_output_rate_ << ", "
+ << cf[i].num_input << ", " << cf[i].num_output << ", "
+ << cf[i].num_reverse_input << ", " << cf[i].num_reverse_output
+ << ", " << file_direction << "): ";
+ if (sq_error > 0) {
+ double snr = 10 * log10(variance / sq_error);
+ EXPECT_GE(snr, expected_snr);
+ EXPECT_NE(0, expected_snr);
+ std::cout << "SNR=" << snr << " dB" << std::endl;
+ } else {
+ std::cout << "SNR=inf dB" << std::endl;
+ }
+
+ fclose(out_file);
+ fclose(ref_file);
+ }
+ }
+}
+
+#if defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE)
+INSTANTIATE_TEST_CASE_P(
+ CommonFormats,
+ AudioProcessingTest,
+ testing::Values(std::make_tuple(48000, 48000, 48000, 48000, 0, 0),
+ std::make_tuple(48000, 48000, 32000, 48000, 40, 30),
+ std::make_tuple(48000, 48000, 16000, 48000, 40, 20),
+ std::make_tuple(48000, 44100, 48000, 44100, 20, 20),
+ std::make_tuple(48000, 44100, 32000, 44100, 20, 15),
+ std::make_tuple(48000, 44100, 16000, 44100, 20, 15),
+ std::make_tuple(48000, 32000, 48000, 32000, 30, 35),
+ std::make_tuple(48000, 32000, 32000, 32000, 30, 0),
+ std::make_tuple(48000, 32000, 16000, 32000, 30, 20),
+ std::make_tuple(48000, 16000, 48000, 16000, 25, 20),
+ std::make_tuple(48000, 16000, 32000, 16000, 25, 20),
+ std::make_tuple(48000, 16000, 16000, 16000, 25, 0),
+
+ std::make_tuple(44100, 48000, 48000, 48000, 30, 0),
+ std::make_tuple(44100, 48000, 32000, 48000, 30, 30),
+ std::make_tuple(44100, 48000, 16000, 48000, 30, 20),
+ std::make_tuple(44100, 44100, 48000, 44100, 20, 20),
+ std::make_tuple(44100, 44100, 32000, 44100, 20, 15),
+ std::make_tuple(44100, 44100, 16000, 44100, 20, 15),
+ std::make_tuple(44100, 32000, 48000, 32000, 30, 35),
+ std::make_tuple(44100, 32000, 32000, 32000, 30, 0),
+ std::make_tuple(44100, 32000, 16000, 32000, 30, 20),
+ std::make_tuple(44100, 16000, 48000, 16000, 25, 20),
+ std::make_tuple(44100, 16000, 32000, 16000, 25, 20),
+ std::make_tuple(44100, 16000, 16000, 16000, 25, 0),
+
+ std::make_tuple(32000, 48000, 48000, 48000, 30, 0),
+ std::make_tuple(32000, 48000, 32000, 48000, 35, 30),
+ std::make_tuple(32000, 48000, 16000, 48000, 30, 20),
+ std::make_tuple(32000, 44100, 48000, 44100, 20, 20),
+ std::make_tuple(32000, 44100, 32000, 44100, 20, 15),
+ std::make_tuple(32000, 44100, 16000, 44100, 20, 15),
+ std::make_tuple(32000,
32000, 48000, 32000, 40, 35), + std::make_tuple(32000, 32000, 32000, 32000, 0, 0), + std::make_tuple(32000, 32000, 16000, 32000, 40, 20), + std::make_tuple(32000, 16000, 48000, 16000, 25, 20), + std::make_tuple(32000, 16000, 32000, 16000, 25, 20), + std::make_tuple(32000, 16000, 16000, 16000, 25, 0), + + std::make_tuple(16000, 48000, 48000, 48000, 25, 0), + std::make_tuple(16000, 48000, 32000, 48000, 25, 30), + std::make_tuple(16000, 48000, 16000, 48000, 25, 20), + std::make_tuple(16000, 44100, 48000, 44100, 15, 20), + std::make_tuple(16000, 44100, 32000, 44100, 15, 15), + std::make_tuple(16000, 44100, 16000, 44100, 15, 15), + std::make_tuple(16000, 32000, 48000, 32000, 25, 35), + std::make_tuple(16000, 32000, 32000, 32000, 25, 0), + std::make_tuple(16000, 32000, 16000, 32000, 25, 20), + std::make_tuple(16000, 16000, 48000, 16000, 40, 20), + std::make_tuple(16000, 16000, 32000, 16000, 40, 20), + std::make_tuple(16000, 16000, 16000, 16000, 0, 0))); + +#elif defined(WEBRTC_AUDIOPROC_FIXED_PROFILE) +INSTANTIATE_TEST_CASE_P( + CommonFormats, + AudioProcessingTest, + testing::Values(std::make_tuple(48000, 48000, 48000, 48000, 20, 0), + std::make_tuple(48000, 48000, 32000, 48000, 20, 30), + std::make_tuple(48000, 48000, 16000, 48000, 20, 20), + std::make_tuple(48000, 44100, 48000, 44100, 15, 20), + std::make_tuple(48000, 44100, 32000, 44100, 15, 15), + std::make_tuple(48000, 44100, 16000, 44100, 15, 15), + std::make_tuple(48000, 32000, 48000, 32000, 20, 35), + std::make_tuple(48000, 32000, 32000, 32000, 20, 0), + std::make_tuple(48000, 32000, 16000, 32000, 20, 20), + std::make_tuple(48000, 16000, 48000, 16000, 20, 20), + std::make_tuple(48000, 16000, 32000, 16000, 20, 20), + std::make_tuple(48000, 16000, 16000, 16000, 20, 0), + + std::make_tuple(44100, 48000, 48000, 48000, 15, 0), + std::make_tuple(44100, 48000, 32000, 48000, 15, 30), + std::make_tuple(44100, 48000, 16000, 48000, 15, 20), + std::make_tuple(44100, 44100, 48000, 44100, 15, 20), + std::make_tuple(44100, 44100, 32000, 44100, 15, 15), + std::make_tuple(44100, 44100, 16000, 44100, 15, 15), + std::make_tuple(44100, 32000, 48000, 32000, 20, 35), + std::make_tuple(44100, 32000, 32000, 32000, 20, 0), + std::make_tuple(44100, 32000, 16000, 32000, 20, 20), + std::make_tuple(44100, 16000, 48000, 16000, 20, 20), + std::make_tuple(44100, 16000, 32000, 16000, 20, 20), + std::make_tuple(44100, 16000, 16000, 16000, 20, 0), + + std::make_tuple(32000, 48000, 48000, 48000, 35, 0), + std::make_tuple(32000, 48000, 32000, 48000, 65, 30), + std::make_tuple(32000, 48000, 16000, 48000, 40, 20), + std::make_tuple(32000, 44100, 48000, 44100, 20, 20), + std::make_tuple(32000, 44100, 32000, 44100, 20, 15), + std::make_tuple(32000, 44100, 16000, 44100, 20, 15), + std::make_tuple(32000, 32000, 48000, 32000, 35, 35), + std::make_tuple(32000, 32000, 32000, 32000, 0, 0), + std::make_tuple(32000, 32000, 16000, 32000, 40, 20), + std::make_tuple(32000, 16000, 48000, 16000, 20, 20), + std::make_tuple(32000, 16000, 32000, 16000, 20, 20), + std::make_tuple(32000, 16000, 16000, 16000, 20, 0), + + std::make_tuple(16000, 48000, 48000, 48000, 25, 0), + std::make_tuple(16000, 48000, 32000, 48000, 25, 30), + std::make_tuple(16000, 48000, 16000, 48000, 25, 20), + std::make_tuple(16000, 44100, 48000, 44100, 15, 20), + std::make_tuple(16000, 44100, 32000, 44100, 15, 15), + std::make_tuple(16000, 44100, 16000, 44100, 15, 15), + std::make_tuple(16000, 32000, 48000, 32000, 25, 35), + std::make_tuple(16000, 32000, 32000, 32000, 25, 0), + std::make_tuple(16000, 32000, 16000, 32000, 
25, 20),
+ std::make_tuple(16000, 16000, 48000, 16000, 35, 20),
+ std::make_tuple(16000, 16000, 32000, 16000, 35, 20),
+ std::make_tuple(16000, 16000, 16000, 16000, 0, 0)));
+#endif
+
+} // namespace
+
+TEST(ApmConfiguration, DefaultBehavior) {
+ // Verify that the level controller is off by default, that it can be
+ // activated using the config, and that the default initial level is
+ // maintained after the config has been applied.
+ std::unique_ptr<AudioProcessingImpl> apm(
+ new rtc::RefCountedObject<AudioProcessingImpl>(webrtc::Config()));
+ AudioProcessing::Config config;
+ EXPECT_FALSE(apm->config_.level_controller.enabled);
+ // TODO(peah): Add test for the existence of the level controller object once
+ // that is created only when that is specified in the config.
+ // TODO(peah): Remove the testing for
+ // apm->capture_nonlocked_.level_controller_enabled once the value in config_
+ // is instead used to activate the level controller.
+ EXPECT_FALSE(apm->capture_nonlocked_.level_controller_enabled);
+ EXPECT_NEAR(kTargetLcPeakLeveldBFS,
+ apm->config_.level_controller.initial_peak_level_dbfs,
+ std::numeric_limits<float>::epsilon());
+ config.level_controller.enabled = true;
+ apm->ApplyConfig(config);
+ EXPECT_TRUE(apm->config_.level_controller.enabled);
+ // TODO(peah): Add test for the existence of the level controller object once
+ // that is created only when that is specified in the config.
+ // TODO(peah): Remove the testing for
+ // apm->capture_nonlocked_.level_controller_enabled once the value in config_
+ // is instead used to activate the level controller.
+ EXPECT_TRUE(apm->capture_nonlocked_.level_controller_enabled);
+ EXPECT_NEAR(kTargetLcPeakLeveldBFS,
+ apm->config_.level_controller.initial_peak_level_dbfs,
+ std::numeric_limits<float>::epsilon());
+}
+
+TEST(ApmConfiguration, ValidConfigBehavior) {
+ // Verify that the initial level can be specified and is retained after the
+ // config has been applied.
+ std::unique_ptr<AudioProcessingImpl> apm(
+ new rtc::RefCountedObject<AudioProcessingImpl>(webrtc::Config()));
+ AudioProcessing::Config config;
+ config.level_controller.initial_peak_level_dbfs = -50.f;
+ apm->ApplyConfig(config);
+ EXPECT_FALSE(apm->config_.level_controller.enabled);
+ // TODO(peah): Add test for the existence of the level controller object once
+ // that is created only when that is specified in the config.
+ // TODO(peah): Remove the testing for
+ // apm->capture_nonlocked_.level_controller_enabled once the value in config_
+ // is instead used to activate the level controller.
+ EXPECT_FALSE(apm->capture_nonlocked_.level_controller_enabled);
+ EXPECT_NEAR(-50.f, apm->config_.level_controller.initial_peak_level_dbfs,
+ std::numeric_limits<float>::epsilon());
+}
+
+TEST(ApmConfiguration, InValidConfigBehavior) {
+ // Verify that the config is properly reset when invalid values are applied
+ // for the initial level.
+
+ // Verify that the config is properly reset when the specified initial peak
+ // level is too low.
+ std::unique_ptr<AudioProcessingImpl> apm(
+ new rtc::RefCountedObject<AudioProcessingImpl>(webrtc::Config()));
+ AudioProcessing::Config config;
+ config.level_controller.enabled = true;
+ config.level_controller.initial_peak_level_dbfs = -101.f;
+ apm->ApplyConfig(config);
+ EXPECT_FALSE(apm->config_.level_controller.enabled);
+ // TODO(peah): Add test for the existence of the level controller object once
+ // that is created only when that is specified in the config.
+ // TODO(peah): Remove the testing for + // apm->capture_nonlocked_.level_controller_enabled once the value in config_ + // is instead used to activate the level controller. + EXPECT_FALSE(apm->capture_nonlocked_.level_controller_enabled); + EXPECT_NEAR(kTargetLcPeakLeveldBFS, + apm->config_.level_controller.initial_peak_level_dbfs, + std::numeric_limits<float>::epsilon()); + + // Verify that the config is properly reset when the specified initial peak + // level is too high. + apm.reset(new rtc::RefCountedObject<AudioProcessingImpl>(webrtc::Config())); + config = AudioProcessing::Config(); + config.level_controller.enabled = true; + config.level_controller.initial_peak_level_dbfs = 1.f; + apm->ApplyConfig(config); + EXPECT_FALSE(apm->config_.level_controller.enabled); + // TODO(peah): Add test for the existence of the level controller object once + // that is created only when that is specified in the config. + // TODO(peah): Remove the testing for + // apm->capture_nonlocked_.level_controller_enabled once the value in config_ + // is instead used to activate the level controller. + EXPECT_FALSE(apm->capture_nonlocked_.level_controller_enabled); + EXPECT_NEAR(kTargetLcPeakLeveldBFS, + apm->config_.level_controller.initial_peak_level_dbfs, + std::numeric_limits<float>::epsilon()); +} + +TEST(ApmConfiguration, EnablePostProcessing) { + // Verify that apm uses a capture post processing module if one is provided. + webrtc::Config webrtc_config; + auto mock_post_processor_ptr = + new testing::NiceMock<test::MockPostProcessing>(); + auto mock_post_processor = + std::unique_ptr<PostProcessing>(mock_post_processor_ptr); + rtc::scoped_refptr<AudioProcessing> apm = AudioProcessing::Create( + webrtc_config, std::move(mock_post_processor), nullptr, nullptr); + + AudioFrame audio; + audio.num_channels_ = 1; + SetFrameSampleRate(&audio, AudioProcessing::NativeRate::kSampleRate16kHz); + + EXPECT_CALL(*mock_post_processor_ptr, Process(testing::_)).Times(1); + apm->ProcessStream(&audio); +} + +class MyEchoControlFactory : public EchoControlFactory { + public: + std::unique_ptr<EchoControl> Create(int sample_rate_hz) { + auto ec = new test::MockEchoControl(); + EXPECT_CALL(*ec, AnalyzeRender(testing::_)).Times(1); + EXPECT_CALL(*ec, AnalyzeCapture(testing::_)).Times(2); + EXPECT_CALL(*ec, ProcessCapture(testing::_, testing::_)).Times(2); + return std::unique_ptr<EchoControl>(ec); + } +}; + +TEST(ApmConfiguration, EchoControlInjection) { + // Verify that apm uses an injected echo controller if one is provided. 
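+ // The factory's mock expects exactly one render analysis and two capture
+ // analysis/processing calls, matching the one reverse and two forward
+ // frames pushed below.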
+ webrtc::Config webrtc_config; + std::unique_ptr<EchoControlFactory> echo_control_factory( + new MyEchoControlFactory()); + + rtc::scoped_refptr<AudioProcessing> apm = AudioProcessing::Create( + webrtc_config, nullptr, std::move(echo_control_factory), nullptr); + + AudioFrame audio; + audio.num_channels_ = 1; + SetFrameSampleRate(&audio, AudioProcessing::NativeRate::kSampleRate16kHz); + apm->ProcessStream(&audio); + apm->ProcessReverseStream(&audio); + apm->ProcessStream(&audio); +} + +std::unique_ptr<AudioProcessing> CreateApm(bool use_AEC2) { + Config old_config; + if (use_AEC2) { + old_config.Set<ExtendedFilter>(new ExtendedFilter(true)); + old_config.Set<DelayAgnostic>(new DelayAgnostic(true)); + } + std::unique_ptr<AudioProcessing> apm(AudioProcessing::Create(old_config)); + if (!apm) { + return apm; + } + + ProcessingConfig processing_config = { + {{32000, 1}, {32000, 1}, {32000, 1}, {32000, 1}}}; + + if (apm->Initialize(processing_config) != 0) { + return nullptr; + } + + // Disable all components except for an AEC and the residual echo detector. + AudioProcessing::Config config; + config.residual_echo_detector.enabled = true; + config.echo_canceller3.enabled = false; + config.high_pass_filter.enabled = false; + config.gain_controller2.enabled = false; + config.level_controller.enabled = false; + apm->ApplyConfig(config); + EXPECT_EQ(apm->gain_control()->Enable(false), 0); + EXPECT_EQ(apm->level_estimator()->Enable(false), 0); + EXPECT_EQ(apm->noise_suppression()->Enable(false), 0); + EXPECT_EQ(apm->voice_detection()->Enable(false), 0); + + if (use_AEC2) { + EXPECT_EQ(apm->echo_control_mobile()->Enable(false), 0); + EXPECT_EQ(apm->echo_cancellation()->enable_metrics(true), 0); + EXPECT_EQ(apm->echo_cancellation()->enable_delay_logging(true), 0); + EXPECT_EQ(apm->echo_cancellation()->Enable(true), 0); + } else { + EXPECT_EQ(apm->echo_cancellation()->Enable(false), 0); + EXPECT_EQ(apm->echo_control_mobile()->Enable(true), 0); + } + return apm; +} + +#if defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS) || defined(WEBRTC_MAC) +#define MAYBE_ApmStatistics DISABLED_ApmStatistics +#else +#define MAYBE_ApmStatistics ApmStatistics +#endif + +TEST(MAYBE_ApmStatistics, AEC2EnabledTest) { + // Set up APM with AEC2 and process some audio. + std::unique_ptr<AudioProcessing> apm = CreateApm(true); + ASSERT_TRUE(apm); + + // Set up an audioframe. + AudioFrame frame; + frame.num_channels_ = 1; + SetFrameSampleRate(&frame, AudioProcessing::NativeRate::kSampleRate48kHz); + + // Fill the audio frame with a sawtooth pattern. + int16_t* ptr = frame.mutable_data(); + for (size_t i = 0; i < frame.kMaxDataSizeSamples; i++) { + ptr[i] = 10000 * ((i % 3) - 1); + } + + // Do some processing. + for (int i = 0; i < 200; i++) { + EXPECT_EQ(apm->ProcessReverseStream(&frame), 0); + EXPECT_EQ(apm->set_stream_delay_ms(0), 0); + EXPECT_EQ(apm->ProcessStream(&frame), 0); + } + + // Test statistics interface. + AudioProcessingStats stats = apm->GetStatistics(true); + // We expect all statistics to be set and have a sensible value. 
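+ // Each statistic is optional-valued; ASSERT_TRUE verifies that a value is
+ // present before it is dereferenced below.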
+ ASSERT_TRUE(stats.residual_echo_likelihood); + EXPECT_GE(*stats.residual_echo_likelihood, 0.0); + EXPECT_LE(*stats.residual_echo_likelihood, 1.0); + ASSERT_TRUE(stats.residual_echo_likelihood_recent_max); + EXPECT_GE(*stats.residual_echo_likelihood_recent_max, 0.0); + EXPECT_LE(*stats.residual_echo_likelihood_recent_max, 1.0); + ASSERT_TRUE(stats.echo_return_loss); + EXPECT_NE(*stats.echo_return_loss, -100.0); + ASSERT_TRUE(stats.echo_return_loss_enhancement); + EXPECT_NE(*stats.echo_return_loss_enhancement, -100.0); + ASSERT_TRUE(stats.divergent_filter_fraction); + EXPECT_NE(*stats.divergent_filter_fraction, -1.0); + ASSERT_TRUE(stats.delay_standard_deviation_ms); + EXPECT_GE(*stats.delay_standard_deviation_ms, 0); + // We don't check stats.delay_median_ms since it takes too long to settle to a + // value. At least 20 seconds of data need to be processed before it will get + // a value, which would make this test take too much time. + + // If there are no receive streams, we expect the stats not to be set. The + // 'false' argument signals to APM that no receive streams are currently + // active. In that situation the statistics would get stuck at their last + // calculated value (AEC and echo detection need at least one stream in each + // direction), so to avoid that, they should not be set by APM. + stats = apm->GetStatistics(false); + EXPECT_FALSE(stats.residual_echo_likelihood); + EXPECT_FALSE(stats.residual_echo_likelihood_recent_max); + EXPECT_FALSE(stats.echo_return_loss); + EXPECT_FALSE(stats.echo_return_loss_enhancement); + EXPECT_FALSE(stats.divergent_filter_fraction); + EXPECT_FALSE(stats.delay_median_ms); + EXPECT_FALSE(stats.delay_standard_deviation_ms); +} + +TEST(MAYBE_ApmStatistics, AECMEnabledTest) { + // Set up APM with AECM and process some audio. + std::unique_ptr<AudioProcessing> apm = CreateApm(false); + ASSERT_TRUE(apm); + + // Set up an audioframe. + AudioFrame frame; + frame.num_channels_ = 1; + SetFrameSampleRate(&frame, AudioProcessing::NativeRate::kSampleRate48kHz); + + // Fill the audio frame with a sawtooth pattern. + int16_t* ptr = frame.mutable_data(); + for (size_t i = 0; i < frame.kMaxDataSizeSamples; i++) { + ptr[i] = 10000 * ((i % 3) - 1); + } + + // Do some processing. + for (int i = 0; i < 200; i++) { + EXPECT_EQ(apm->ProcessReverseStream(&frame), 0); + EXPECT_EQ(apm->set_stream_delay_ms(0), 0); + EXPECT_EQ(apm->ProcessStream(&frame), 0); + } + + // Test statistics interface. + AudioProcessingStats stats = apm->GetStatistics(true); + // We expect only the residual echo detector statistics to be set and have a + // sensible value. + EXPECT_TRUE(stats.residual_echo_likelihood); + if (stats.residual_echo_likelihood) { + EXPECT_GE(*stats.residual_echo_likelihood, 0.0); + EXPECT_LE(*stats.residual_echo_likelihood, 1.0); + } + EXPECT_TRUE(stats.residual_echo_likelihood_recent_max); + if (stats.residual_echo_likelihood_recent_max) { + EXPECT_GE(*stats.residual_echo_likelihood_recent_max, 0.0); + EXPECT_LE(*stats.residual_echo_likelihood_recent_max, 1.0); + } + EXPECT_FALSE(stats.echo_return_loss); + EXPECT_FALSE(stats.echo_return_loss_enhancement); + EXPECT_FALSE(stats.divergent_filter_fraction); + EXPECT_FALSE(stats.delay_median_ms); + EXPECT_FALSE(stats.delay_standard_deviation_ms); + + // If there are no receive streams, we expect the stats not to be set. 
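+ // As in the AEC2 test above, passing false tells APM that no receive
+ // streams are active, so even the echo detector statistics should be
+ // left unset.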
+ stats = apm->GetStatistics(false); + EXPECT_FALSE(stats.residual_echo_likelihood); + EXPECT_FALSE(stats.residual_echo_likelihood_recent_max); + EXPECT_FALSE(stats.echo_return_loss); + EXPECT_FALSE(stats.echo_return_loss_enhancement); + EXPECT_FALSE(stats.divergent_filter_fraction); + EXPECT_FALSE(stats.delay_median_ms); + EXPECT_FALSE(stats.delay_standard_deviation_ms); +} +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/beamformer/array_util.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/beamformer/array_util.cc new file mode 100644 index 0000000000..e853559140 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/beamformer/array_util.cc @@ -0,0 +1,119 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/beamformer/array_util.h" + +#include <algorithm> +#include <limits> + +#include "rtc_base/checks.h" + +namespace webrtc { +namespace { + +const float kMaxDotProduct = 1e-6f; + +} // namespace + +float GetMinimumSpacing(const std::vector<Point>& array_geometry) { + RTC_CHECK_GT(array_geometry.size(), 1); + float mic_spacing = std::numeric_limits<float>::max(); + for (size_t i = 0; i < (array_geometry.size() - 1); ++i) { + for (size_t j = i + 1; j < array_geometry.size(); ++j) { + mic_spacing = + std::min(mic_spacing, Distance(array_geometry[i], array_geometry[j])); + } + } + return mic_spacing; +} + +Point PairDirection(const Point& a, const Point& b) { + return {b.x() - a.x(), b.y() - a.y(), b.z() - a.z()}; +} + +float DotProduct(const Point& a, const Point& b) { + return a.x() * b.x() + a.y() * b.y() + a.z() * b.z(); +} + +Point CrossProduct(const Point& a, const Point& b) { + return {a.y() * b.z() - a.z() * b.y(), a.z() * b.x() - a.x() * b.z(), + a.x() * b.y() - a.y() * b.x()}; +} + +bool AreParallel(const Point& a, const Point& b) { + Point cross_product = CrossProduct(a, b); + return DotProduct(cross_product, cross_product) < kMaxDotProduct; +} + +bool ArePerpendicular(const Point& a, const Point& b) { + return std::abs(DotProduct(a, b)) < kMaxDotProduct; +} + +rtc::Optional<Point> GetDirectionIfLinear( + const std::vector<Point>& array_geometry) { + RTC_DCHECK_GT(array_geometry.size(), 1); + const Point first_pair_direction = + PairDirection(array_geometry[0], array_geometry[1]); + for (size_t i = 2u; i < array_geometry.size(); ++i) { + const Point pair_direction = + PairDirection(array_geometry[i - 1], array_geometry[i]); + if (!AreParallel(first_pair_direction, pair_direction)) { + return rtc::nullopt; + } + } + return first_pair_direction; +} + +rtc::Optional<Point> GetNormalIfPlanar( + const std::vector<Point>& array_geometry) { + RTC_DCHECK_GT(array_geometry.size(), 1); + const Point first_pair_direction = + PairDirection(array_geometry[0], array_geometry[1]); + Point pair_direction(0.f, 0.f, 0.f); + size_t i = 2u; + bool is_linear = true; + for (; i < array_geometry.size() && is_linear; ++i) { + pair_direction = PairDirection(array_geometry[i - 1], array_geometry[i]); + if (!AreParallel(first_pair_direction, pair_direction)) { + is_linear = false; + } + } + if (is_linear) { + return rtc::nullopt; + } + const 
Point normal_direction = + CrossProduct(first_pair_direction, pair_direction); + for (; i < array_geometry.size(); ++i) { + pair_direction = PairDirection(array_geometry[i - 1], array_geometry[i]); + if (!ArePerpendicular(normal_direction, pair_direction)) { + return rtc::nullopt; + } + } + return normal_direction; +} + +rtc::Optional<Point> GetArrayNormalIfExists( + const std::vector<Point>& array_geometry) { + const rtc::Optional<Point> direction = GetDirectionIfLinear(array_geometry); + if (direction) { + return Point(direction->y(), -direction->x(), 0.f); + } + const rtc::Optional<Point> normal = GetNormalIfPlanar(array_geometry); + if (normal && normal->z() < kMaxDotProduct) { + return normal; + } + return rtc::nullopt; +} + +Point AzimuthToPoint(float azimuth) { + return Point(std::cos(azimuth), std::sin(azimuth), 0.f); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/beamformer/array_util.h b/third_party/libwebrtc/webrtc/modules/audio_processing/beamformer/array_util.h new file mode 100644 index 0000000000..f234929693 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/beamformer/array_util.h @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_BEAMFORMER_ARRAY_UTIL_H_ +#define MODULES_AUDIO_PROCESSING_BEAMFORMER_ARRAY_UTIL_H_ + +#include <cmath> +#include <vector> + +#include "api/optional.h" + +namespace webrtc { + +// Coordinates in meters. The convention used is: +// x: the horizontal dimension, with positive to the right from the camera's +// perspective. +// y: the depth dimension, with positive forward from the camera's +// perspective. +// z: the vertical dimension, with positive upwards. +template<typename T> +struct CartesianPoint { + CartesianPoint() { + c[0] = 0; + c[1] = 0; + c[2] = 0; + } + CartesianPoint(T x, T y, T z) { + c[0] = x; + c[1] = y; + c[2] = z; + } + T x() const { return c[0]; } + T y() const { return c[1]; } + T z() const { return c[2]; } + T c[3]; +}; + +using Point = CartesianPoint<float>; + +// Calculates the direction from a to b. +Point PairDirection(const Point& a, const Point& b); + +float DotProduct(const Point& a, const Point& b); +Point CrossProduct(const Point& a, const Point& b); + +bool AreParallel(const Point& a, const Point& b); +bool ArePerpendicular(const Point& a, const Point& b); + +// Returns the minimum distance between any two Points in the given +// |array_geometry|. +float GetMinimumSpacing(const std::vector<Point>& array_geometry); + +// If the given array geometry is linear it returns the direction without +// normalizing. +rtc::Optional<Point> GetDirectionIfLinear( + const std::vector<Point>& array_geometry); + +// If the given array geometry is planar it returns the normal without +// normalizing. +rtc::Optional<Point> GetNormalIfPlanar( + const std::vector<Point>& array_geometry); + +// Returns the normal of an array if it has one and it is in the xy-plane. +rtc::Optional<Point> GetArrayNormalIfExists( + const std::vector<Point>& array_geometry); + +// The resulting Point will be in the xy-plane. 
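+// For example, an azimuth of pi/2 radians maps to the unit vector
+// (0.f, 1.f, 0.f), i.e. straight ahead in the depth (y) dimension.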
+Point AzimuthToPoint(float azimuth); + +template<typename T> +float Distance(CartesianPoint<T> a, CartesianPoint<T> b) { + return std::sqrt((a.x() - b.x()) * (a.x() - b.x()) + + (a.y() - b.y()) * (a.y() - b.y()) + + (a.z() - b.z()) * (a.z() - b.z())); +} + +// The convention used: +// azimuth: zero is to the right from the camera's perspective, with positive +// angles in radians counter-clockwise. +// elevation: zero is horizontal, with positive angles in radians upwards. +// radius: distance from the camera in meters. +template <typename T> +struct SphericalPoint { + SphericalPoint(T azimuth, T elevation, T radius) { + s[0] = azimuth; + s[1] = elevation; + s[2] = radius; + } + T azimuth() const { return s[0]; } + T elevation() const { return s[1]; } + T distance() const { return s[2]; } + T s[3]; +}; + +using SphericalPointf = SphericalPoint<float>; + +// Helper functions to transform degrees to radians and the inverse. +template <typename T> +T DegreesToRadians(T angle_degrees) { + return M_PI * angle_degrees / 180; +} + +template <typename T> +T RadiansToDegrees(T angle_radians) { + return 180 * angle_radians / M_PI; +} + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_BEAMFORMER_ARRAY_UTIL_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/beamformer/array_util_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/beamformer/array_util_unittest.cc new file mode 100644 index 0000000000..a5c075ab05 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/beamformer/array_util_unittest.cc @@ -0,0 +1,185 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// MSVC++ requires this to be set before any other includes to get M_PI. 
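+// The define should be harmless on other toolchains, which typically
+// expose M_PI from <math.h> directly.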
+#define _USE_MATH_DEFINES + +#include "modules/audio_processing/beamformer/array_util.h" + +#include <math.h> +#include <vector> + +#include "test/gtest.h" + +namespace webrtc { + +bool operator==(const Point& lhs, const Point& rhs) { + return lhs.x() == rhs.x() && lhs.y() == rhs.y() && lhs.z() == rhs.z(); +} + +TEST(ArrayUtilTest, PairDirection) { + EXPECT_EQ(Point(1.f, 2.f, 3.f), + PairDirection(Point(0.f, 0.f, 0.f), Point(1.f, 2.f, 3.f))); + EXPECT_EQ(Point(-1.f, -2.f, -3.f), + PairDirection(Point(1.f, 2.f, 3.f), Point(0.f, 0.f, 0.f))); + EXPECT_EQ(Point(0.f, 0.f, 0.f), + PairDirection(Point(1.f, 0.f, 0.f), Point(1.f, 0.f, 0.f))); + EXPECT_EQ(Point(-1.f, 2.f, 0.f), + PairDirection(Point(1.f, 0.f, 0.f), Point(0.f, 2.f, 0.f))); + EXPECT_EQ(Point(-4.f, 4.f, -4.f), + PairDirection(Point(1.f, -2.f, 3.f), Point(-3.f, 2.f, -1.f))); +} + +TEST(ArrayUtilTest, DotProduct) { + EXPECT_FLOAT_EQ(0.f, DotProduct(Point(0.f, 0.f, 0.f), Point(1.f, 2.f, 3.f))); + EXPECT_FLOAT_EQ(0.f, DotProduct(Point(1.f, 0.f, 2.f), Point(0.f, 3.f, 0.f))); + EXPECT_FLOAT_EQ(0.f, DotProduct(Point(1.f, 1.f, 0.f), Point(1.f, -1.f, 0.f))); + EXPECT_FLOAT_EQ(2.f, DotProduct(Point(1.f, 0.f, 0.f), Point(2.f, 0.f, 0.f))); + EXPECT_FLOAT_EQ(-6.f, + DotProduct(Point(-2.f, 0.f, 0.f), Point(3.f, 0.f, 0.f))); + EXPECT_FLOAT_EQ(-10.f, + DotProduct(Point(1.f, -2.f, 3.f), Point(-3.f, 2.f, -1.f))); +} + +TEST(ArrayUtilTest, CrossProduct) { + EXPECT_EQ(Point(0.f, 0.f, 0.f), + CrossProduct(Point(0.f, 0.f, 0.f), Point(1.f, 2.f, 3.f))); + EXPECT_EQ(Point(0.f, 0.f, 1.f), + CrossProduct(Point(1.f, 0.f, 0.f), Point(0.f, 1.f, 0.f))); + EXPECT_EQ(Point(1.f, 0.f, 0.f), + CrossProduct(Point(0.f, 1.f, 0.f), Point(0.f, 0.f, 1.f))); + EXPECT_EQ(Point(0.f, -1.f, 0.f), + CrossProduct(Point(1.f, 0.f, 0.f), Point(0.f, 0.f, 1.f))); + EXPECT_EQ(Point(-4.f, -8.f, -4.f), + CrossProduct(Point(1.f, -2.f, 3.f), Point(-3.f, 2.f, -1.f))); +} + +TEST(ArrayUtilTest, AreParallel) { + EXPECT_TRUE(AreParallel(Point(0.f, 0.f, 0.f), Point(1.f, 2.f, 3.f))); + EXPECT_FALSE(AreParallel(Point(1.f, 0.f, 2.f), Point(0.f, 3.f, 0.f))); + EXPECT_FALSE(AreParallel(Point(1.f, 2.f, 0.f), Point(1.f, -0.5f, 0.f))); + EXPECT_FALSE(AreParallel(Point(1.f, -2.f, 3.f), Point(-3.f, 2.f, -1.f))); + EXPECT_TRUE(AreParallel(Point(1.f, 0.f, 0.f), Point(2.f, 0.f, 0.f))); + EXPECT_TRUE(AreParallel(Point(1.f, 2.f, 3.f), Point(-2.f, -4.f, -6.f))); +} + +TEST(ArrayUtilTest, ArePerpendicular) { + EXPECT_TRUE(ArePerpendicular(Point(0.f, 0.f, 0.f), Point(1.f, 2.f, 3.f))); + EXPECT_TRUE(ArePerpendicular(Point(1.f, 0.f, 2.f), Point(0.f, 3.f, 0.f))); + EXPECT_TRUE(ArePerpendicular(Point(1.f, 2.f, 0.f), Point(1.f, -0.5f, 0.f))); + EXPECT_FALSE(ArePerpendicular(Point(1.f, -2.f, 3.f), Point(-3.f, 2.f, -1.f))); + EXPECT_FALSE(ArePerpendicular(Point(1.f, 0.f, 0.f), Point(2.f, 0.f, 0.f))); + EXPECT_FALSE(ArePerpendicular(Point(1.f, 2.f, 3.f), Point(-2.f, -4.f, -6.f))); +} + +TEST(ArrayUtilTest, GetMinimumSpacing) { + std::vector<Point> geometry; + geometry.push_back(Point(0.f, 0.f, 0.f)); + geometry.push_back(Point(0.1f, 0.f, 0.f)); + EXPECT_FLOAT_EQ(0.1f, GetMinimumSpacing(geometry)); + geometry.push_back(Point(0.f, 0.05f, 0.f)); + EXPECT_FLOAT_EQ(0.05f, GetMinimumSpacing(geometry)); + geometry.push_back(Point(0.f, 0.f, 0.02f)); + EXPECT_FLOAT_EQ(0.02f, GetMinimumSpacing(geometry)); + geometry.push_back(Point(-0.003f, -0.004f, 0.02f)); + EXPECT_FLOAT_EQ(0.005f, GetMinimumSpacing(geometry)); +} + +TEST(ArrayUtilTest, GetDirectionIfLinear) { + std::vector<Point> geometry; + geometry.push_back(Point(0.f, 0.f, 
0.f)); + geometry.push_back(Point(0.1f, 0.f, 0.f)); + EXPECT_TRUE( + AreParallel(Point(1.f, 0.f, 0.f), *GetDirectionIfLinear(geometry))); + geometry.push_back(Point(0.15f, 0.f, 0.f)); + EXPECT_TRUE( + AreParallel(Point(1.f, 0.f, 0.f), *GetDirectionIfLinear(geometry))); + geometry.push_back(Point(-0.2f, 0.f, 0.f)); + EXPECT_TRUE( + AreParallel(Point(1.f, 0.f, 0.f), *GetDirectionIfLinear(geometry))); + geometry.push_back(Point(0.05f, 0.f, 0.f)); + EXPECT_TRUE( + AreParallel(Point(1.f, 0.f, 0.f), *GetDirectionIfLinear(geometry))); + geometry.push_back(Point(0.1f, 0.1f, 0.f)); + EXPECT_FALSE(GetDirectionIfLinear(geometry)); + geometry.push_back(Point(0.f, 0.f, -0.2f)); + EXPECT_FALSE(GetDirectionIfLinear(geometry)); +} + +TEST(ArrayUtilTest, GetNormalIfPlanar) { + std::vector<Point> geometry; + geometry.push_back(Point(0.f, 0.f, 0.f)); + geometry.push_back(Point(0.1f, 0.f, 0.f)); + EXPECT_FALSE(GetNormalIfPlanar(geometry)); + geometry.push_back(Point(0.15f, 0.f, 0.f)); + EXPECT_FALSE(GetNormalIfPlanar(geometry)); + geometry.push_back(Point(0.1f, 0.2f, 0.f)); + EXPECT_TRUE(AreParallel(Point(0.f, 0.f, 1.f), *GetNormalIfPlanar(geometry))); + geometry.push_back(Point(0.f, -0.15f, 0.f)); + EXPECT_TRUE(AreParallel(Point(0.f, 0.f, 1.f), *GetNormalIfPlanar(geometry))); + geometry.push_back(Point(0.f, 0.1f, 0.2f)); + EXPECT_FALSE(GetNormalIfPlanar(geometry)); + geometry.push_back(Point(0.f, 0.f, -0.15f)); + EXPECT_FALSE(GetNormalIfPlanar(geometry)); + geometry.push_back(Point(0.1f, 0.2f, 0.f)); + EXPECT_FALSE(GetNormalIfPlanar(geometry)); +} + +TEST(ArrayUtilTest, GetArrayNormalIfExists) { + std::vector<Point> geometry; + geometry.push_back(Point(0.f, 0.f, 0.f)); + geometry.push_back(Point(0.1f, 0.f, 0.f)); + EXPECT_TRUE( + AreParallel(Point(0.f, 1.f, 0.f), *GetArrayNormalIfExists(geometry))); + geometry.push_back(Point(0.15f, 0.f, 0.f)); + EXPECT_TRUE( + AreParallel(Point(0.f, 1.f, 0.f), *GetArrayNormalIfExists(geometry))); + geometry.push_back(Point(0.1f, 0.f, 0.2f)); + EXPECT_TRUE( + AreParallel(Point(0.f, 1.f, 0.f), *GetArrayNormalIfExists(geometry))); + geometry.push_back(Point(0.f, 0.f, -0.1f)); + EXPECT_TRUE( + AreParallel(Point(0.f, 1.f, 0.f), *GetArrayNormalIfExists(geometry))); + geometry.push_back(Point(0.1f, 0.2f, 0.3f)); + EXPECT_FALSE(GetArrayNormalIfExists(geometry)); + geometry.push_back(Point(0.f, -0.1f, 0.f)); + EXPECT_FALSE(GetArrayNormalIfExists(geometry)); + geometry.push_back(Point(1.f, 0.f, -0.2f)); + EXPECT_FALSE(GetArrayNormalIfExists(geometry)); +} + +TEST(ArrayUtilTest, DegreesToRadians) { + EXPECT_FLOAT_EQ(0.f, DegreesToRadians(0.f)); + EXPECT_FLOAT_EQ(static_cast<float>(M_PI) / 6.f, DegreesToRadians(30.f)); + EXPECT_FLOAT_EQ(-static_cast<float>(M_PI) / 4.f, DegreesToRadians(-45.f)); + EXPECT_FLOAT_EQ(static_cast<float>(M_PI) / 3.f, DegreesToRadians(60.f)); + EXPECT_FLOAT_EQ(-static_cast<float>(M_PI) / 2.f, DegreesToRadians(-90.f)); + EXPECT_FLOAT_EQ(2.f * static_cast<float>(M_PI) / 3.f, + DegreesToRadians(120.f)); + EXPECT_FLOAT_EQ(-3.f * static_cast<float>(M_PI) / 4.f, + DegreesToRadians(-135.f)); + EXPECT_FLOAT_EQ(5.f * static_cast<float>(M_PI) / 6.f, + DegreesToRadians(150.f)); + EXPECT_FLOAT_EQ(-static_cast<float>(M_PI), DegreesToRadians(-180.f)); +} + +TEST(ArrayUtilTest, RadiansToDegrees) { + EXPECT_FLOAT_EQ(0.f, RadiansToDegrees(0.f)); + EXPECT_FLOAT_EQ(30.f, RadiansToDegrees(M_PI / 6.f)); + EXPECT_FLOAT_EQ(-45.f, RadiansToDegrees(-M_PI / 4.f)); + EXPECT_FLOAT_EQ(60.f, RadiansToDegrees(M_PI / 3.f)); + EXPECT_FLOAT_EQ(-90.f, RadiansToDegrees(-M_PI / 2.f)); + 
EXPECT_FLOAT_EQ(120.f, RadiansToDegrees(2.f * M_PI / 3.f)); + EXPECT_FLOAT_EQ(-135.f, RadiansToDegrees(-3.f * M_PI / 4.f)); + EXPECT_FLOAT_EQ(150.f, RadiansToDegrees(5.f * M_PI / 6.f)); + EXPECT_FLOAT_EQ(-180.f, RadiansToDegrees(-M_PI)); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/beamformer/complex_matrix.h b/third_party/libwebrtc/webrtc/modules/audio_processing/beamformer/complex_matrix.h new file mode 100644 index 0000000000..9960e1dd87 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/beamformer/complex_matrix.h @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_BEAMFORMER_COMPLEX_MATRIX_H_ +#define MODULES_AUDIO_PROCESSING_BEAMFORMER_COMPLEX_MATRIX_H_ + +#include <complex> + +#include "modules/audio_processing/beamformer/matrix.h" +#include "rtc_base/checks.h" + +namespace webrtc { + +using std::complex; + +// An extension of Matrix for operations that only work on a complex type. +template <typename T> +class ComplexMatrix : public Matrix<complex<T> > { + public: + ComplexMatrix() : Matrix<complex<T> >() {} + + ComplexMatrix(size_t num_rows, size_t num_columns) + : Matrix<complex<T> >(num_rows, num_columns) {} + + ComplexMatrix(const complex<T>* data, size_t num_rows, size_t num_columns) + : Matrix<complex<T> >(data, num_rows, num_columns) {} + + // Complex Matrix operations. 
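+ // Each operation below comes in two flavors: an in-place version, and an
+ // overload that first copies |operand| into this matrix and then operates
+ // in place.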
+ ComplexMatrix& PointwiseConjugate() { + complex<T>* const data = this->data(); + size_t size = this->num_rows() * this->num_columns(); + for (size_t i = 0; i < size; ++i) { + data[i] = conj(data[i]); + } + + return *this; + } + + ComplexMatrix& PointwiseConjugate(const ComplexMatrix& operand) { + this->CopyFrom(operand); + return PointwiseConjugate(); + } + + ComplexMatrix& ConjugateTranspose() { + this->CopyDataToScratch(); + size_t num_rows = this->num_rows(); + this->SetNumRows(this->num_columns()); + this->SetNumColumns(num_rows); + this->Resize(); + return ConjugateTranspose(this->scratch_elements()); + } + + ComplexMatrix& ConjugateTranspose(const ComplexMatrix& operand) { + RTC_CHECK_EQ(operand.num_rows(), this->num_columns()); + RTC_CHECK_EQ(operand.num_columns(), this->num_rows()); + return ConjugateTranspose(operand.elements()); + } + + ComplexMatrix& ZeroImag() { + complex<T>* const data = this->data(); + size_t size = this->num_rows() * this->num_columns(); + for (size_t i = 0; i < size; ++i) { + data[i] = complex<T>(data[i].real(), 0); + } + + return *this; + } + + ComplexMatrix& ZeroImag(const ComplexMatrix& operand) { + this->CopyFrom(operand); + return ZeroImag(); + } + + private: + ComplexMatrix& ConjugateTranspose(const complex<T>* const* src) { + complex<T>* const* elements = this->elements(); + for (size_t i = 0; i < this->num_rows(); ++i) { + for (size_t j = 0; j < this->num_columns(); ++j) { + elements[i][j] = conj(src[j][i]); + } + } + + return *this; + } +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_BEAMFORMER_COMPLEX_MATRIX_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/beamformer/complex_matrix_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/beamformer/complex_matrix_unittest.cc new file mode 100644 index 0000000000..e11dfd2b5c --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/beamformer/complex_matrix_unittest.cc @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/beamformer/complex_matrix.h" +#include "modules/audio_processing/beamformer/matrix_test_helpers.h" +#include "test/gtest.h" + +namespace webrtc { + +TEST(ComplexMatrixTest, TestPointwiseConjugate) { + const int kNumRows = 2; + const int kNumCols = 4; + + const complex<float> kValuesInitial[kNumRows][kNumCols] = { + {complex<float>(1.1f, 1.1f), complex<float>(2.2f, -2.2f), + complex<float>(3.3f, 3.3f), complex<float>(4.4f, -4.4f)}, + {complex<float>(5.5f, 5.5f), complex<float>(6.6f, -6.6f), + complex<float>(7.7f, 7.7f), complex<float>(8.8f, -8.8f)}}; + + const complex<float> kValuesExpected[kNumRows][kNumCols] = { + {complex<float>(1.1f, -1.1f), complex<float>(2.2f, 2.2f), + complex<float>(3.3f, -3.3f), complex<float>(4.4f, 4.4f)}, + {complex<float>(5.5f, -5.5f), complex<float>(6.6f, 6.6f), + complex<float>(7.7f, -7.7f), complex<float>(8.8f, 8.8f)}}; + + ComplexMatrix<float> initial_mat(*kValuesInitial, kNumRows, kNumCols); + ComplexMatrix<float> expected_result(*kValuesExpected, kNumRows, kNumCols); + ComplexMatrix<float> actual_result(kNumRows, kNumCols); + + actual_result.PointwiseConjugate(initial_mat); + MatrixTestHelpers::ValidateMatrixEqualityComplexFloat(expected_result, + actual_result); + + initial_mat.PointwiseConjugate(); + MatrixTestHelpers::ValidateMatrixEqualityComplexFloat(initial_mat, + actual_result); +} + +TEST(ComplexMatrixTest, TestConjugateTranspose) { + const int kNumInitialRows = 2; + const int kNumInitialCols = 4; + const int kNumResultRows = 4; + const int kNumResultCols = 2; + + const complex<float> kValuesInitial[kNumInitialRows][kNumInitialCols] = { + {complex<float>(1.1f, 1.1f), complex<float>(2.2f, 2.2f), + complex<float>(3.3f, 3.3f), complex<float>(4.4f, 4.4f)}, + {complex<float>(5.5f, 5.5f), complex<float>(6.6f, 6.6f), + complex<float>(7.7f, 7.7f), complex<float>(8.8f, 8.8f)}}; + + const complex<float> kValuesExpected[kNumResultRows][kNumResultCols] = { + {complex<float>(1.1f, -1.1f), complex<float>(5.5f, -5.5f)}, + {complex<float>(2.2f, -2.2f), complex<float>(6.6f, -6.6f)}, + {complex<float>(3.3f, -3.3f), complex<float>(7.7f, -7.7f)}, + {complex<float>(4.4f, -4.4f), complex<float>(8.8f, -8.8f)}}; + + ComplexMatrix<float> initial_mat( + *kValuesInitial, kNumInitialRows, kNumInitialCols); + ComplexMatrix<float> expected_result( + *kValuesExpected, kNumResultRows, kNumResultCols); + ComplexMatrix<float> actual_result(kNumResultRows, kNumResultCols); + + actual_result.ConjugateTranspose(initial_mat); + MatrixTestHelpers::ValidateMatrixEqualityComplexFloat(expected_result, + actual_result); + + initial_mat.ConjugateTranspose(); + MatrixTestHelpers::ValidateMatrixEqualityComplexFloat(initial_mat, + actual_result); +} + +TEST(ComplexMatrixTest, TestZeroImag) { + const int kNumRows = 2; + const int kNumCols = 2; + const complex<float> kValuesInitial[kNumRows][kNumCols] = { + {complex<float>(1.1f, 1.1f), complex<float>(2.2f, 2.2f)}, + {complex<float>(3.3f, 3.3f), complex<float>(4.4f, 4.4f)}}; + const complex<float> kValuesExpected[kNumRows][kNumCols] = { + {complex<float>(1.1f, 0.f), complex<float>(2.2f, 0.f)}, + {complex<float>(3.3f, 0.f), complex<float>(4.4f, 0.f)}}; + + ComplexMatrix<float> initial_mat(*kValuesInitial, kNumRows, kNumCols); + ComplexMatrix<float> expected_result(*kValuesExpected, kNumRows, kNumCols); + ComplexMatrix<float> actual_result; + + actual_result.ZeroImag(initial_mat); + MatrixTestHelpers::ValidateMatrixEqualityComplexFloat(expected_result, + actual_result); + + initial_mat.ZeroImag(); + 
MatrixTestHelpers::ValidateMatrixEqualityComplexFloat(initial_mat, + actual_result); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/beamformer/covariance_matrix_generator.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/beamformer/covariance_matrix_generator.cc new file mode 100644 index 0000000000..df36d5974f --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/beamformer/covariance_matrix_generator.cc @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#define _USE_MATH_DEFINES + +#include "modules/audio_processing/beamformer/covariance_matrix_generator.h" + +#include <cmath> + +namespace webrtc { +namespace { + +float BesselJ0(float x) { +#ifdef WEBRTC_WIN + return _j0(x); +#else + return j0(x); +#endif +} + +// Calculates the Euclidean norm for a row vector. +float Norm(const ComplexMatrix<float>& x) { + RTC_CHECK_EQ(1, x.num_rows()); + const size_t length = x.num_columns(); + const complex<float>* elems = x.elements()[0]; + float result = 0.f; + for (size_t i = 0u; i < length; ++i) { + result += std::norm(elems[i]); + } + return std::sqrt(result); +} + +} // namespace + +void CovarianceMatrixGenerator::UniformCovarianceMatrix( + float wave_number, + const std::vector<Point>& geometry, + ComplexMatrix<float>* mat) { + RTC_CHECK_EQ(geometry.size(), mat->num_rows()); + RTC_CHECK_EQ(geometry.size(), mat->num_columns()); + + complex<float>* const* mat_els = mat->elements(); + for (size_t i = 0; i < geometry.size(); ++i) { + for (size_t j = 0; j < geometry.size(); ++j) { + if (wave_number > 0.f) { + mat_els[i][j] = + BesselJ0(wave_number * Distance(geometry[i], geometry[j])); + } else { + mat_els[i][j] = i == j ? 
1.f : 0.f; + } + } + } +} + +void CovarianceMatrixGenerator::AngledCovarianceMatrix( + float sound_speed, + float angle, + size_t frequency_bin, + size_t fft_size, + size_t num_freq_bins, + int sample_rate, + const std::vector<Point>& geometry, + ComplexMatrix<float>* mat) { + RTC_CHECK_EQ(geometry.size(), mat->num_rows()); + RTC_CHECK_EQ(geometry.size(), mat->num_columns()); + + ComplexMatrix<float> interf_cov_vector(1, geometry.size()); + ComplexMatrix<float> interf_cov_vector_transposed(geometry.size(), 1); + PhaseAlignmentMasks(frequency_bin, + fft_size, + sample_rate, + sound_speed, + geometry, + angle, + &interf_cov_vector); + interf_cov_vector.Scale(1.f / Norm(interf_cov_vector)); + interf_cov_vector_transposed.Transpose(interf_cov_vector); + interf_cov_vector.PointwiseConjugate(); + mat->Multiply(interf_cov_vector_transposed, interf_cov_vector); +} + +void CovarianceMatrixGenerator::PhaseAlignmentMasks( + size_t frequency_bin, + size_t fft_size, + int sample_rate, + float sound_speed, + const std::vector<Point>& geometry, + float angle, + ComplexMatrix<float>* mat) { + RTC_CHECK_EQ(1, mat->num_rows()); + RTC_CHECK_EQ(geometry.size(), mat->num_columns()); + + float freq_in_hertz = + (static_cast<float>(frequency_bin) / fft_size) * sample_rate; + + complex<float>* const* mat_els = mat->elements(); + for (size_t c_ix = 0; c_ix < geometry.size(); ++c_ix) { + float distance = std::cos(angle) * geometry[c_ix].x() + + std::sin(angle) * geometry[c_ix].y(); + float phase_shift = -2.f * M_PI * distance * freq_in_hertz / sound_speed; + + // Euler's formula for mat[0][c_ix] = e^(j * phase_shift). + mat_els[0][c_ix] = complex<float>(cos(phase_shift), sin(phase_shift)); + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/beamformer/covariance_matrix_generator.h b/third_party/libwebrtc/webrtc/modules/audio_processing/beamformer/covariance_matrix_generator.h new file mode 100644 index 0000000000..6a5841f631 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/beamformer/covariance_matrix_generator.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_BEAMFORMER_COVARIANCE_MATRIX_GENERATOR_H_ +#define MODULES_AUDIO_PROCESSING_BEAMFORMER_COVARIANCE_MATRIX_GENERATOR_H_ + +#include "modules/audio_processing/beamformer/complex_matrix.h" +#include "modules/audio_processing/beamformer/array_util.h" + +namespace webrtc { + +// Helper class for Beamformer in charge of generating covariance matrices. For +// each function, the passed-in ComplexMatrix is expected to be of size +// |num_input_channels| x |num_input_channels|. +class CovarianceMatrixGenerator { + public: + // A uniform covariance matrix with a gap at the target location. WARNING: + // The target angle is assumed to be 0. + static void UniformCovarianceMatrix(float wave_number, + const std::vector<Point>& geometry, + ComplexMatrix<float>* mat); + + // The covariance matrix of a source at the given angle. 
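+ // Implemented as the outer product of the normalized phase-alignment
+ // (steering) vector for that angle with its conjugate, so the resulting
+ // matrix has rank one.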
+  static void AngledCovarianceMatrix(float sound_speed,
+                                     float angle,
+                                     size_t frequency_bin,
+                                     size_t fft_size,
+                                     size_t num_freq_bins,
+                                     int sample_rate,
+                                     const std::vector<Point>& geometry,
+                                     ComplexMatrix<float>* mat);
+
+  // Calculates phase shifts that, when applied to a multichannel signal and
+  // added together, cause constructive interference for sources located at
+  // the given angle.
+  static void PhaseAlignmentMasks(size_t frequency_bin,
+                                  size_t fft_size,
+                                  int sample_rate,
+                                  float sound_speed,
+                                  const std::vector<Point>& geometry,
+                                  float angle,
+                                  ComplexMatrix<float>* mat);
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_BEAMFORMER_COVARIANCE_MATRIX_GENERATOR_H_
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/beamformer/covariance_matrix_generator_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/beamformer/covariance_matrix_generator_unittest.cc
new file mode 100644
index 0000000000..a6518e5d95
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/beamformer/covariance_matrix_generator_unittest.cc
@@ -0,0 +1,230 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#define _USE_MATH_DEFINES
+
+#include "modules/audio_processing/beamformer/covariance_matrix_generator.h"
+
+#include <cmath>
+
+#include "modules/audio_processing/beamformer/matrix_test_helpers.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+
+using std::complex;
+
+TEST(CovarianceMatrixGeneratorTest, TestUniformCovarianceMatrix2Mics) {
+  const float kWaveNumber = 0.5775f;
+  const int kNumberMics = 2;
+  const float kMicSpacing = 0.05f;
+  const float kTolerance = 0.0001f;
+  std::vector<Point> geometry;
+  float first_mic = (kNumberMics - 1) * kMicSpacing / 2.f;
+  for (int i = 0; i < kNumberMics; ++i) {
+    geometry.push_back(Point(i * kMicSpacing - first_mic, 0.f, 0.f));
+  }
+  ComplexMatrix<float> actual_covariance_matrix(kNumberMics, kNumberMics);
+  CovarianceMatrixGenerator::UniformCovarianceMatrix(kWaveNumber,
+                                                     geometry,
+                                                     &actual_covariance_matrix);
+
+  complex<float>* const* actual_els = actual_covariance_matrix.elements();
+
+  EXPECT_NEAR(actual_els[0][0].real(), 1.f, kTolerance);
+  EXPECT_NEAR(actual_els[0][1].real(), 0.9998f, kTolerance);
+  EXPECT_NEAR(actual_els[1][0].real(), 0.9998f, kTolerance);
+  EXPECT_NEAR(actual_els[1][1].real(), 1.f, kTolerance);
+
+  EXPECT_NEAR(actual_els[0][0].imag(), 0.f, kTolerance);
+  EXPECT_NEAR(actual_els[0][1].imag(), 0.f, kTolerance);
+  EXPECT_NEAR(actual_els[1][0].imag(), 0.f, kTolerance);
+  EXPECT_NEAR(actual_els[1][1].imag(), 0.f, kTolerance);
+}
+
+TEST(CovarianceMatrixGeneratorTest, TestUniformCovarianceMatrix3Mics) {
+  const float kWaveNumber = 10.3861f;
+  const int kNumberMics = 3;
+  const float kMicSpacing = 0.04f;
+  const float kTolerance = 0.0001f;
+  std::vector<Point> geometry;
+  float first_mic = (kNumberMics - 1) * kMicSpacing / 2.f;
+  for (int i = 0; i < kNumberMics; ++i) {
+    geometry.push_back(Point(i * kMicSpacing - first_mic, 0.f, 0.f));
+  }
+  ComplexMatrix<float> actual_covariance_matrix(kNumberMics, kNumberMics);
+  CovarianceMatrixGenerator::UniformCovarianceMatrix(kWaveNumber,
+                                                     geometry,
+                                                     &actual_covariance_matrix);
+
+
+ 
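+  // Editorial note: each entry (i, j) of a diffuse-field covariance matrix is
+  // J0(wave_number * distance(i, j)), which is real-valued, so the imaginary
+  // parts checked below are all expected to be zero.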
complex<float>* const* actual_els = actual_covariance_matrix.elements(); + + EXPECT_NEAR(actual_els[0][0].real(), 1.f, kTolerance); + EXPECT_NEAR(actual_els[0][1].real(), 0.9573f, kTolerance); + EXPECT_NEAR(actual_els[0][2].real(), 0.8347f, kTolerance); + EXPECT_NEAR(actual_els[1][0].real(), 0.9573f, kTolerance); + EXPECT_NEAR(actual_els[1][1].real(), 1.f, kTolerance); + EXPECT_NEAR(actual_els[1][2].real(), 0.9573f, kTolerance); + EXPECT_NEAR(actual_els[2][0].real(), 0.8347f, kTolerance); + EXPECT_NEAR(actual_els[2][1].real(), 0.9573f, kTolerance); + EXPECT_NEAR(actual_els[2][2].real(), 1.f, kTolerance); + + EXPECT_NEAR(actual_els[0][0].imag(), 0.f, kTolerance); + EXPECT_NEAR(actual_els[0][1].imag(), 0.f, kTolerance); + EXPECT_NEAR(actual_els[0][2].imag(), 0.f, kTolerance); + EXPECT_NEAR(actual_els[1][0].imag(), 0.f, kTolerance); + EXPECT_NEAR(actual_els[1][1].imag(), 0.f, kTolerance); + EXPECT_NEAR(actual_els[1][2].imag(), 0.f, kTolerance); + EXPECT_NEAR(actual_els[2][0].imag(), 0.f, kTolerance); + EXPECT_NEAR(actual_els[2][1].imag(), 0.f, kTolerance); + EXPECT_NEAR(actual_els[2][2].imag(), 0.f, kTolerance); +} + +TEST(CovarianceMatrixGeneratorTest, TestUniformCovarianceMatrix3DArray) { + const float kWaveNumber = 1.2345f; + const int kNumberMics = 4; + const float kTolerance = 0.0001f; + std::vector<Point> geometry; + geometry.push_back(Point(-0.025f, -0.05f, -0.075f)); + geometry.push_back(Point(0.075f, -0.05f, -0.075f)); + geometry.push_back(Point(-0.025f, 0.15f, -0.075f)); + geometry.push_back(Point(-0.025f, -0.05f, 0.225f)); + ComplexMatrix<float> actual_covariance_matrix(kNumberMics, kNumberMics); + CovarianceMatrixGenerator::UniformCovarianceMatrix(kWaveNumber, + geometry, + &actual_covariance_matrix); + + complex<float>* const* actual_els = actual_covariance_matrix.elements(); + + EXPECT_NEAR(actual_els[0][0].real(), 1.f, kTolerance); + EXPECT_NEAR(actual_els[0][1].real(), 0.9962f, kTolerance); + EXPECT_NEAR(actual_els[0][2].real(), 0.9848f, kTolerance); + EXPECT_NEAR(actual_els[0][3].real(), 0.9660f, kTolerance); + EXPECT_NEAR(actual_els[1][0].real(), 0.9962f, kTolerance); + EXPECT_NEAR(actual_els[1][1].real(), 1.f, kTolerance); + EXPECT_NEAR(actual_els[1][2].real(), 0.9810f, kTolerance); + EXPECT_NEAR(actual_els[1][3].real(), 0.9623f, kTolerance); + EXPECT_NEAR(actual_els[2][0].real(), 0.9848f, kTolerance); + EXPECT_NEAR(actual_els[2][1].real(), 0.9810f, kTolerance); + EXPECT_NEAR(actual_els[2][2].real(), 1.f, kTolerance); + EXPECT_NEAR(actual_els[2][3].real(), 0.9511f, kTolerance); + EXPECT_NEAR(actual_els[3][0].real(), 0.9660f, kTolerance); + EXPECT_NEAR(actual_els[3][1].real(), 0.9623f, kTolerance); + EXPECT_NEAR(actual_els[3][2].real(), 0.9511f, kTolerance); + EXPECT_NEAR(actual_els[3][3].real(), 1.f, kTolerance); + + EXPECT_NEAR(actual_els[0][0].imag(), 0.f, kTolerance); + EXPECT_NEAR(actual_els[0][1].imag(), 0.f, kTolerance); + EXPECT_NEAR(actual_els[0][2].imag(), 0.f, kTolerance); + EXPECT_NEAR(actual_els[0][3].imag(), 0.f, kTolerance); + EXPECT_NEAR(actual_els[1][0].imag(), 0.f, kTolerance); + EXPECT_NEAR(actual_els[1][1].imag(), 0.f, kTolerance); + EXPECT_NEAR(actual_els[1][2].imag(), 0.f, kTolerance); + EXPECT_NEAR(actual_els[1][3].imag(), 0.f, kTolerance); + EXPECT_NEAR(actual_els[2][0].imag(), 0.f, kTolerance); + EXPECT_NEAR(actual_els[2][1].imag(), 0.f, kTolerance); + EXPECT_NEAR(actual_els[2][2].imag(), 0.f, kTolerance); + EXPECT_NEAR(actual_els[2][3].imag(), 0.f, kTolerance); + EXPECT_NEAR(actual_els[3][0].imag(), 0.f, kTolerance); + 
EXPECT_NEAR(actual_els[3][1].imag(), 0.f, kTolerance); + EXPECT_NEAR(actual_els[3][2].imag(), 0.f, kTolerance); + EXPECT_NEAR(actual_els[3][3].imag(), 0.f, kTolerance); +} + +TEST(CovarianceMatrixGeneratorTest, TestAngledCovarianceMatrix2Mics) { + const float kSpeedOfSound = 340; + const float kAngle = static_cast<float>(M_PI) / 4.f; + const float kFrequencyBin = 6; + const float kFftSize = 512; + const int kNumberFrequencyBins = 257; + const int kSampleRate = 16000; + const int kNumberMics = 2; + const float kMicSpacing = 0.04f; + const float kTolerance = 0.0001f; + std::vector<Point> geometry; + float first_mic = (kNumberMics - 1) * kMicSpacing / 2.f; + for (int i = 0; i < kNumberMics; ++i) { + geometry.push_back(Point(i * kMicSpacing - first_mic, 0.f, 0.f)); + } + ComplexMatrix<float> actual_covariance_matrix(kNumberMics, kNumberMics); + CovarianceMatrixGenerator::AngledCovarianceMatrix(kSpeedOfSound, + kAngle, + kFrequencyBin, + kFftSize, + kNumberFrequencyBins, + kSampleRate, + geometry, + &actual_covariance_matrix); + + complex<float>* const* actual_els = actual_covariance_matrix.elements(); + + EXPECT_NEAR(actual_els[0][0].real(), 0.5f, kTolerance); + EXPECT_NEAR(actual_els[0][1].real(), 0.4976f, kTolerance); + EXPECT_NEAR(actual_els[1][0].real(), 0.4976f, kTolerance); + EXPECT_NEAR(actual_els[1][1].real(), 0.5f, kTolerance); + + EXPECT_NEAR(actual_els[0][0].imag(), 0.f, kTolerance); + EXPECT_NEAR(actual_els[0][1].imag(), 0.0489f, kTolerance); + EXPECT_NEAR(actual_els[1][0].imag(), -0.0489f, kTolerance); + EXPECT_NEAR(actual_els[1][1].imag(), 0.f, kTolerance); +} + +TEST(CovarianceMatrixGeneratorTest, TestAngledCovarianceMatrix3Mics) { + const float kSpeedOfSound = 340; + const float kAngle = static_cast<float>(M_PI) / 4.f; + const float kFrequencyBin = 9; + const float kFftSize = 512; + const int kNumberFrequencyBins = 257; + const int kSampleRate = 42000; + const int kNumberMics = 3; + const float kMicSpacing = 0.05f; + const float kTolerance = 0.0001f; + std::vector<Point> geometry; + float first_mic = (kNumberMics - 1) * kMicSpacing / 2.f; + for (int i = 0; i < kNumberMics; ++i) { + geometry.push_back(Point(i * kMicSpacing - first_mic, 0.f, 0.f)); + } + ComplexMatrix<float> actual_covariance_matrix(kNumberMics, kNumberMics); + CovarianceMatrixGenerator::AngledCovarianceMatrix(kSpeedOfSound, + kAngle, + kFrequencyBin, + kFftSize, + kNumberFrequencyBins, + kSampleRate, + geometry, + &actual_covariance_matrix); + + complex<float>* const* actual_els = actual_covariance_matrix.elements(); + + EXPECT_NEAR(actual_els[0][0].real(), 0.3333f, kTolerance); + EXPECT_NEAR(actual_els[0][1].real(), 0.2953f, kTolerance); + EXPECT_NEAR(actual_els[0][2].real(), 0.1899f, kTolerance); + EXPECT_NEAR(actual_els[1][0].real(), 0.2953f, kTolerance); + EXPECT_NEAR(actual_els[1][1].real(), 0.3333f, kTolerance); + EXPECT_NEAR(actual_els[1][2].real(), 0.2953f, kTolerance); + EXPECT_NEAR(actual_els[2][0].real(), 0.1899f, kTolerance); + EXPECT_NEAR(actual_els[2][1].real(), 0.2953f, kTolerance); + EXPECT_NEAR(actual_els[2][2].real(), 0.3333f, kTolerance); + + EXPECT_NEAR(actual_els[0][0].imag(), 0.f, kTolerance); + EXPECT_NEAR(actual_els[0][1].imag(), 0.1546f, kTolerance); + EXPECT_NEAR(actual_els[0][2].imag(), 0.274f, kTolerance); + EXPECT_NEAR(actual_els[1][0].imag(), -0.1546f, kTolerance); + EXPECT_NEAR(actual_els[1][1].imag(), 0.f, kTolerance); + EXPECT_NEAR(actual_els[1][2].imag(), 0.1546f, kTolerance); + EXPECT_NEAR(actual_els[2][0].imag(), -0.274f, kTolerance); + EXPECT_NEAR(actual_els[2][1].imag(), 
-0.1546f, kTolerance);
+  EXPECT_NEAR(actual_els[2][2].imag(), 0.f, kTolerance);
+}
+
+// PhaseAlignmentMasks is tested by AngledCovarianceMatrix and by
+// InitBeamformerWeights in BeamformerUnittest.
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/beamformer/matrix.h b/third_party/libwebrtc/webrtc/modules/audio_processing/beamformer/matrix.h
new file mode 100644
index 0000000000..bf94c25df0
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/beamformer/matrix.h
@@ -0,0 +1,369 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_BEAMFORMER_MATRIX_H_
+#define MODULES_AUDIO_PROCESSING_BEAMFORMER_MATRIX_H_
+
+#include <algorithm>
+#include <cmath>
+#include <complex>
+#include <cstdlib>
+#include <cstring>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include "rtc_base/checks.h"
+#include "rtc_base/constructormagic.h"
+
+namespace {
+
+// Wrappers to get around the compiler warning resulting from the fact that
+// there's no std::sqrt overload for ints. We cast all non-complex types to
+// a double for the sqrt method.
+template <typename T>
+T sqrt_wrapper(T x) {
+  return std::sqrt(static_cast<double>(x));
+}
+
+template <typename S>
+std::complex<S> sqrt_wrapper(std::complex<S> x) {
+  return std::sqrt(x);
+}
+}  // namespace
+
+namespace webrtc {
+
+// Matrix is a class for doing standard matrix operations on 2 dimensional
+// matrices of any size. Results of matrix operations are stored in the
+// calling object. Function overloads exist for both in-place (the calling
+// object is used as both an operand and the result) and out-of-place (all
+// operands are passed in as parameters) operations. If operand dimensions
+// mismatch, the program crashes. Out-of-place operations change the size of
+// the calling object, if necessary, before operating.
+//
+// 'In-place' operations that inherently change the size of the matrix (eg.
+// Transpose, Multiply on different-sized matrices) must make temporary copies
+// (|scratch_elements_| and |scratch_data_|) of existing data to complete the
+// operations.
+//
+// The data is stored contiguously. Data can be accessed internally as a flat
+// array, |data_|, or as an array of row pointers, |elements_|, but is
+// available to users only as an array of row pointers through |elements()|.
+// Memory for storage is allocated when a matrix is resized only if the new
+// size overflows capacity. Memory needed temporarily for any operations is
+// similarly resized only if the new size overflows capacity.
+//
+// If you pass in storage through the ctor, that storage is copied into the
+// matrix. TODO(claguna): albeit tricky, allow for data to be referenced
+// instead of copied, and owned by the user.
+template <typename T>
+class Matrix {
+ public:
+  Matrix() : num_rows_(0), num_columns_(0) {}
+
+  // Allocates space for the elements and initializes all values to zero.
+  Matrix(size_t num_rows, size_t num_columns)
+      : num_rows_(num_rows), num_columns_(num_columns) {
+    Resize();
+    scratch_data_.resize(num_rows_ * num_columns_);
+    scratch_elements_.resize(num_rows_);
+  }
+
+  // Copies |data| into the new Matrix.
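+  // |data| is read as num_rows * num_columns values laid out row by row.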
+ Matrix(const T* data, size_t num_rows, size_t num_columns) + : num_rows_(0), num_columns_(0) { + CopyFrom(data, num_rows, num_columns); + scratch_data_.resize(num_rows_ * num_columns_); + scratch_elements_.resize(num_rows_); + } + + virtual ~Matrix() {} + + // Deep copy an existing matrix. + void CopyFrom(const Matrix& other) { + CopyFrom(&other.data_[0], other.num_rows_, other.num_columns_); + } + + // Copy |data| into the Matrix. The current data is lost. + void CopyFrom(const T* const data, size_t num_rows, size_t num_columns) { + Resize(num_rows, num_columns); + memcpy(&data_[0], data, num_rows_ * num_columns_ * sizeof(data_[0])); + } + + Matrix& CopyFromColumn(const T* const* src, + size_t column_index, + size_t num_rows) { + Resize(1, num_rows); + for (size_t i = 0; i < num_columns_; ++i) { + data_[i] = src[i][column_index]; + } + + return *this; + } + + void Resize(size_t num_rows, size_t num_columns) { + if (num_rows != num_rows_ || num_columns != num_columns_) { + num_rows_ = num_rows; + num_columns_ = num_columns; + Resize(); + } + } + + // Accessors and mutators. + size_t num_rows() const { return num_rows_; } + size_t num_columns() const { return num_columns_; } + T* const* elements() { return &elements_[0]; } + const T* const* elements() const { return &elements_[0]; } + + T Trace() { + RTC_CHECK_EQ(num_rows_, num_columns_); + + T trace = 0; + for (size_t i = 0; i < num_rows_; ++i) { + trace += elements_[i][i]; + } + return trace; + } + + // Matrix Operations. Returns *this to support method chaining. + Matrix& Transpose() { + CopyDataToScratch(); + Resize(num_columns_, num_rows_); + return Transpose(scratch_elements()); + } + + Matrix& Transpose(const Matrix& operand) { + RTC_CHECK_EQ(operand.num_rows_, num_columns_); + RTC_CHECK_EQ(operand.num_columns_, num_rows_); + + return Transpose(operand.elements()); + } + + template <typename S> + Matrix& Scale(const S& scalar) { + for (size_t i = 0; i < data_.size(); ++i) { + data_[i] *= scalar; + } + + return *this; + } + + template <typename S> + Matrix& Scale(const Matrix& operand, const S& scalar) { + CopyFrom(operand); + return Scale(scalar); + } + + Matrix& Add(const Matrix& operand) { + RTC_CHECK_EQ(num_rows_, operand.num_rows_); + RTC_CHECK_EQ(num_columns_, operand.num_columns_); + + for (size_t i = 0; i < data_.size(); ++i) { + data_[i] += operand.data_[i]; + } + + return *this; + } + + Matrix& Add(const Matrix& lhs, const Matrix& rhs) { + CopyFrom(lhs); + return Add(rhs); + } + + Matrix& Subtract(const Matrix& operand) { + RTC_CHECK_EQ(num_rows_, operand.num_rows_); + RTC_CHECK_EQ(num_columns_, operand.num_columns_); + + for (size_t i = 0; i < data_.size(); ++i) { + data_[i] -= operand.data_[i]; + } + + return *this; + } + + Matrix& Subtract(const Matrix& lhs, const Matrix& rhs) { + CopyFrom(lhs); + return Subtract(rhs); + } + + Matrix& PointwiseMultiply(const Matrix& operand) { + RTC_CHECK_EQ(num_rows_, operand.num_rows_); + RTC_CHECK_EQ(num_columns_, operand.num_columns_); + + for (size_t i = 0; i < data_.size(); ++i) { + data_[i] *= operand.data_[i]; + } + + return *this; + } + + Matrix& PointwiseMultiply(const Matrix& lhs, const Matrix& rhs) { + CopyFrom(lhs); + return PointwiseMultiply(rhs); + } + + Matrix& PointwiseDivide(const Matrix& operand) { + RTC_CHECK_EQ(num_rows_, operand.num_rows_); + RTC_CHECK_EQ(num_columns_, operand.num_columns_); + + for (size_t i = 0; i < data_.size(); ++i) { + data_[i] /= operand.data_[i]; + } + + return *this; + } + + Matrix& PointwiseDivide(const Matrix& lhs, const Matrix& rhs) { + 
CopyFrom(lhs);
+    return PointwiseDivide(rhs);
+  }
+
+  Matrix& PointwiseSquareRoot() {
+    for (size_t i = 0; i < data_.size(); ++i) {
+      data_[i] = sqrt_wrapper(data_[i]);
+    }
+
+    return *this;
+  }
+
+  Matrix& PointwiseSquareRoot(const Matrix& operand) {
+    CopyFrom(operand);
+    return PointwiseSquareRoot();
+  }
+
+  Matrix& PointwiseAbsoluteValue() {
+    for (size_t i = 0; i < data_.size(); ++i) {
+      data_[i] = std::abs(data_[i]);
+    }
+
+    return *this;
+  }
+
+  Matrix& PointwiseAbsoluteValue(const Matrix& operand) {
+    CopyFrom(operand);
+    return PointwiseAbsoluteValue();
+  }
+
+  Matrix& PointwiseSquare() {
+    for (size_t i = 0; i < data_.size(); ++i) {
+      data_[i] *= data_[i];
+    }
+
+    return *this;
+  }
+
+  Matrix& PointwiseSquare(const Matrix& operand) {
+    CopyFrom(operand);
+    return PointwiseSquare();
+  }
+
+  Matrix& Multiply(const Matrix& lhs, const Matrix& rhs) {
+    RTC_CHECK_EQ(lhs.num_columns_, rhs.num_rows_);
+    RTC_CHECK_EQ(num_rows_, lhs.num_rows_);
+    RTC_CHECK_EQ(num_columns_, rhs.num_columns_);
+
+    return Multiply(lhs.elements(), rhs.num_rows_, rhs.elements());
+  }
+
+  Matrix& Multiply(const Matrix& rhs) {
+    RTC_CHECK_EQ(num_columns_, rhs.num_rows_);
+
+    CopyDataToScratch();
+    Resize(num_rows_, rhs.num_columns_);
+    return Multiply(scratch_elements(), rhs.num_rows_, rhs.elements());
+  }
+
+  std::string ToString() const {
+    std::ostringstream ss;
+    ss << std::endl << "Matrix" << std::endl;
+
+    for (size_t i = 0; i < num_rows_; ++i) {
+      for (size_t j = 0; j < num_columns_; ++j) {
+        ss << elements_[i][j] << " ";
+      }
+      ss << std::endl;
+    }
+    ss << std::endl;
+
+    return ss.str();
+  }
+
+ protected:
+  void SetNumRows(const size_t num_rows) { num_rows_ = num_rows; }
+  void SetNumColumns(const size_t num_columns) { num_columns_ = num_columns; }
+  T* data() { return &data_[0]; }
+  const T* data() const { return &data_[0]; }
+  const T* const* scratch_elements() const { return &scratch_elements_[0]; }
+
+  // Resize the matrix. If an increase in capacity is required, the current
+  // data is lost.
+  void Resize() {
+    size_t size = num_rows_ * num_columns_;
+    data_.resize(size);
+    elements_.resize(num_rows_);
+
+    for (size_t i = 0; i < num_rows_; ++i) {
+      elements_[i] = &data_[i * num_columns_];
+    }
+  }
+
+  // Copies data_ into scratch_data_ and updates scratch_elements_ accordingly.
+  void CopyDataToScratch() {
+    scratch_data_ = data_;
+    scratch_elements_.resize(num_rows_);
+
+    for (size_t i = 0; i < num_rows_; ++i) {
+      scratch_elements_[i] = &scratch_data_[i * num_columns_];
+    }
+  }
+
+ private:
+  size_t num_rows_;
+  size_t num_columns_;
+  std::vector<T> data_;
+  std::vector<T*> elements_;
+
+  // Stores temporary copies of |data_| and |elements_| for in-place operations
+  // where referring to original data is necessary.
+  std::vector<T> scratch_data_;
+  std::vector<T*> scratch_elements_;
+
+  // Helpers for Transpose and Multiply operations that unify in-place and
+  // out-of-place solutions.
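+  // |src| is either scratch_elements() (for the in-place variants, after a
+  // CopyDataToScratch() call) or another matrix's row pointers (for the
+  // out-of-place variants).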
+ Matrix& Transpose(const T* const* src) { + for (size_t i = 0; i < num_rows_; ++i) { + for (size_t j = 0; j < num_columns_; ++j) { + elements_[i][j] = src[j][i]; + } + } + + return *this; + } + + Matrix& Multiply(const T* const* lhs, + size_t num_rows_rhs, + const T* const* rhs) { + for (size_t row = 0; row < num_rows_; ++row) { + for (size_t col = 0; col < num_columns_; ++col) { + T cur_element = 0; + for (size_t i = 0; i < num_rows_rhs; ++i) { + cur_element += lhs[row][i] * rhs[i][col]; + } + + elements_[row][col] = cur_element; + } + } + + return *this; + } + + RTC_DISALLOW_COPY_AND_ASSIGN(Matrix); +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_BEAMFORMER_MATRIX_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/beamformer/matrix_test_helpers.h b/third_party/libwebrtc/webrtc/modules/audio_processing/beamformer/matrix_test_helpers.h new file mode 100644 index 0000000000..62b47082e3 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/beamformer/matrix_test_helpers.h @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_BEAMFORMER_MATRIX_TEST_HELPERS_H_ +#define MODULES_AUDIO_PROCESSING_BEAMFORMER_MATRIX_TEST_HELPERS_H_ + +#include "modules/audio_processing/beamformer/complex_matrix.h" +#include "modules/audio_processing/beamformer/matrix.h" +#include "test/gtest.h" + +namespace { +const float kTolerance = 0.001f; +} + +namespace webrtc { + +using std::complex; + +// Functions used in both matrix_unittest and complex_matrix_unittest. 
+class MatrixTestHelpers { + public: + template <typename T> + static void ValidateMatrixEquality(const Matrix<T>& expected, + const Matrix<T>& actual) { + EXPECT_EQ(expected.num_rows(), actual.num_rows()); + EXPECT_EQ(expected.num_columns(), actual.num_columns()); + + const T* const* expected_elements = expected.elements(); + const T* const* actual_elements = actual.elements(); + for (size_t i = 0; i < expected.num_rows(); ++i) { + for (size_t j = 0; j < expected.num_columns(); ++j) { + EXPECT_EQ(expected_elements[i][j], actual_elements[i][j]); + } + } + } + + static void ValidateMatrixEqualityFloat(const Matrix<float>& expected, + const Matrix<float>& actual) { + EXPECT_EQ(expected.num_rows(), actual.num_rows()); + EXPECT_EQ(expected.num_columns(), actual.num_columns()); + + const float* const* expected_elements = expected.elements(); + const float* const* actual_elements = actual.elements(); + for (size_t i = 0; i < expected.num_rows(); ++i) { + for (size_t j = 0; j < expected.num_columns(); ++j) { + EXPECT_NEAR(expected_elements[i][j], actual_elements[i][j], kTolerance); + } + } + } + + static void ValidateMatrixEqualityComplexFloat( + const Matrix<complex<float> >& expected, + const Matrix<complex<float> >& actual) { + EXPECT_EQ(expected.num_rows(), actual.num_rows()); + EXPECT_EQ(expected.num_columns(), actual.num_columns()); + + const complex<float>* const* expected_elements = expected.elements(); + const complex<float>* const* actual_elements = actual.elements(); + for (size_t i = 0; i < expected.num_rows(); ++i) { + for (size_t j = 0; j < expected.num_columns(); ++j) { + EXPECT_NEAR(expected_elements[i][j].real(), + actual_elements[i][j].real(), + kTolerance); + EXPECT_NEAR(expected_elements[i][j].imag(), + actual_elements[i][j].imag(), + kTolerance); + } + } + } + + static void ValidateMatrixNearEqualityComplexFloat( + const Matrix<complex<float> >& expected, + const Matrix<complex<float> >& actual, + float tolerance) { + EXPECT_EQ(expected.num_rows(), actual.num_rows()); + EXPECT_EQ(expected.num_columns(), actual.num_columns()); + + const complex<float>* const* expected_elements = expected.elements(); + const complex<float>* const* actual_elements = actual.elements(); + for (size_t i = 0; i < expected.num_rows(); ++i) { + for (size_t j = 0; j < expected.num_columns(); ++j) { + EXPECT_NEAR(expected_elements[i][j].real(), + actual_elements[i][j].real(), + tolerance); + EXPECT_NEAR(expected_elements[i][j].imag(), + actual_elements[i][j].imag(), + tolerance); + } + } + } +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_BEAMFORMER_MATRIX_TEST_HELPERS_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/beamformer/matrix_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/beamformer/matrix_unittest.cc new file mode 100644 index 0000000000..4badfd07ed --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/beamformer/matrix_unittest.cc @@ -0,0 +1,326 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include <complex> + +#include "modules/audio_processing/beamformer/matrix.h" +#include "modules/audio_processing/beamformer/matrix_test_helpers.h" +#include "test/gtest.h" + +namespace webrtc { + +using std::complex; + +TEST(MatrixTest, TestMultiplySameSize) { + const int kNumRows = 2; + const int kNumCols = 2; + const float kValuesLeft[kNumRows][kNumCols] = {{1.1f, 2.2f}, {3.3f, 4.4f}}; + const float kValuesRight[kNumRows][kNumCols] = {{5.4f, 127.f}, + {4600.f, -555.f}}; + const float kValuesExpected[kNumRows][kNumCols] = {{10125.94f, -1081.3f}, + {20257.82f, -2022.9f}}; + + Matrix<float> lh_mat(*kValuesLeft, kNumRows, kNumCols); + Matrix<float> rh_mat(*kValuesRight, kNumRows, kNumCols); + Matrix<float> expected_result(*kValuesExpected, kNumRows, kNumCols); + Matrix<float> actual_result(kNumRows, kNumCols); + + actual_result.Multiply(lh_mat, rh_mat); + MatrixTestHelpers::ValidateMatrixEquality(expected_result, actual_result); + + lh_mat.Multiply(rh_mat); + MatrixTestHelpers::ValidateMatrixEquality(lh_mat, actual_result); +} + +TEST(MatrixTest, TestMultiplyDifferentSize) { + const int kNumRowsLeft = 2; + const int kNumColsLeft = 3; + const int kNumRowsRight = 3; + const int kNumColsRight = 2; + const int kValuesLeft[kNumRowsLeft][kNumColsLeft] = {{35, 466, -15}, + {-3, 3422, 9}}; + const int kValuesRight[kNumRowsRight][kNumColsRight] = { + {765, -42}, {0, 194}, {625, 66321}}; + const int kValuesExpected[kNumRowsLeft][kNumColsRight] = {{17400, -905881}, + {3330, 1260883}}; + + Matrix<int> lh_mat(*kValuesLeft, kNumRowsLeft, kNumColsLeft); + Matrix<int> rh_mat(*kValuesRight, kNumRowsRight, kNumColsRight); + Matrix<int> expected_result(*kValuesExpected, kNumRowsLeft, kNumColsRight); + Matrix<int> actual_result(kNumRowsLeft, kNumColsRight); + + actual_result.Multiply(lh_mat, rh_mat); + MatrixTestHelpers::ValidateMatrixEquality(expected_result, actual_result); + + lh_mat.Multiply(rh_mat); + MatrixTestHelpers::ValidateMatrixEquality(lh_mat, actual_result); +} + +TEST(MatrixTest, TestTranspose) { + const int kNumInitialRows = 2; + const int kNumInitialCols = 4; + const int kNumResultRows = 4; + const int kNumResultCols = 2; + const float kValuesInitial[kNumInitialRows][kNumInitialCols] = { + {1.1f, 2.2f, 3.3f, 4.4f}, {5.5f, 6.6f, 7.7f, 8.8f}}; + const float kValuesExpected[kNumResultRows][kNumResultCols] = { + {1.1f, 5.5f}, {2.2f, 6.6f}, {3.3f, 7.7f}, {4.4f, 8.8f}}; + + Matrix<float> initial_mat(*kValuesInitial, kNumInitialRows, kNumInitialCols); + Matrix<float> expected_result( + *kValuesExpected, kNumResultRows, kNumResultCols); + Matrix<float> actual_result(kNumResultRows, kNumResultCols); + + actual_result.Transpose(initial_mat); + MatrixTestHelpers::ValidateMatrixEqualityFloat(expected_result, + actual_result); + initial_mat.Transpose(); + MatrixTestHelpers::ValidateMatrixEqualityFloat(initial_mat, actual_result); +} + +TEST(MatrixTest, TestScale) { + const int kNumRows = 3; + const int kNumCols = 3; + const int kScaleFactor = -9; + const int kValuesInitial[kNumRows][kNumCols] = { + {1, 20, 5000}, {-3, -29, 66}, {7654, 0, -23455}}; + const int kValuesExpected[kNumRows][kNumCols] = { + {-9, -180, -45000}, {27, 261, -594}, {-68886, 0, 211095}}; + + Matrix<int> initial_mat(*kValuesInitial, kNumRows, kNumCols); + Matrix<int> expected_result(*kValuesExpected, kNumRows, kNumCols); + Matrix<int> actual_result; + + actual_result.Scale(initial_mat, kScaleFactor); + MatrixTestHelpers::ValidateMatrixEquality(expected_result, actual_result); + + initial_mat.Scale(kScaleFactor); + 
MatrixTestHelpers::ValidateMatrixEquality(initial_mat, actual_result); +} + +TEST(MatrixTest, TestPointwiseAdd) { + const int kNumRows = 2; + const int kNumCols = 3; + const float kValuesLeft[kNumRows][kNumCols] = {{1.1f, 210.45f, -549.2f}, + {11.876f, 586.7f, -64.35f}}; + const float kValuesRight[kNumRows][kNumCols] = {{-50.4f, 1.f, 0.5f}, + {460.f, -554.2f, 4566.f}}; + const float kValuesExpected[kNumRows][kNumCols] = { + {-49.3f, 211.45f, -548.7f}, {471.876f, 32.5f, 4501.65f}}; + + Matrix<float> lh_mat(*kValuesLeft, kNumRows, kNumCols); + Matrix<float> rh_mat(*kValuesRight, kNumRows, kNumCols); + Matrix<float> expected_result(*kValuesExpected, kNumRows, kNumCols); + Matrix<float> actual_result; + + actual_result.Add(lh_mat, rh_mat); + MatrixTestHelpers::ValidateMatrixEqualityFloat(expected_result, + actual_result); + lh_mat.Add(rh_mat); + MatrixTestHelpers::ValidateMatrixEqualityFloat(lh_mat, actual_result); +} + +TEST(MatrixTest, TestPointwiseSubtract) { + const int kNumRows = 3; + const int kNumCols = 2; + const float kValuesLeft[kNumRows][kNumCols] = { + {1.1f, 210.45f}, {-549.2f, 11.876f}, {586.7f, -64.35f}}; + const float kValuesRight[kNumRows][kNumCols] = { + {-50.4f, 1.f}, {0.5f, 460.f}, {-554.2f, 4566.f}}; + const float kValuesExpected[kNumRows][kNumCols] = { + {51.5f, 209.45f}, {-549.7f, -448.124f}, {1140.9f, -4630.35f}}; + + Matrix<float> lh_mat(*kValuesLeft, kNumRows, kNumCols); + Matrix<float> rh_mat(*kValuesRight, kNumRows, kNumCols); + Matrix<float> expected_result(*kValuesExpected, kNumRows, kNumCols); + Matrix<float> actual_result; + + actual_result.Subtract(lh_mat, rh_mat); + MatrixTestHelpers::ValidateMatrixEqualityFloat(expected_result, + actual_result); + + lh_mat.Subtract(rh_mat); + MatrixTestHelpers::ValidateMatrixEqualityFloat(lh_mat, actual_result); +} + +TEST(MatrixTest, TestPointwiseMultiply) { + const int kNumRows = 1; + const int kNumCols = 5; + const float kValuesLeft[kNumRows][kNumCols] = { + {1.1f, 6.4f, 0.f, -1.f, -88.3f}}; + const float kValuesRight[kNumRows][kNumCols] = { + {53.2f, -210.45f, -549.2f, 99.99f, -45.2f}}; + const float kValuesExpected[kNumRows][kNumCols] = { + {58.52f, -1346.88f, 0.f, -99.99f, 3991.16f}}; + + Matrix<float> lh_mat(*kValuesLeft, kNumRows, kNumCols); + Matrix<float> rh_mat(*kValuesRight, kNumRows, kNumCols); + Matrix<float> expected_result(*kValuesExpected, kNumRows, kNumCols); + Matrix<float> actual_result; + + actual_result.PointwiseMultiply(lh_mat, rh_mat); + MatrixTestHelpers::ValidateMatrixEqualityFloat(expected_result, + actual_result); + + lh_mat.PointwiseMultiply(rh_mat); + MatrixTestHelpers::ValidateMatrixEqualityFloat(lh_mat, actual_result); +} + +TEST(MatrixTest, TestPointwiseDivide) { + const int kNumRows = 5; + const int kNumCols = 1; + const float kValuesLeft[kNumRows][kNumCols] = { + {1.1f}, {6.4f}, {0.f}, {-1.f}, {-88.3f}}; + const float kValuesRight[kNumRows][kNumCols] = { + {53.2f}, {-210.45f}, {-549.2f}, {99.99f}, {-45.2f}}; + const float kValuesExpected[kNumRows][kNumCols] = { + {0.020676691f}, {-0.03041102399f}, {0.f}, {-0.010001f}, {1.9535398f}}; + + Matrix<float> lh_mat(*kValuesLeft, kNumRows, kNumCols); + Matrix<float> rh_mat(*kValuesRight, kNumRows, kNumCols); + Matrix<float> expected_result(*kValuesExpected, kNumRows, kNumCols); + Matrix<float> actual_result; + + actual_result.PointwiseDivide(lh_mat, rh_mat); + MatrixTestHelpers::ValidateMatrixEqualityFloat(expected_result, + actual_result); + + lh_mat.PointwiseDivide(rh_mat); + MatrixTestHelpers::ValidateMatrixEqualityFloat(lh_mat, actual_result); +} 
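+
+// Editorial sketch, not part of the upstream suite: since each Matrix
+// operation returns *this, in-place calls can be chained. The values here
+// are chosen by hand for illustration: (2 * x)^2 for each element.
+TEST(MatrixTest, TestMethodChainingSketch) {
+  const int kNumRows = 2;
+  const int kNumCols = 2;
+  const float kValues[kNumRows][kNumCols] = {{1.f, 2.f}, {3.f, 4.f}};
+  const float kValuesExpected[kNumRows][kNumCols] = {{4.f, 16.f},
+                                                     {36.f, 64.f}};
+
+  Matrix<float> mat(*kValues, kNumRows, kNumCols);
+  Matrix<float> expected_result(*kValuesExpected, kNumRows, kNumCols);
+
+  // Scale every element by 2, then square every element, in place.
+  mat.Scale(2.f).PointwiseSquare();
+  MatrixTestHelpers::ValidateMatrixEqualityFloat(expected_result, mat);
+}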
+ +TEST(MatrixTest, TestPointwiseSquareRoot) { + const int kNumRows = 2; + const int kNumCols = 2; + const int kValues[kNumRows][kNumCols] = {{4, 9}, {16, 0}}; + const int kValuesExpected[kNumRows][kNumCols] = {{2, 3}, {4, 0}}; + + Matrix<int> operand_mat(*kValues, kNumRows, kNumCols); + Matrix<int> expected_result(*kValuesExpected, kNumRows, kNumCols); + Matrix<int> actual_result; + + actual_result.PointwiseSquareRoot(operand_mat); + MatrixTestHelpers::ValidateMatrixEquality(expected_result, actual_result); + + operand_mat.PointwiseSquareRoot(); + MatrixTestHelpers::ValidateMatrixEquality(operand_mat, actual_result); +} + +TEST(MatrixTest, TestPointwiseSquareRootComplex) { + const int kNumRows = 1; + const int kNumCols = 3; + const complex<float> kValues[kNumRows][kNumCols] = { + {complex<float>(-4.f, 0), complex<float>(0, 9), complex<float>(3, -4)}}; + const complex<float> kValuesExpected[kNumRows][kNumCols] = { + {complex<float>(0.f, 2.f), complex<float>(2.1213202f, 2.1213202f), + complex<float>(2.f, -1.f)}}; + + Matrix<complex<float> > operand_mat(*kValues, kNumRows, kNumCols); + Matrix<complex<float> > expected_result(*kValuesExpected, kNumRows, kNumCols); + Matrix<complex<float> > actual_result; + + actual_result.PointwiseSquareRoot(operand_mat); + MatrixTestHelpers::ValidateMatrixEqualityComplexFloat(expected_result, + actual_result); + + operand_mat.PointwiseSquareRoot(); + MatrixTestHelpers::ValidateMatrixEqualityComplexFloat(operand_mat, + actual_result); +} + +TEST(MatrixTest, TestPointwiseAbsoluteValue) { + const int kNumRows = 1; + const int kNumCols = 3; + const complex<float> kValues[kNumRows][kNumCols] = { + {complex<float>(-4.f, 0), complex<float>(0, 9), complex<float>(3, -4)}}; + const complex<float> kValuesExpected[kNumRows][kNumCols] = { + {complex<float>(4.f, 0), complex<float>(9.f, 0), complex<float>(5.f, 0)}}; + + Matrix<complex<float> > operand_mat(*kValues, kNumRows, kNumCols); + Matrix<complex<float> > expected_result(*kValuesExpected, kNumRows, kNumCols); + Matrix<complex<float> > actual_result; + + actual_result.PointwiseAbsoluteValue(operand_mat); + MatrixTestHelpers::ValidateMatrixEqualityComplexFloat(expected_result, + actual_result); + + operand_mat.PointwiseAbsoluteValue(); + MatrixTestHelpers::ValidateMatrixEqualityComplexFloat(operand_mat, + actual_result); +} + +TEST(MatrixTest, TestPointwiseSquare) { + const int kNumRows = 1; + const int kNumCols = 3; + const float kValues[kNumRows][kNumCols] = {{2.4f, -4.f, 3.3f}}; + const float kValuesExpected[kNumRows][kNumCols] = {{5.76f, 16.f, 10.89f}}; + + Matrix<float> operand_mat(*kValues, kNumRows, kNumCols); + Matrix<float> expected_result(*kValuesExpected, kNumRows, kNumCols); + Matrix<float> actual_result; + + actual_result.PointwiseSquare(operand_mat); + MatrixTestHelpers::ValidateMatrixEqualityFloat(expected_result, + actual_result); + + operand_mat.PointwiseSquare(); + MatrixTestHelpers::ValidateMatrixEqualityFloat(operand_mat, actual_result); +} + +TEST(MatrixTest, TestComplexOperations) { + const int kNumRows = 2; + const int kNumCols = 2; + + const complex<float> kValuesLeft[kNumRows][kNumCols] = { + {complex<float>(1.f, 1.f), complex<float>(2.f, 2.f)}, + {complex<float>(3.f, 3.f), complex<float>(4.f, 4.f)}}; + + const complex<float> kValuesRight[kNumRows][kNumCols] = { + {complex<float>(5.f, 5.f), complex<float>(6.f, 6.f)}, + {complex<float>(7.f, 7.f), complex<float>(8.f, 8.f)}}; + + const complex<float> kValuesExpectedAdd[kNumRows][kNumCols] = { + {complex<float>(6.f, 6.f), complex<float>(8.f, 8.f)}, 
+ {complex<float>(10.f, 10.f), complex<float>(12.f, 12.f)}}; + + const complex<float> kValuesExpectedMultiply[kNumRows][kNumCols] = { + {complex<float>(0.f, 38.f), complex<float>(0.f, 44.f)}, + {complex<float>(0.f, 86.f), complex<float>(0.f, 100.f)}}; + + const complex<float> kValuesExpectedPointwiseDivide[kNumRows][kNumCols] = { + {complex<float>(0.2f, 0.f), complex<float>(0.33333333f, 0.f)}, + {complex<float>(0.42857143f, 0.f), complex<float>(0.5f, 0.f)}}; + + Matrix<complex<float> > lh_mat(*kValuesLeft, kNumRows, kNumCols); + Matrix<complex<float> > rh_mat(*kValuesRight, kNumRows, kNumCols); + Matrix<complex<float> > expected_result_add( + *kValuesExpectedAdd, kNumRows, kNumCols); + Matrix<complex<float> > expected_result_multiply( + *kValuesExpectedMultiply, kNumRows, kNumCols); + Matrix<complex<float> > expected_result_pointwise_divide( + *kValuesExpectedPointwiseDivide, kNumRows, kNumCols); + Matrix<complex<float> > actual_result_add; + Matrix<complex<float> > actual_result_multiply(kNumRows, kNumCols); + Matrix<complex<float> > actual_result_pointwise_divide; + + actual_result_add.Add(lh_mat, rh_mat); + MatrixTestHelpers::ValidateMatrixEqualityComplexFloat(expected_result_add, + actual_result_add); + + actual_result_multiply.Multiply(lh_mat, rh_mat); + MatrixTestHelpers::ValidateMatrixEqualityComplexFloat( + expected_result_multiply, actual_result_multiply); + + actual_result_pointwise_divide.PointwiseDivide(lh_mat, rh_mat); + MatrixTestHelpers::ValidateMatrixEqualityComplexFloat( + expected_result_pointwise_divide, actual_result_pointwise_divide); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/beamformer/mock_nonlinear_beamformer.h b/third_party/libwebrtc/webrtc/modules/audio_processing/beamformer/mock_nonlinear_beamformer.h new file mode 100644 index 0000000000..c4c7358b63 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/beamformer/mock_nonlinear_beamformer.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_BEAMFORMER_MOCK_BEAMFORMER_H_
+#define MODULES_AUDIO_PROCESSING_BEAMFORMER_MOCK_BEAMFORMER_H_
+
+#include <vector>
+
+#include "modules/audio_processing/beamformer/nonlinear_beamformer.h"
+#include "test/gmock.h"
+
+namespace webrtc {
+
+class MockNonlinearBeamformer : public NonlinearBeamformer {
+ public:
+  MockNonlinearBeamformer(const std::vector<Point>& array_geometry,
+                          size_t num_postfilter_channels)
+      : NonlinearBeamformer(array_geometry, num_postfilter_channels) {}
+
+  explicit MockNonlinearBeamformer(const std::vector<Point>& array_geometry)
+      : NonlinearBeamformer(array_geometry, 1u) {}
+
+  MOCK_METHOD2(Initialize, void(int chunk_size_ms, int sample_rate_hz));
+  MOCK_METHOD1(AnalyzeChunk, void(const ChannelBuffer<float>& data));
+  MOCK_METHOD1(PostFilter, void(ChannelBuffer<float>* data));
+  MOCK_METHOD1(IsInBeam, bool(const SphericalPointf& spherical_point));
+  MOCK_METHOD0(is_target_present, bool());
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_BEAMFORMER_MOCK_BEAMFORMER_H_
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc
new file mode 100644
index 0000000000..12f6d2f465
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc
@@ -0,0 +1,599 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#define _USE_MATH_DEFINES
+
+#include "modules/audio_processing/beamformer/nonlinear_beamformer.h"
+
+#include <algorithm>
+#include <cmath>
+#include <numeric>
+#include <vector>
+
+#include "common_audio/window_generator.h"
+#include "modules/audio_processing/beamformer/covariance_matrix_generator.h"
+#include "rtc_base/arraysize.h"
+
+namespace webrtc {
+namespace {
+
+// Alpha for the Kaiser Bessel Derived window.
+const float kKbdAlpha = 1.5f;
+
+const float kSpeedOfSoundMeterSeconds = 343;
+
+// The minimum separation in radians between the target direction and an
+// interferer scenario.
+const float kMinAwayRadians = 0.2f;
+
+// The separation between the target direction and the closest interferer
+// scenario is proportional to this constant.
+const float kAwaySlope = 0.008f;
+
+// When calculating the interference covariance matrix, this is the weight for
+// the weighted average between the uniform covariance matrix and the angled
+// covariance matrix.
+// Rpsi = Rpsi_angled * kBalance + Rpsi_uniform * (1 - kBalance)
+const float kBalance = 0.95f;
+
+// Alpha coefficients for mask smoothing.
+const float kMaskTimeSmoothAlpha = 0.2f;
+const float kMaskFrequencySmoothAlpha = 0.6f;
+
+// The average mask is computed from masks in this mid-frequency range. If
+// these ranges are changed, |kMaskQuantile| might need to be adjusted.
+const int kLowMeanStartHz = 200;
+const int kLowMeanEndHz = 400;
+
+// Range limiter for subtractive terms in the numerator and denominator of the
+// postfilter expression. It handles the scenario mismatch between the true and
+// model sources (target and interference).
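+// In CalculatePostfilterMask() the mask works out to
+//   (1 - min(kCutOffConstant, ratio / rmw)) /
+//       (1 - min(kCutOffConstant, ratio / ratio_rxiw_rxim)),
+// so clamping at kCutOffConstant keeps both terms strictly positive.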
+const float kCutOffConstant = 0.9999f; + +// Quantile of mask values which is used to estimate target presence. +const float kMaskQuantile = 0.7f; +// Mask threshold over which the data is considered signal and not interference. +// It has to be updated every time the postfilter calculation is changed +// significantly. +// TODO(aluebs): Write a tool to tune the target threshold automatically based +// on files annotated with target and interference ground truth. +const float kMaskTargetThreshold = 0.01f; +// Time in seconds after which the data is considered interference if the mask +// does not pass |kMaskTargetThreshold|. +const float kHoldTargetSeconds = 0.25f; + +// To compensate for the attenuation this algorithm introduces to the target +// signal. It was estimated empirically from a low-noise low-reverberation +// recording from broadside. +const float kCompensationGain = 2.f; + +// Does conjugate(|norm_mat|) * |mat| * transpose(|norm_mat|). No extra space is +// used; to accomplish this, we compute both multiplications in the same loop. +// The returned norm is clamped to be non-negative. +float Norm(const ComplexMatrix<float>& mat, + const ComplexMatrix<float>& norm_mat) { + RTC_CHECK_EQ(1, norm_mat.num_rows()); + RTC_CHECK_EQ(norm_mat.num_columns(), mat.num_rows()); + RTC_CHECK_EQ(norm_mat.num_columns(), mat.num_columns()); + + complex<float> first_product = complex<float>(0.f, 0.f); + complex<float> second_product = complex<float>(0.f, 0.f); + + const complex<float>* const* mat_els = mat.elements(); + const complex<float>* const* norm_mat_els = norm_mat.elements(); + + for (size_t i = 0; i < norm_mat.num_columns(); ++i) { + for (size_t j = 0; j < norm_mat.num_columns(); ++j) { + first_product += conj(norm_mat_els[0][j]) * mat_els[j][i]; + } + second_product += first_product * norm_mat_els[0][i]; + first_product = 0.f; + } + return std::max(second_product.real(), 0.f); +} + +// Does conjugate(|lhs|) * |rhs| for row vectors |lhs| and |rhs|. +complex<float> ConjugateDotProduct(const ComplexMatrix<float>& lhs, + const ComplexMatrix<float>& rhs) { + RTC_CHECK_EQ(1, lhs.num_rows()); + RTC_CHECK_EQ(1, rhs.num_rows()); + RTC_CHECK_EQ(lhs.num_columns(), rhs.num_columns()); + + const complex<float>* const* lhs_elements = lhs.elements(); + const complex<float>* const* rhs_elements = rhs.elements(); + + complex<float> result = complex<float>(0.f, 0.f); + for (size_t i = 0; i < lhs.num_columns(); ++i) { + result += conj(lhs_elements[0][i]) * rhs_elements[0][i]; + } + + return result; +} + +// Works for positive numbers only. +size_t Round(float x) { + return static_cast<size_t>(std::floor(x + 0.5f)); +} + +// Calculates the sum of squares of a complex matrix. +float SumSquares(const ComplexMatrix<float>& mat) { + float sum_squares = 0.f; + const complex<float>* const* mat_els = mat.elements(); + for (size_t i = 0; i < mat.num_rows(); ++i) { + for (size_t j = 0; j < mat.num_columns(); ++j) { + float abs_value = std::abs(mat_els[i][j]); + sum_squares += abs_value * abs_value; + } + } + return sum_squares; +} + +// Does |out| = |in|.' * conj(|in|) for row vector |in|. 
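+// For in = [a b] this gives out = [[a * conj(a), a * conj(b)],
+//                                  [b * conj(a), b * conj(b)]].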
+void TransposedConjugatedProduct(const ComplexMatrix<float>& in, + ComplexMatrix<float>* out) { + RTC_CHECK_EQ(1, in.num_rows()); + RTC_CHECK_EQ(out->num_rows(), in.num_columns()); + RTC_CHECK_EQ(out->num_columns(), in.num_columns()); + const complex<float>* in_elements = in.elements()[0]; + complex<float>* const* out_elements = out->elements(); + for (size_t i = 0; i < out->num_rows(); ++i) { + for (size_t j = 0; j < out->num_columns(); ++j) { + out_elements[i][j] = in_elements[i] * conj(in_elements[j]); + } + } +} + +std::vector<Point> GetCenteredArray(std::vector<Point> array_geometry) { + for (size_t dim = 0; dim < 3; ++dim) { + float center = 0.f; + for (size_t i = 0; i < array_geometry.size(); ++i) { + center += array_geometry[i].c[dim]; + } + center /= array_geometry.size(); + for (size_t i = 0; i < array_geometry.size(); ++i) { + array_geometry[i].c[dim] -= center; + } + } + return array_geometry; +} + +} // namespace + +const float NonlinearBeamformer::kHalfBeamWidthRadians = DegreesToRadians(20.f); + +// static +const size_t NonlinearBeamformer::kNumFreqBins; + +PostFilterTransform::PostFilterTransform(size_t num_channels, + size_t chunk_length, + float* window, + size_t fft_size) + : transform_(num_channels, + num_channels, + chunk_length, + window, + fft_size, + fft_size / 2, + this), + num_freq_bins_(fft_size / 2 + 1) {} + +void PostFilterTransform::ProcessChunk(float* const* data, float* final_mask) { + final_mask_ = final_mask; + transform_.ProcessChunk(data, data); +} + +void PostFilterTransform::ProcessAudioBlock(const complex<float>* const* input, + size_t num_input_channels, + size_t num_freq_bins, + size_t num_output_channels, + complex<float>* const* output) { + RTC_DCHECK_EQ(num_freq_bins_, num_freq_bins); + RTC_DCHECK_EQ(num_input_channels, num_output_channels); + + for (size_t ch = 0; ch < num_input_channels; ++ch) { + for (size_t f_ix = 0; f_ix < num_freq_bins_; ++f_ix) { + output[ch][f_ix] = + kCompensationGain * final_mask_[f_ix] * input[ch][f_ix]; + } + } +} + +NonlinearBeamformer::NonlinearBeamformer( + const std::vector<Point>& array_geometry, + size_t num_postfilter_channels, + SphericalPointf target_direction) + : num_input_channels_(array_geometry.size()), + num_postfilter_channels_(num_postfilter_channels), + array_geometry_(GetCenteredArray(array_geometry)), + array_normal_(GetArrayNormalIfExists(array_geometry)), + min_mic_spacing_(GetMinimumSpacing(array_geometry)), + target_angle_radians_(target_direction.azimuth()), + away_radians_(std::min( + static_cast<float>(M_PI), + std::max(kMinAwayRadians, + kAwaySlope * static_cast<float>(M_PI) / min_mic_spacing_))) { + WindowGenerator::KaiserBesselDerived(kKbdAlpha, kFftSize, window_); +} + +NonlinearBeamformer::~NonlinearBeamformer() = default; + +void NonlinearBeamformer::Initialize(int chunk_size_ms, int sample_rate_hz) { + chunk_length_ = + static_cast<size_t>(sample_rate_hz / (1000.f / chunk_size_ms)); + sample_rate_hz_ = sample_rate_hz; + + high_pass_postfilter_mask_ = 1.f; + is_target_present_ = false; + hold_target_blocks_ = kHoldTargetSeconds * 2 * sample_rate_hz / kFftSize; + interference_blocks_count_ = hold_target_blocks_; + + process_transform_.reset(new LappedTransform(num_input_channels_, + 0u, + chunk_length_, + window_, + kFftSize, + kFftSize / 2, + this)); + postfilter_transform_.reset(new PostFilterTransform( + num_postfilter_channels_, chunk_length_, window_, kFftSize)); + const float wave_number_step = + (2.f * M_PI * sample_rate_hz_) / (kFftSize * kSpeedOfSoundMeterSeconds); + for 
(size_t i = 0; i < kNumFreqBins; ++i) { + time_smooth_mask_[i] = 1.f; + final_mask_[i] = 1.f; + wave_numbers_[i] = i * wave_number_step; + } + + InitLowFrequencyCorrectionRanges(); + InitDiffuseCovMats(); + AimAt(SphericalPointf(target_angle_radians_, 0.f, 1.f)); +} + +// These bin indexes determine the regions over which a mean is taken. This is +// applied as a constant value over the adjacent end "frequency correction" +// regions. +// +// low_mean_start_bin_ high_mean_start_bin_ +// v v constant +// |----------------|--------|----------------|-------|----------------| +// constant ^ ^ +// low_mean_end_bin_ high_mean_end_bin_ +// +void NonlinearBeamformer::InitLowFrequencyCorrectionRanges() { + low_mean_start_bin_ = Round(static_cast<float>(kLowMeanStartHz) * + kFftSize / sample_rate_hz_); + low_mean_end_bin_ = Round(static_cast<float>(kLowMeanEndHz) * + kFftSize / sample_rate_hz_); + + RTC_DCHECK_GT(low_mean_start_bin_, 0U); + RTC_DCHECK_LT(low_mean_start_bin_, low_mean_end_bin_); +} + +void NonlinearBeamformer::InitHighFrequencyCorrectionRanges() { + const float kAliasingFreqHz = + kSpeedOfSoundMeterSeconds / + (min_mic_spacing_ * (1.f + std::abs(std::cos(target_angle_radians_)))); + const float kHighMeanStartHz = std::min(0.5f * kAliasingFreqHz, + sample_rate_hz_ / 2.f); + const float kHighMeanEndHz = std::min(0.75f * kAliasingFreqHz, + sample_rate_hz_ / 2.f); + high_mean_start_bin_ = Round(kHighMeanStartHz * kFftSize / sample_rate_hz_); + high_mean_end_bin_ = Round(kHighMeanEndHz * kFftSize / sample_rate_hz_); + + RTC_DCHECK_LT(low_mean_end_bin_, high_mean_end_bin_); + RTC_DCHECK_LT(high_mean_start_bin_, high_mean_end_bin_); + RTC_DCHECK_LT(high_mean_end_bin_, kNumFreqBins - 1); +} + +void NonlinearBeamformer::InitInterfAngles() { + interf_angles_radians_.clear(); + const Point target_direction = AzimuthToPoint(target_angle_radians_); + const Point clockwise_interf_direction = + AzimuthToPoint(target_angle_radians_ - away_radians_); + if (!array_normal_ || + DotProduct(*array_normal_, target_direction) * + DotProduct(*array_normal_, clockwise_interf_direction) >= + 0.f) { + // The target and clockwise interferer are in the same half-plane defined + // by the array. + interf_angles_radians_.push_back(target_angle_radians_ - away_radians_); + } else { + // Otherwise, the interferer will begin reflecting back at the target. + // Instead rotate it away 180 degrees. + interf_angles_radians_.push_back(target_angle_radians_ - away_radians_ + + M_PI); + } + const Point counterclock_interf_direction = + AzimuthToPoint(target_angle_radians_ + away_radians_); + if (!array_normal_ || + DotProduct(*array_normal_, target_direction) * + DotProduct(*array_normal_, counterclock_interf_direction) >= + 0.f) { + // The target and counter-clockwise interferer are in the same half-plane + // defined by the array. + interf_angles_radians_.push_back(target_angle_radians_ + away_radians_); + } else { + // Otherwise, the interferer will begin reflecting back at the target. + // Instead rotate it away 180 degrees. 
+ interf_angles_radians_.push_back(target_angle_radians_ + away_radians_ - + M_PI); + } +} + +void NonlinearBeamformer::InitDelaySumMasks() { + for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) { + delay_sum_masks_[f_ix].Resize(1, num_input_channels_); + CovarianceMatrixGenerator::PhaseAlignmentMasks( + f_ix, kFftSize, sample_rate_hz_, kSpeedOfSoundMeterSeconds, + array_geometry_, target_angle_radians_, &delay_sum_masks_[f_ix]); + + complex_f norm_factor = sqrt( + ConjugateDotProduct(delay_sum_masks_[f_ix], delay_sum_masks_[f_ix])); + delay_sum_masks_[f_ix].Scale(1.f / norm_factor); + } +} + +void NonlinearBeamformer::InitTargetCovMats() { + for (size_t i = 0; i < kNumFreqBins; ++i) { + target_cov_mats_[i].Resize(num_input_channels_, num_input_channels_); + TransposedConjugatedProduct(delay_sum_masks_[i], &target_cov_mats_[i]); + } +} + +void NonlinearBeamformer::InitDiffuseCovMats() { + for (size_t i = 0; i < kNumFreqBins; ++i) { + uniform_cov_mat_[i].Resize(num_input_channels_, num_input_channels_); + CovarianceMatrixGenerator::UniformCovarianceMatrix( + wave_numbers_[i], array_geometry_, &uniform_cov_mat_[i]); + complex_f normalization_factor = uniform_cov_mat_[i].elements()[0][0]; + uniform_cov_mat_[i].Scale(1.f / normalization_factor); + uniform_cov_mat_[i].Scale(1 - kBalance); + } +} + +void NonlinearBeamformer::InitInterfCovMats() { + for (size_t i = 0; i < kNumFreqBins; ++i) { + interf_cov_mats_[i].clear(); + for (size_t j = 0; j < interf_angles_radians_.size(); ++j) { + interf_cov_mats_[i].push_back(std::unique_ptr<ComplexMatrixF>( + new ComplexMatrixF(num_input_channels_, num_input_channels_))); + ComplexMatrixF angled_cov_mat(num_input_channels_, num_input_channels_); + CovarianceMatrixGenerator::AngledCovarianceMatrix( + kSpeedOfSoundMeterSeconds, + interf_angles_radians_[j], + i, + kFftSize, + kNumFreqBins, + sample_rate_hz_, + array_geometry_, + &angled_cov_mat); + // Normalize matrices before averaging them. + complex_f normalization_factor = angled_cov_mat.elements()[0][0]; + angled_cov_mat.Scale(1.f / normalization_factor); + // Weighted average of matrices. + angled_cov_mat.Scale(kBalance); + interf_cov_mats_[i][j]->Add(uniform_cov_mat_[i], angled_cov_mat); + } + } +} + +void NonlinearBeamformer::NormalizeCovMats() { + for (size_t i = 0; i < kNumFreqBins; ++i) { + rxiws_[i] = Norm(target_cov_mats_[i], delay_sum_masks_[i]); + rpsiws_[i].clear(); + for (size_t j = 0; j < interf_angles_radians_.size(); ++j) { + rpsiws_[i].push_back(Norm(*interf_cov_mats_[i][j], delay_sum_masks_[i])); + } + } +} + +void NonlinearBeamformer::AnalyzeChunk(const ChannelBuffer<float>& data) { + RTC_DCHECK_EQ(data.num_channels(), num_input_channels_); + RTC_DCHECK_EQ(data.num_frames_per_band(), chunk_length_); + + old_high_pass_mask_ = high_pass_postfilter_mask_; + process_transform_->ProcessChunk(data.channels(0), nullptr); +} + +void NonlinearBeamformer::PostFilter(ChannelBuffer<float>* data) { + RTC_DCHECK_EQ(data->num_frames_per_band(), chunk_length_); + // TODO(aluebs): Change to RTC_CHECK_EQ once the ChannelBuffer is updated. + RTC_DCHECK_GE(data->num_channels(), num_postfilter_channels_); + + postfilter_transform_->ProcessChunk(data->channels(0), final_mask_); + + // Ramp up/down for smoothing is needed in order to avoid discontinuities in + // the transitions between 10 ms frames. 
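+  // The gain is interpolated linearly from the previous chunk's high-band
+  // mask to the current one across the frames of this chunk.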
+ const float ramp_increment = + (high_pass_postfilter_mask_ - old_high_pass_mask_) / + data->num_frames_per_band(); + for (size_t i = 1; i < data->num_bands(); ++i) { + float smoothed_mask = old_high_pass_mask_; + for (size_t j = 0; j < data->num_frames_per_band(); ++j) { + smoothed_mask += ramp_increment; + for (size_t k = 0; k < num_postfilter_channels_; ++k) { + data->channels(i)[k][j] *= smoothed_mask; + } + } + } +} + +void NonlinearBeamformer::AimAt(const SphericalPointf& target_direction) { + target_angle_radians_ = target_direction.azimuth(); + InitHighFrequencyCorrectionRanges(); + InitInterfAngles(); + InitDelaySumMasks(); + InitTargetCovMats(); + InitInterfCovMats(); + NormalizeCovMats(); +} + +bool NonlinearBeamformer::IsInBeam(const SphericalPointf& spherical_point) { + // If more than half-beamwidth degrees away from the beam's center, + // you are out of the beam. + return fabs(spherical_point.azimuth() - target_angle_radians_) < + kHalfBeamWidthRadians; +} + +bool NonlinearBeamformer::is_target_present() { return is_target_present_; } + +void NonlinearBeamformer::ProcessAudioBlock(const complex_f* const* input, + size_t num_input_channels, + size_t num_freq_bins, + size_t num_output_channels, + complex_f* const* output) { + RTC_CHECK_EQ(kNumFreqBins, num_freq_bins); + RTC_CHECK_EQ(num_input_channels_, num_input_channels); + RTC_CHECK_EQ(0, num_output_channels); + + // Calculating the post-filter masks. Note that we need two for each + // frequency bin to account for the positive and negative interferer + // angle. + for (size_t i = low_mean_start_bin_; i <= high_mean_end_bin_; ++i) { + eig_m_.CopyFromColumn(input, i, num_input_channels_); + float eig_m_norm_factor = std::sqrt(SumSquares(eig_m_)); + if (eig_m_norm_factor != 0.f) { + eig_m_.Scale(1.f / eig_m_norm_factor); + } + + float rxim = Norm(target_cov_mats_[i], eig_m_); + float ratio_rxiw_rxim = 0.f; + if (rxim > 0.f) { + ratio_rxiw_rxim = rxiws_[i] / rxim; + } + + complex_f rmw = abs(ConjugateDotProduct(delay_sum_masks_[i], eig_m_)); + rmw *= rmw; + float rmw_r = rmw.real(); + + new_mask_[i] = CalculatePostfilterMask(*interf_cov_mats_[i][0], + rpsiws_[i][0], + ratio_rxiw_rxim, + rmw_r); + for (size_t j = 1; j < interf_angles_radians_.size(); ++j) { + float tmp_mask = CalculatePostfilterMask(*interf_cov_mats_[i][j], + rpsiws_[i][j], + ratio_rxiw_rxim, + rmw_r); + if (tmp_mask < new_mask_[i]) { + new_mask_[i] = tmp_mask; + } + } + } + + ApplyMaskTimeSmoothing(); + EstimateTargetPresence(); + ApplyLowFrequencyCorrection(); + ApplyHighFrequencyCorrection(); + ApplyMaskFrequencySmoothing(); +} + +float NonlinearBeamformer::CalculatePostfilterMask( + const ComplexMatrixF& interf_cov_mat, + float rpsiw, + float ratio_rxiw_rxim, + float rmw_r) { + float rpsim = Norm(interf_cov_mat, eig_m_); + + float ratio = 0.f; + if (rpsim > 0.f) { + ratio = rpsiw / rpsim; + } + + float numerator = 1.f - kCutOffConstant; + if (rmw_r > 0.f) { + numerator = 1.f - std::min(kCutOffConstant, ratio / rmw_r); + } + + float denominator = 1.f - kCutOffConstant; + if (ratio_rxiw_rxim > 0.f) { + denominator = 1.f - std::min(kCutOffConstant, ratio / ratio_rxiw_rxim); + } + + return numerator / denominator; +} + +// Smooth new_mask_ into time_smooth_mask_. 
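+// This is a first-order exponential smoother:
+//   time_smooth_mask_[i] <- a * new_mask_[i] + (1 - a) * time_smooth_mask_[i],
+// with a = kMaskTimeSmoothAlpha.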
+void NonlinearBeamformer::ApplyMaskTimeSmoothing() { + for (size_t i = low_mean_start_bin_; i <= high_mean_end_bin_; ++i) { + time_smooth_mask_[i] = kMaskTimeSmoothAlpha * new_mask_[i] + + (1 - kMaskTimeSmoothAlpha) * time_smooth_mask_[i]; + } +} + +// Copy time_smooth_mask_ to final_mask_ and smooth over frequency. +void NonlinearBeamformer::ApplyMaskFrequencySmoothing() { + // Smooth over frequency in both directions. The "frequency correction" + // regions have constant value, but we enter them to smooth over the jump + // that exists at the boundary. However, this does mean when smoothing "away" + // from the region that we only need to use the last element. + // + // Upward smoothing: + // low_mean_start_bin_ + // v + // |------|------------|------| + // ^------------------>^ + // + // Downward smoothing: + // high_mean_end_bin_ + // v + // |------|------------|------| + // ^<------------------^ + std::copy(time_smooth_mask_, time_smooth_mask_ + kNumFreqBins, final_mask_); + for (size_t i = low_mean_start_bin_; i < kNumFreqBins; ++i) { + final_mask_[i] = kMaskFrequencySmoothAlpha * final_mask_[i] + + (1 - kMaskFrequencySmoothAlpha) * final_mask_[i - 1]; + } + for (size_t i = high_mean_end_bin_ + 1; i > 0; --i) { + final_mask_[i - 1] = kMaskFrequencySmoothAlpha * final_mask_[i - 1] + + (1 - kMaskFrequencySmoothAlpha) * final_mask_[i]; + } +} + +// Apply low frequency correction to time_smooth_mask_. +void NonlinearBeamformer::ApplyLowFrequencyCorrection() { + const float low_frequency_mask = + MaskRangeMean(low_mean_start_bin_, low_mean_end_bin_ + 1); + std::fill(time_smooth_mask_, time_smooth_mask_ + low_mean_start_bin_, + low_frequency_mask); +} + +// Apply high frequency correction to time_smooth_mask_. Update +// high_pass_postfilter_mask_ to use for the high frequency time-domain bands. +void NonlinearBeamformer::ApplyHighFrequencyCorrection() { + high_pass_postfilter_mask_ = + MaskRangeMean(high_mean_start_bin_, high_mean_end_bin_ + 1); + std::fill(time_smooth_mask_ + high_mean_end_bin_ + 1, + time_smooth_mask_ + kNumFreqBins, high_pass_postfilter_mask_); +} + +// Compute mean over the given range of time_smooth_mask_, [first, last). +float NonlinearBeamformer::MaskRangeMean(size_t first, size_t last) { + RTC_DCHECK_GT(last, first); + const float sum = std::accumulate(time_smooth_mask_ + first, + time_smooth_mask_ + last, 0.f); + return sum / (last - first); +} + +void NonlinearBeamformer::EstimateTargetPresence() { + const size_t quantile = static_cast<size_t>( + (high_mean_end_bin_ - low_mean_start_bin_) * kMaskQuantile + + low_mean_start_bin_); + std::nth_element(new_mask_ + low_mean_start_bin_, new_mask_ + quantile, + new_mask_ + high_mean_end_bin_ + 1); + if (new_mask_[quantile] > kMaskTargetThreshold) { + is_target_present_ = true; + interference_blocks_count_ = 0; + } else { + is_target_present_ = interference_blocks_count_++ < hold_target_blocks_; + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h b/third_party/libwebrtc/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h new file mode 100644 index 0000000000..76556e7a45 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h @@ -0,0 +1,228 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found
+ * in the file PATENTS.  All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_
+#define MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_
+
+// MSVC++ requires this to be set before any other includes to get M_PI.
+#define _USE_MATH_DEFINES
+
+#include <math.h>
+
+#include <memory>
+#include <vector>
+
+#include "common_audio/lapped_transform.h"
+#include "common_audio/channel_buffer.h"
+#include "modules/audio_processing/beamformer/array_util.h"
+#include "modules/audio_processing/beamformer/complex_matrix.h"
+
+namespace webrtc {
+
+class PostFilterTransform : public LappedTransform::Callback {
+ public:
+  PostFilterTransform(size_t num_channels,
+                      size_t chunk_length,
+                      float* window,
+                      size_t fft_size);
+
+  void ProcessChunk(float* const* data, float* final_mask);
+
+ protected:
+  void ProcessAudioBlock(const complex<float>* const* input,
+                         size_t num_input_channels,
+                         size_t num_freq_bins,
+                         size_t num_output_channels,
+                         complex<float>* const* output) override;
+
+ private:
+  LappedTransform transform_;
+  const size_t num_freq_bins_;
+  float* final_mask_;
+};
+
+// Enhances sound sources coming directly in front of a uniform linear array
+// and suppresses sound sources coming from all other directions. Operates on
+// multichannel signals and produces single-channel output.
+//
+// The implemented nonlinear postfilter algorithm is taken from "A Robust
+// Nonlinear Beamforming Postprocessor" by Bastiaan Kleijn.
+class NonlinearBeamformer : public LappedTransform::Callback {
+ public:
+  static const float kHalfBeamWidthRadians;
+
+  explicit NonlinearBeamformer(
+      const std::vector<Point>& array_geometry,
+      size_t num_postfilter_channels = 1u,
+      SphericalPointf target_direction =
+          SphericalPointf(static_cast<float>(M_PI) / 2.f, 0.f, 1.f));
+  ~NonlinearBeamformer() override;
+
+  // Sample rate corresponds to the lower band.
+  // Needs to be called before the NonlinearBeamformer can be used.
+  virtual void Initialize(int chunk_size_ms, int sample_rate_hz);
+
+  // Analyzes one time-domain chunk of audio. The audio is expected to be split
+  // into frequency bands inside the ChannelBuffer. The number of frames and
+  // channels must correspond to the constructor parameters.
+  virtual void AnalyzeChunk(const ChannelBuffer<float>& data);
+
+  // Applies the postfilter mask to one chunk of audio. The audio is expected
+  // to be split into frequency bands inside the ChannelBuffer. The number of
+  // frames and channels must correspond to the constructor parameters.
+  virtual void PostFilter(ChannelBuffer<float>* data);
+
+  virtual void AimAt(const SphericalPointf& target_direction);
+
+  virtual bool IsInBeam(const SphericalPointf& spherical_point);
+
+  // After processing each block |is_target_present_| is set to true if the
+  // target signal is present and to false otherwise. This method can be
+  // called to know if the data is target signal or interference and process
+  // it accordingly.
+  virtual bool is_target_present();
+
+ protected:
+  // Process one frequency-domain block of audio. This is where the fun
+  // happens. Implements LappedTransform::Callback.
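+  // (LappedTransform, as used here, windows the input into overlapping
+  // kFftSize-sample blocks, transforms them, hands each block to this
+  // callback, and overlap-adds the inverse transform back into the time
+  // domain, so the callback sees kNumFreqBins = kFftSize / 2 + 1 bins.)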
+  void ProcessAudioBlock(const complex<float>* const* input,
+                         size_t num_input_channels,
+                         size_t num_freq_bins,
+                         size_t num_output_channels,
+                         complex<float>* const* output) override;
+
+ private:
+  FRIEND_TEST_ALL_PREFIXES(NonlinearBeamformerTest,
+                           InterfAnglesTakeAmbiguityIntoAccount);
+
+  typedef Matrix<float> MatrixF;
+  typedef ComplexMatrix<float> ComplexMatrixF;
+  typedef complex<float> complex_f;
+
+  void InitLowFrequencyCorrectionRanges();
+  void InitHighFrequencyCorrectionRanges();
+  void InitInterfAngles();
+  void InitDelaySumMasks();
+  void InitTargetCovMats();
+  void InitDiffuseCovMats();
+  void InitInterfCovMats();
+  void NormalizeCovMats();
+
+  // Calculates postfilter masks that minimize the mean squared error of our
+  // estimation of the desired signal.
+  float CalculatePostfilterMask(const ComplexMatrixF& interf_cov_mat,
+                                float rpsiw,
+                                float ratio_rxiw_rxim,
+                                float rmw_r);
+
+  // Prevents the postfilter masks from degenerating too quickly (a cause of
+  // musical noise).
+  void ApplyMaskTimeSmoothing();
+  void ApplyMaskFrequencySmoothing();
+
+  // The postfilter masks are unreliable at low frequencies. Calculates a
+  // better mask by averaging mid-low frequency values.
+  void ApplyLowFrequencyCorrection();
+
+  // Postfilter masks are also unreliable at high frequencies. Average
+  // mid-high frequency masks to calculate a single mask per block which can
+  // be applied in the time-domain. Further, we average these block-masks
+  // over a chunk, resulting in one postfilter mask per audio chunk. This
+  // allows us to skip both transforming and blocking the high-frequency
+  // signal.
+  void ApplyHighFrequencyCorrection();
+
+  // Compute the means needed for the above frequency correction.
+  float MaskRangeMean(size_t start_bin, size_t end_bin);
+
+  // Applies the post-filter mask to |input| and stores the result in
+  // |output|.
+  void ApplyPostFilter(const complex_f* input, complex_f* output);
+
+  void EstimateTargetPresence();
+
+  static const size_t kFftSize = 256;
+  static const size_t kNumFreqBins = kFftSize / 2 + 1;
+
+  // Deals with the fft transform and blocking.
+  size_t chunk_length_;
+  std::unique_ptr<LappedTransform> process_transform_;
+  std::unique_ptr<PostFilterTransform> postfilter_transform_;
+  float window_[kFftSize];
+
+  // Parameters exposed to the user.
+  const size_t num_input_channels_;
+  const size_t num_postfilter_channels_;
+  int sample_rate_hz_;
+
+  const std::vector<Point> array_geometry_;
+  // The normal direction of the array if it has one and it is in the
+  // xy-plane.
+  const rtc::Optional<Point> array_normal_;
+
+  // Minimum spacing between microphone pairs.
+  const float min_mic_spacing_;
+
+  // Calculated based on user-input and constants in the .cc file.
+  size_t low_mean_start_bin_;
+  size_t low_mean_end_bin_;
+  size_t high_mean_start_bin_;
+  size_t high_mean_end_bin_;
+
+  // Quickly varying mask updated every block.
+  float new_mask_[kNumFreqBins];
+  // Time smoothed mask.
+  float time_smooth_mask_[kNumFreqBins];
+  // Time and frequency smoothed mask.
+  float final_mask_[kNumFreqBins];
+
+  float target_angle_radians_;
+  // Angles of the interferer scenarios.
+  std::vector<float> interf_angles_radians_;
+  // The angle between the target and the interferer scenarios.
+  const float away_radians_;
+
+  // Array of length |kNumFreqBins|, Matrix of size |1| x |num_channels_|.
+  ComplexMatrixF delay_sum_masks_[kNumFreqBins];
+
+  // Arrays of length |kNumFreqBins|, Matrix of size |num_input_channels_| x
+  // |num_input_channels_|.
+  ComplexMatrixF target_cov_mats_[kNumFreqBins];
+  ComplexMatrixF uniform_cov_mat_[kNumFreqBins];
+  // Array of length |kNumFreqBins|, Matrix of size |num_input_channels_| x
+  // |num_input_channels_|. The vector has a size equal to the number of
+  // interferer scenarios.
+  std::vector<std::unique_ptr<ComplexMatrixF>> interf_cov_mats_[kNumFreqBins];
+
+  // Of length |kNumFreqBins|.
+  float wave_numbers_[kNumFreqBins];
+
+  // Preallocated for ProcessAudioBlock()
+  // Of length |kNumFreqBins|.
+  float rxiws_[kNumFreqBins];
+  // The vector has a size equal to the number of interferer scenarios.
+  std::vector<float> rpsiws_[kNumFreqBins];
+
+  // Scratch vector holding the normalized input snapshot for the frequency
+  // bin currently being processed in ProcessAudioBlock().
+  ComplexMatrixF eig_m_;
+
+  // For processing the high-frequency input signal.
+  float high_pass_postfilter_mask_;
+  float old_high_pass_mask_;
+
+  // True when the target signal is present.
+  bool is_target_present_;
+  // Number of blocks after which the data is considered interference if the
+  // mask does not pass |kMaskTargetThreshold|.
+  size_t hold_target_blocks_;
+  // Number of blocks since the last mask that passed |kMaskTargetThreshold|.
+  size_t interference_blocks_count_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer_test.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer_test.cc
new file mode 100644
index 0000000000..296cd6d4ae
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer_test.cc
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <vector>
+
+#include "common_audio/channel_buffer.h"
+#include "common_audio/wav_file.h"
+#include "modules/audio_processing/beamformer/nonlinear_beamformer.h"
+#include "modules/audio_processing/test/test_utils.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/flags.h"
+#include "rtc_base/format_macros.h"
+
+DEFINE_string(i, "", "The name of the input file to read from.");
+DEFINE_string(o, "out.wav", "Name of the output file to write to.");
+DEFINE_string(mic_positions, "",
+    "Space delimited cartesian coordinates of microphones in meters. "
+    "The coordinates of each point are contiguous. "
+    "For a two element array: \"x1 y1 z1 x2 y2 z2\"");
+DEFINE_bool(help, false, "Prints this message.");
+
+namespace webrtc {
+namespace {
+
+const int kChunksPerSecond = 100;
+const int kChunkSizeMs = 1000 / kChunksPerSecond;
+
+const char kUsage[] =
+    "Command-line tool to run beamforming on WAV files. 
The signal is passed\n" + "in as a single band, unlike the audio processing interface which splits\n" + "signals into multiple bands.\n"; + +} // namespace + +int main(int argc, char* argv[]) { + if (rtc::FlagList::SetFlagsFromCommandLine(&argc, argv, true) || + FLAG_help || argc != 1) { + printf("%s", kUsage); + if (FLAG_help) { + rtc::FlagList::Print(nullptr, false); + return 0; + } + return 1; + } + + WavReader in_file(FLAG_i); + WavWriter out_file(FLAG_o, in_file.sample_rate(), in_file.num_channels()); + + const size_t num_mics = in_file.num_channels(); + const std::vector<Point> array_geometry = + ParseArrayGeometry(FLAG_mic_positions, num_mics); + RTC_CHECK_EQ(array_geometry.size(), num_mics); + + NonlinearBeamformer bf(array_geometry, array_geometry.size()); + bf.Initialize(kChunkSizeMs, in_file.sample_rate()); + + printf("Input file: %s\nChannels: %" PRIuS ", Sample rate: %d Hz\n\n", + FLAG_i, in_file.num_channels(), in_file.sample_rate()); + printf("Output file: %s\nChannels: %" PRIuS ", Sample rate: %d Hz\n\n", + FLAG_o, out_file.num_channels(), out_file.sample_rate()); + + ChannelBuffer<float> buf( + rtc::CheckedDivExact(in_file.sample_rate(), kChunksPerSecond), + in_file.num_channels()); + + std::vector<float> interleaved(buf.size()); + while (in_file.ReadSamples(interleaved.size(), + &interleaved[0]) == interleaved.size()) { + FloatS16ToFloat(&interleaved[0], interleaved.size(), &interleaved[0]); + Deinterleave(&interleaved[0], buf.num_frames(), + buf.num_channels(), buf.channels()); + + bf.AnalyzeChunk(buf); + bf.PostFilter(&buf); + + Interleave(buf.channels(), buf.num_frames(), + buf.num_channels(), &interleaved[0]); + FloatToFloatS16(&interleaved[0], interleaved.size(), &interleaved[0]); + out_file.WriteSamples(&interleaved[0], interleaved.size()); + } + + return 0; +} + +} // namespace webrtc + +int main(int argc, char* argv[]) { + return webrtc::main(argc, argv); +} diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer_unittest.cc new file mode 100644 index 0000000000..78b2f0a68e --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer_unittest.cc @@ -0,0 +1,375 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// MSVC++ requires this to be set before any other includes to get M_PI. 
+#define _USE_MATH_DEFINES + +#include "modules/audio_processing/beamformer/nonlinear_beamformer.h" + +#include <math.h> + +#include "api/array_view.h" +#include "modules/audio_processing/audio_buffer.h" +#include "modules/audio_processing/test/audio_buffer_tools.h" +#include "modules/audio_processing/test/bitexactness_tools.h" +#include "test/gtest.h" + +namespace webrtc { +namespace { + +const int kChunkSizeMs = 10; +const int kSampleRateHz = 16000; + +SphericalPointf AzimuthToSphericalPoint(float azimuth_radians) { + return SphericalPointf(azimuth_radians, 0.f, 1.f); +} + +void Verify(NonlinearBeamformer* bf, float target_azimuth_radians) { + EXPECT_TRUE(bf->IsInBeam(AzimuthToSphericalPoint(target_azimuth_radians))); + EXPECT_TRUE(bf->IsInBeam(AzimuthToSphericalPoint( + target_azimuth_radians - NonlinearBeamformer::kHalfBeamWidthRadians + + 0.001f))); + EXPECT_TRUE(bf->IsInBeam(AzimuthToSphericalPoint( + target_azimuth_radians + NonlinearBeamformer::kHalfBeamWidthRadians - + 0.001f))); + EXPECT_FALSE(bf->IsInBeam(AzimuthToSphericalPoint( + target_azimuth_radians - NonlinearBeamformer::kHalfBeamWidthRadians - + 0.001f))); + EXPECT_FALSE(bf->IsInBeam(AzimuthToSphericalPoint( + target_azimuth_radians + NonlinearBeamformer::kHalfBeamWidthRadians + + 0.001f))); +} + +void AimAndVerify(NonlinearBeamformer* bf, float target_azimuth_radians) { + bf->AimAt(AzimuthToSphericalPoint(target_azimuth_radians)); + Verify(bf, target_azimuth_radians); +} + +// Bitexactness test code. +const size_t kNumFramesToProcess = 1000; + +void ProcessOneFrame(int sample_rate_hz, + AudioBuffer* capture_audio_buffer, + NonlinearBeamformer* beamformer) { + if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) { + capture_audio_buffer->SplitIntoFrequencyBands(); + } + + beamformer->AnalyzeChunk(*capture_audio_buffer->split_data_f()); + capture_audio_buffer->set_num_channels(1); + beamformer->PostFilter(capture_audio_buffer->split_data_f()); + + if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) { + capture_audio_buffer->MergeFrequencyBands(); + } +} + +int BeamformerSampleRate(int sample_rate_hz) { + return (sample_rate_hz > AudioProcessing::kSampleRate16kHz + ? AudioProcessing::kSampleRate16kHz + : sample_rate_hz); +} + +void RunBitExactnessTest(int sample_rate_hz, + const std::vector<Point>& array_geometry, + const SphericalPointf& target_direction, + rtc::ArrayView<const float> output_reference) { + NonlinearBeamformer beamformer(array_geometry, 1u, target_direction); + beamformer.Initialize(AudioProcessing::kChunkSizeMs, + BeamformerSampleRate(sample_rate_hz)); + + const StreamConfig capture_config(sample_rate_hz, array_geometry.size(), + false); + AudioBuffer capture_buffer( + capture_config.num_frames(), capture_config.num_channels(), + capture_config.num_frames(), capture_config.num_channels(), + capture_config.num_frames()); + test::InputAudioFile capture_file( + test::GetApmCaptureTestVectorFileName(sample_rate_hz)); + std::vector<float> capture_input(capture_config.num_frames() * + capture_config.num_channels()); + for (size_t frame_no = 0u; frame_no < kNumFramesToProcess; ++frame_no) { + ReadFloatSamplesFromStereoFile(capture_config.num_frames(), + capture_config.num_channels(), &capture_file, + capture_input); + + test::CopyVectorToAudioBuffer(capture_config, capture_input, + &capture_buffer); + + ProcessOneFrame(sample_rate_hz, &capture_buffer, &beamformer); + } + + // Extract and verify the test results. 
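+  // (The tolerance declared below, 1.f / (1 << 15) = 1/32768, is one least
+  // significant bit of int16 audio mapped onto the [-1, 1) float range, so
+  // only sub-quantization differences are tolerated.)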
+  std::vector<float> capture_output;
+  test::ExtractVectorFromAudioBuffer(capture_config, &capture_buffer,
+                                     &capture_output);
+
+  const float kElementErrorBound = 1.f / static_cast<float>(1 << 15);
+
+  // Compare the output with the reference. Only the first values of the
+  // output from the last processed frame are compared, to avoid having to
+  // specify all preceding frames as test vectors. As the algorithm being
+  // tested has a memory, testing only the last frame implicitly also tests
+  // the preceding frames.
+  EXPECT_TRUE(test::VerifyDeinterleavedArray(
+      capture_config.num_frames(), capture_config.num_channels(),
+      output_reference, capture_output, kElementErrorBound));
+}
+
+// TODO(peah): Add bitexactness tests for scenarios with more than 2 input
+// channels.
+std::vector<Point> CreateArrayGeometry(int variant) {
+  std::vector<Point> array_geometry;
+  switch (variant) {
+    case 1:
+      array_geometry.push_back(Point(-0.025f, 0.f, 0.f));
+      array_geometry.push_back(Point(0.025f, 0.f, 0.f));
+      break;
+    case 2:
+      array_geometry.push_back(Point(-0.035f, 0.f, 0.f));
+      array_geometry.push_back(Point(0.035f, 0.f, 0.f));
+      break;
+    case 3:
+      array_geometry.push_back(Point(-0.5f, 0.f, 0.f));
+      array_geometry.push_back(Point(0.5f, 0.f, 0.f));
+      break;
+    default:
+      RTC_CHECK(false);
+  }
+  return array_geometry;
+}
+
+const SphericalPointf TargetDirection1(0.4f * static_cast<float>(M_PI) / 2.f,
+                                       0.f,
+                                       1.f);
+const SphericalPointf TargetDirection2(static_cast<float>(M_PI) / 2.f,
+                                       1.f,
+                                       2.f);
+
+}  // namespace
+
+TEST(NonlinearBeamformerTest, AimingModifiesBeam) {
+  std::vector<Point> array_geometry;
+  array_geometry.push_back(Point(-0.025f, 0.f, 0.f));
+  array_geometry.push_back(Point(0.025f, 0.f, 0.f));
+  NonlinearBeamformer bf(array_geometry, 1u);
+  bf.Initialize(kChunkSizeMs, kSampleRateHz);
+  // The default constructor parameter sets the target angle to PI / 2.
+  Verify(&bf, static_cast<float>(M_PI) / 2.f);
+  AimAndVerify(&bf, static_cast<float>(M_PI) / 3.f);
+  AimAndVerify(&bf, 3.f * static_cast<float>(M_PI) / 4.f);
+  AimAndVerify(&bf, static_cast<float>(M_PI) / 6.f);
+  AimAndVerify(&bf, static_cast<float>(M_PI));
+}
+
+TEST(NonlinearBeamformerTest, InterfAnglesTakeAmbiguityIntoAccount) {
+  {
+    // For linear arrays there is ambiguity.
+    std::vector<Point> array_geometry;
+    array_geometry.push_back(Point(-0.1f, 0.f, 0.f));
+    array_geometry.push_back(Point(0.f, 0.f, 0.f));
+    array_geometry.push_back(Point(0.2f, 0.f, 0.f));
+    NonlinearBeamformer bf(array_geometry, 1u);
+    bf.Initialize(kChunkSizeMs, kSampleRateHz);
+    EXPECT_EQ(2u, bf.interf_angles_radians_.size());
+    EXPECT_FLOAT_EQ(M_PI / 2.f - bf.away_radians_,
+                    bf.interf_angles_radians_[0]);
+    EXPECT_FLOAT_EQ(M_PI / 2.f + bf.away_radians_,
+                    bf.interf_angles_radians_[1]);
+    bf.AimAt(AzimuthToSphericalPoint(bf.away_radians_ / 2.f));
+    EXPECT_EQ(2u, bf.interf_angles_radians_.size());
+    EXPECT_FLOAT_EQ(M_PI - bf.away_radians_ / 2.f,
+                    bf.interf_angles_radians_[0]);
+    EXPECT_FLOAT_EQ(3.f * bf.away_radians_ / 2.f, bf.interf_angles_radians_[1]);
+  }
+  {
+    // For planar arrays with normal in the xy-plane there is ambiguity.
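+    // (As with the linear case above, such an array cannot distinguish a
+    // source from its mirror image on the other side of the array plane, so
+    // an interferer angle that would land on the ambiguous side is expected
+    // to be reflected back, which the aim-near-zero checks below verify.)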
+ std::vector<Point> array_geometry; + array_geometry.push_back(Point(-0.1f, 0.f, 0.f)); + array_geometry.push_back(Point(0.f, 0.f, 0.f)); + array_geometry.push_back(Point(0.2f, 0.f, 0.f)); + array_geometry.push_back(Point(0.1f, 0.f, 0.2f)); + array_geometry.push_back(Point(0.f, 0.f, -0.1f)); + NonlinearBeamformer bf(array_geometry, 1u); + bf.Initialize(kChunkSizeMs, kSampleRateHz); + EXPECT_EQ(2u, bf.interf_angles_radians_.size()); + EXPECT_FLOAT_EQ(M_PI / 2.f - bf.away_radians_, + bf.interf_angles_radians_[0]); + EXPECT_FLOAT_EQ(M_PI / 2.f + bf.away_radians_, + bf.interf_angles_radians_[1]); + bf.AimAt(AzimuthToSphericalPoint(bf.away_radians_ / 2.f)); + EXPECT_EQ(2u, bf.interf_angles_radians_.size()); + EXPECT_FLOAT_EQ(M_PI - bf.away_radians_ / 2.f, + bf.interf_angles_radians_[0]); + EXPECT_FLOAT_EQ(3.f * bf.away_radians_ / 2.f, bf.interf_angles_radians_[1]); + } + { + // For planar arrays with normal not in the xy-plane there is no ambiguity. + std::vector<Point> array_geometry; + array_geometry.push_back(Point(0.f, 0.f, 0.f)); + array_geometry.push_back(Point(0.2f, 0.f, 0.f)); + array_geometry.push_back(Point(0.f, 0.1f, -0.2f)); + NonlinearBeamformer bf(array_geometry, 1u); + bf.Initialize(kChunkSizeMs, kSampleRateHz); + EXPECT_EQ(2u, bf.interf_angles_radians_.size()); + EXPECT_FLOAT_EQ(M_PI / 2.f - bf.away_radians_, + bf.interf_angles_radians_[0]); + EXPECT_FLOAT_EQ(M_PI / 2.f + bf.away_radians_, + bf.interf_angles_radians_[1]); + bf.AimAt(AzimuthToSphericalPoint(bf.away_radians_ / 2.f)); + EXPECT_EQ(2u, bf.interf_angles_radians_.size()); + EXPECT_FLOAT_EQ(-bf.away_radians_ / 2.f, bf.interf_angles_radians_[0]); + EXPECT_FLOAT_EQ(3.f * bf.away_radians_ / 2.f, bf.interf_angles_radians_[1]); + } + { + // For arrays which are not linear or planar there is no ambiguity. + std::vector<Point> array_geometry; + array_geometry.push_back(Point(0.f, 0.f, 0.f)); + array_geometry.push_back(Point(0.1f, 0.f, 0.f)); + array_geometry.push_back(Point(0.f, 0.2f, 0.f)); + array_geometry.push_back(Point(0.f, 0.f, 0.3f)); + NonlinearBeamformer bf(array_geometry, 1u); + bf.Initialize(kChunkSizeMs, kSampleRateHz); + EXPECT_EQ(2u, bf.interf_angles_radians_.size()); + EXPECT_FLOAT_EQ(M_PI / 2.f - bf.away_radians_, + bf.interf_angles_radians_[0]); + EXPECT_FLOAT_EQ(M_PI / 2.f + bf.away_radians_, + bf.interf_angles_radians_[1]); + bf.AimAt(AzimuthToSphericalPoint(bf.away_radians_ / 2.f)); + EXPECT_EQ(2u, bf.interf_angles_radians_.size()); + EXPECT_FLOAT_EQ(-bf.away_radians_ / 2.f, bf.interf_angles_radians_[0]); + EXPECT_FLOAT_EQ(3.f * bf.away_radians_ / 2.f, bf.interf_angles_radians_[1]); + } +} + +// TODO(peah): Investigate why the nonlinear_beamformer.cc causes a DCHECK in +// this setup. 
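+// (gtest note: the DISABLED_ prefix keeps a test compiled but skipped by
+// default; it can still be run with --gtest_also_run_disabled_tests.)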
+TEST(BeamformerBitExactnessTest, + DISABLED_Stereo8kHz_ArrayGeometry1_TargetDirection1) { + const float kOutputReference[] = {0.001318f, -0.001091f, 0.000990f, + 0.001318f, -0.001091f, 0.000990f}; + + RunBitExactnessTest(AudioProcessing::kSampleRate8kHz, CreateArrayGeometry(1), + TargetDirection1, kOutputReference); +} + +TEST(BeamformerBitExactnessTest, + Stereo16kHz_ArrayGeometry1_TargetDirection1) { + const float kOutputReference[] = {-0.000077f, -0.000147f, -0.000138f, + -0.000077f, -0.000147f, -0.000138f}; + + RunBitExactnessTest(AudioProcessing::kSampleRate16kHz, CreateArrayGeometry(1), + TargetDirection1, kOutputReference); +} + +TEST(BeamformerBitExactnessTest, + Stereo32kHz_ArrayGeometry1_TargetDirection1) { + const float kOutputReference[] = {-0.000061f, -0.000061f, -0.000061f, + -0.000061f, -0.000061f, -0.000061f}; + + RunBitExactnessTest(AudioProcessing::kSampleRate32kHz, CreateArrayGeometry(1), + TargetDirection1, kOutputReference); +} + +TEST(BeamformerBitExactnessTest, + Stereo48kHz_ArrayGeometry1_TargetDirection1) { + const float kOutputReference[] = {0.000450f, 0.000436f, 0.000433f, + 0.000450f, 0.000436f, 0.000433f}; + + RunBitExactnessTest(AudioProcessing::kSampleRate48kHz, CreateArrayGeometry(1), + TargetDirection1, kOutputReference); +} + +// TODO(peah): Investigate why the nonlinear_beamformer.cc causes a DCHECK in +// this setup. +TEST(BeamformerBitExactnessTest, + DISABLED_Stereo8kHz_ArrayGeometry1_TargetDirection2) { + const float kOutputReference[] = {0.001144f, -0.001026f, 0.001074f, + -0.016205f, -0.007324f, -0.015656f}; + + RunBitExactnessTest(AudioProcessing::kSampleRate8kHz, CreateArrayGeometry(1), + TargetDirection2, kOutputReference); +} + +TEST(BeamformerBitExactnessTest, + Stereo16kHz_ArrayGeometry1_TargetDirection2) { + const float kOutputReference[] = {0.000221f, -0.000249f, 0.000140f, + 0.000221f, -0.000249f, 0.000140f}; + + RunBitExactnessTest(AudioProcessing::kSampleRate16kHz, CreateArrayGeometry(1), + TargetDirection2, kOutputReference); +} + +TEST(BeamformerBitExactnessTest, + Stereo32kHz_ArrayGeometry1_TargetDirection2) { + const float kOutputReference[] = {0.000763f, -0.000336f, 0.000549f, + 0.000763f, -0.000336f, 0.000549f}; + + RunBitExactnessTest(AudioProcessing::kSampleRate32kHz, CreateArrayGeometry(1), + TargetDirection2, kOutputReference); +} + +TEST(BeamformerBitExactnessTest, + Stereo48kHz_ArrayGeometry1_TargetDirection2) { + const float kOutputReference[] = {-0.000004f, -0.000494f, 0.000255f, + -0.000004f, -0.000494f, 0.000255f}; + + RunBitExactnessTest(AudioProcessing::kSampleRate48kHz, CreateArrayGeometry(1), + TargetDirection2, kOutputReference); +} + +TEST(BeamformerBitExactnessTest, + Stereo8kHz_ArrayGeometry2_TargetDirection2) { + const float kOutputReference[] = {-0.000914f, 0.002170f, -0.002382f, + -0.000914f, 0.002170f, -0.002382f}; + + RunBitExactnessTest(AudioProcessing::kSampleRate8kHz, CreateArrayGeometry(2), + TargetDirection2, kOutputReference); +} + +TEST(BeamformerBitExactnessTest, + Stereo16kHz_ArrayGeometry2_TargetDirection2) { + const float kOutputReference[] = {0.000179f, -0.000179f, 0.000081f, + 0.000179f, -0.000179f, 0.000081f}; + + RunBitExactnessTest(AudioProcessing::kSampleRate16kHz, CreateArrayGeometry(2), + TargetDirection2, kOutputReference); +} + +TEST(BeamformerBitExactnessTest, + Stereo32kHz_ArrayGeometry2_TargetDirection2) { + const float kOutputReference[] = {0.000549f, -0.000214f, 0.000366f, + 0.000549f, -0.000214f, 0.000366f}; + + RunBitExactnessTest(AudioProcessing::kSampleRate32kHz, 
CreateArrayGeometry(2), + TargetDirection2, kOutputReference); +} + +TEST(BeamformerBitExactnessTest, + Stereo48kHz_ArrayGeometry2_TargetDirection2) { + const float kOutputReference[] = {0.000019f, -0.000310f, 0.000182f, + 0.000019f, -0.000310f, 0.000182f}; + + RunBitExactnessTest(AudioProcessing::kSampleRate48kHz, CreateArrayGeometry(2), + TargetDirection2, kOutputReference); +} + +// TODO(peah): Investigate why the nonlinear_beamformer.cc causes a DCHECK in +// this setup. +TEST(BeamformerBitExactnessTest, + DISABLED_Stereo16kHz_ArrayGeometry3_TargetDirection1) { + const float kOutputReference[] = {-0.000161f, 0.000171f, -0.000096f, + 0.001007f, 0.000427f, 0.000977f}; + + RunBitExactnessTest(AudioProcessing::kSampleRate16kHz, CreateArrayGeometry(3), + TargetDirection1, kOutputReference); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/common.h b/third_party/libwebrtc/webrtc/modules/audio_processing/common.h new file mode 100644 index 0000000000..d8532c5749 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/common.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_COMMON_H_ +#define MODULES_AUDIO_PROCESSING_COMMON_H_ + +#include "modules/audio_processing/include/audio_processing.h" +#include "rtc_base/checks.h" + +namespace webrtc { + +static inline size_t ChannelsFromLayout(AudioProcessing::ChannelLayout layout) { + switch (layout) { + case AudioProcessing::kMono: + case AudioProcessing::kMonoAndKeyboard: + return 1; + case AudioProcessing::kStereo: + case AudioProcessing::kStereoAndKeyboard: + return 2; + } + RTC_NOTREACHED(); + return 0; +} + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_COMMON_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/config_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/config_unittest.cc new file mode 100644 index 0000000000..8776ee3391 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/config_unittest.cc @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ +#include "modules/audio_processing/include/config.h" + +#include "test/gtest.h" + +namespace webrtc { +namespace { + +struct MyExperiment { + static const ConfigOptionID identifier = ConfigOptionID::kMyExperimentForTest; + static const int kDefaultFactor; + static const int kDefaultOffset; + + MyExperiment() + : factor(kDefaultFactor), offset(kDefaultOffset) {} + + MyExperiment(int factor, int offset) + : factor(factor), offset(offset) {} + + int factor; + int offset; +}; + +const int MyExperiment::kDefaultFactor = 1; +const int MyExperiment::kDefaultOffset = 2; + +TEST(Config, ReturnsDefaultInstanceIfNotConfigured) { + Config config; + const MyExperiment& my_exp = config.Get<MyExperiment>(); + EXPECT_EQ(MyExperiment::kDefaultFactor, my_exp.factor); + EXPECT_EQ(MyExperiment::kDefaultOffset, my_exp.offset); +} + +TEST(Config, ReturnOptionWhenSet) { + Config config; + config.Set<MyExperiment>(new MyExperiment(5, 1)); + const MyExperiment& my_exp = config.Get<MyExperiment>(); + EXPECT_EQ(5, my_exp.factor); + EXPECT_EQ(1, my_exp.offset); +} + +TEST(Config, SetNullSetsTheOptionBackToDefault) { + Config config; + config.Set<MyExperiment>(new MyExperiment(5, 1)); + config.Set<MyExperiment>(NULL); + const MyExperiment& my_exp = config.Get<MyExperiment>(); + EXPECT_EQ(MyExperiment::kDefaultFactor, my_exp.factor); + EXPECT_EQ(MyExperiment::kDefaultOffset, my_exp.offset); +} + +struct Algo1_CostFunction { + static const ConfigOptionID identifier = + ConfigOptionID::kAlgo1CostFunctionForTest; + Algo1_CostFunction() {} + + virtual int cost(int x) const { + return x; + } + + virtual ~Algo1_CostFunction() {} +}; + +struct SqrCost : Algo1_CostFunction { + virtual int cost(int x) const { + return x*x; + } +}; + +TEST(Config, SupportsPolymorphism) { + Config config; + config.Set<Algo1_CostFunction>(new SqrCost()); + EXPECT_EQ(25, config.Get<Algo1_CostFunction>().cost(5)); +} +} // namespace +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/debug.proto b/third_party/libwebrtc/webrtc/modules/audio_processing/debug.proto new file mode 100644 index 0000000000..44177735e8 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/debug.proto @@ -0,0 +1,94 @@ +syntax = "proto2"; +option optimize_for = LITE_RUNTIME; +package webrtc.audioproc; + +// Contains the format of input/output/reverse audio. An Init message is added +// when any of the fields are changed. +message Init { + optional int32 sample_rate = 1; + optional int32 device_sample_rate = 2 [deprecated=true]; + optional int32 num_input_channels = 3; + optional int32 num_output_channels = 4; + optional int32 num_reverse_channels = 5; + optional int32 reverse_sample_rate = 6; + optional int32 output_sample_rate = 7; + optional int32 reverse_output_sample_rate = 8; + optional int32 num_reverse_output_channels = 9; +} + +// May contain interleaved or deinterleaved data, but don't store both formats. +message ReverseStream { + // int16 interleaved data. + optional bytes data = 1; + + // float deinterleaved data, where each repeated element points to a single + // channel buffer of data. + repeated bytes channel = 2; +} + +// May contain interleaved or deinterleaved data, but don't store both formats. +message Stream { + // int16 interleaved data. 
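+  // (For example, a stereo 10 ms frame at 48 kHz is 480 sample frames stored
+  // as L0 R0 L1 R1 ..., i.e. 960 int16 values in this field.)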
+  optional bytes input_data = 1;
+  optional bytes output_data = 2;
+
+  optional int32 delay = 3;
+  optional sint32 drift = 4;
+  optional int32 level = 5;
+  optional bool keypress = 6;
+
+  // float deinterleaved data, where each repeated element points to a single
+  // channel buffer of data.
+  repeated bytes input_channel = 7;
+  repeated bytes output_channel = 8;
+}
+
+// Contains the configurations of various APM components. A Config message is
+// added when any of the fields are changed.
+message Config {
+  // Next field number 19.
+  // Acoustic echo canceler.
+  optional bool aec_enabled = 1;
+  optional bool aec_delay_agnostic_enabled = 2;
+  optional bool aec_drift_compensation_enabled = 3;
+  optional bool aec_extended_filter_enabled = 4;
+  optional int32 aec_suppression_level = 5;
+  // Mobile AEC.
+  optional bool aecm_enabled = 6;
+  optional bool aecm_comfort_noise_enabled = 7;
+  optional int32 aecm_routing_mode = 8;
+  // Automatic gain controller.
+  optional bool agc_enabled = 9;
+  optional int32 agc_mode = 10;
+  optional bool agc_limiter_enabled = 11;
+  optional bool noise_robust_agc_enabled = 12;
+  // High pass filter.
+  optional bool hpf_enabled = 13;
+  // Noise suppression.
+  optional bool ns_enabled = 14;
+  optional int32 ns_level = 15;
+  // Transient suppression.
+  optional bool transient_suppression_enabled = 16;
+  // Semicolon-separated string containing experimental feature
+  // descriptions.
+  optional string experiments_description = 17;
+  // Intelligibility Enhancer.
+  optional bool intelligibility_enhancer_enabled = 18;
+}
+
+message Event {
+  enum Type {
+    INIT = 0;
+    REVERSE_STREAM = 1;
+    STREAM = 2;
+    CONFIG = 3;
+    UNKNOWN_EVENT = 4;
+  }
+
+  required Type type = 1;
+
+  optional Init init = 2;
+  optional ReverseStream reverse_stream = 3;
+  optional Stream stream = 4;
+  optional Config config = 5;
+}
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/echo_cancellation_bit_exact_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/echo_cancellation_bit_exact_unittest.cc
new file mode 100644
index 0000000000..857cb1c825
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/echo_cancellation_bit_exact_unittest.cc
@@ -0,0 +1,357 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */ +#include <vector> + +#include "api/array_view.h" +#include "modules/audio_processing/audio_buffer.h" +#include "modules/audio_processing/echo_cancellation_impl.h" +#include "modules/audio_processing/test/audio_buffer_tools.h" +#include "modules/audio_processing/test/bitexactness_tools.h" +#include "test/gtest.h" + +namespace webrtc { +namespace { + +const int kNumFramesToProcess = 100; + +void SetupComponent(int sample_rate_hz, + EchoCancellation::SuppressionLevel suppression_level, + bool drift_compensation_enabled, + EchoCancellationImpl* echo_canceller) { + echo_canceller->Initialize(sample_rate_hz, 1, 1, 1); + EchoCancellation* ec = static_cast<EchoCancellation*>(echo_canceller); + ec->Enable(true); + ec->set_suppression_level(suppression_level); + ec->enable_drift_compensation(drift_compensation_enabled); + + Config config; + config.Set<DelayAgnostic>(new DelayAgnostic(true)); + config.Set<ExtendedFilter>(new ExtendedFilter(true)); + echo_canceller->SetExtraOptions(config); +} + +void ProcessOneFrame(int sample_rate_hz, + int stream_delay_ms, + bool drift_compensation_enabled, + int stream_drift_samples, + AudioBuffer* render_audio_buffer, + AudioBuffer* capture_audio_buffer, + EchoCancellationImpl* echo_canceller) { + if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) { + render_audio_buffer->SplitIntoFrequencyBands(); + capture_audio_buffer->SplitIntoFrequencyBands(); + } + + std::vector<float> render_audio; + EchoCancellationImpl::PackRenderAudioBuffer( + render_audio_buffer, 1, render_audio_buffer->num_channels(), + &render_audio); + echo_canceller->ProcessRenderAudio(render_audio); + + if (drift_compensation_enabled) { + static_cast<EchoCancellation*>(echo_canceller) + ->set_stream_drift_samples(stream_drift_samples); + } + + echo_canceller->ProcessCaptureAudio(capture_audio_buffer, stream_delay_ms); + + if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) { + capture_audio_buffer->MergeFrequencyBands(); + } +} + +void RunBitexactnessTest(int sample_rate_hz, + size_t num_channels, + int stream_delay_ms, + bool drift_compensation_enabled, + int stream_drift_samples, + EchoCancellation::SuppressionLevel suppression_level, + bool stream_has_echo_reference, + const rtc::ArrayView<const float>& output_reference) { + rtc::CriticalSection crit_render; + rtc::CriticalSection crit_capture; + EchoCancellationImpl echo_canceller(&crit_render, &crit_capture); + SetupComponent(sample_rate_hz, suppression_level, drift_compensation_enabled, + &echo_canceller); + + const int samples_per_channel = rtc::CheckedDivExact(sample_rate_hz, 100); + const StreamConfig render_config(sample_rate_hz, num_channels, false); + AudioBuffer render_buffer( + render_config.num_frames(), render_config.num_channels(), + render_config.num_frames(), 1, render_config.num_frames()); + test::InputAudioFile render_file( + test::GetApmRenderTestVectorFileName(sample_rate_hz)); + std::vector<float> render_input(samples_per_channel * num_channels); + + const StreamConfig capture_config(sample_rate_hz, num_channels, false); + AudioBuffer capture_buffer( + capture_config.num_frames(), capture_config.num_channels(), + capture_config.num_frames(), 1, capture_config.num_frames()); + test::InputAudioFile capture_file( + test::GetApmCaptureTestVectorFileName(sample_rate_hz)); + std::vector<float> capture_input(samples_per_channel * num_channels); + + for (int frame_no = 0; frame_no < kNumFramesToProcess; ++frame_no) { + ReadFloatSamplesFromStereoFile(samples_per_channel, num_channels, + &render_file, render_input); + 
ReadFloatSamplesFromStereoFile(samples_per_channel, num_channels,
+                                   &capture_file, capture_input);
+
+    test::CopyVectorToAudioBuffer(render_config, render_input, &render_buffer);
+    test::CopyVectorToAudioBuffer(capture_config, capture_input,
+                                  &capture_buffer);
+
+    ProcessOneFrame(sample_rate_hz, stream_delay_ms, drift_compensation_enabled,
+                    stream_drift_samples, &render_buffer, &capture_buffer,
+                    &echo_canceller);
+  }
+
+  // Extract and verify the test results.
+  std::vector<float> capture_output;
+  test::ExtractVectorFromAudioBuffer(capture_config, &capture_buffer,
+                                     &capture_output);
+
+  EXPECT_EQ(stream_has_echo_reference,
+            static_cast<EchoCancellation*>(&echo_canceller)->stream_has_echo());
+
+  // Compare the output with the reference. Only the first values of the
+  // output from the last processed frame are compared, to avoid having to
+  // specify all preceding frames as test vectors. As the algorithm being
+  // tested has a memory, testing only the last frame implicitly also tests
+  // the preceding frames.
+  const float kElementErrorBound = 1.0f / 32768.0f;
+  EXPECT_TRUE(test::VerifyDeinterleavedArray(
+      capture_config.num_frames(), capture_config.num_channels(),
+      output_reference, capture_output, kElementErrorBound));
+}
+
+const bool kStreamHasEchoReference = true;
+
+}  // namespace
+
+// TODO(peah): Activate all these tests for ARM and ARM64 once the issue on
+// the Chromium ARM and ARM64 bots has been identified. This is tracked in
+// the issue https://bugs.chromium.org/p/webrtc/issues/detail?id=5711.
+
+#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \
+      defined(WEBRTC_ANDROID))
+TEST(EchoCancellationBitExactnessTest,
+     Mono8kHz_HighLevel_NoDrift_StreamDelay0) {
+#else
+TEST(EchoCancellationBitExactnessTest,
+     DISABLED_Mono8kHz_HighLevel_NoDrift_StreamDelay0) {
+#endif
+  const float kOutputReference[] = {-0.000646f, -0.001525f, 0.002688f};
+  RunBitexactnessTest(8000, 1, 0, false, 0,
+                      EchoCancellation::SuppressionLevel::kHighSuppression,
+                      kStreamHasEchoReference, kOutputReference);
+}
+
+#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \
+      defined(WEBRTC_ANDROID))
+TEST(EchoCancellationBitExactnessTest,
+     Mono16kHz_HighLevel_NoDrift_StreamDelay0) {
+#else
+TEST(EchoCancellationBitExactnessTest,
+     DISABLED_Mono16kHz_HighLevel_NoDrift_StreamDelay0) {
+#endif
+  const float kOutputReference[] = {0.000055f, 0.000421f, 0.001149f};
+  RunBitexactnessTest(16000, 1, 0, false, 0,
+                      EchoCancellation::SuppressionLevel::kHighSuppression,
+                      kStreamHasEchoReference, kOutputReference);
+}
+
+#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \
+      defined(WEBRTC_ANDROID))
+TEST(EchoCancellationBitExactnessTest,
+     Mono32kHz_HighLevel_NoDrift_StreamDelay0) {
+#else
+TEST(EchoCancellationBitExactnessTest,
+     DISABLED_Mono32kHz_HighLevel_NoDrift_StreamDelay0) {
+#endif
+  const float kOutputReference[] = {-0.000671f, 0.000061f, -0.000031f};
+  RunBitexactnessTest(32000, 1, 0, false, 0,
+                      EchoCancellation::SuppressionLevel::kHighSuppression,
+                      kStreamHasEchoReference, kOutputReference);
+}
+
+#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \
+      defined(WEBRTC_ANDROID))
+TEST(EchoCancellationBitExactnessTest,
+     Mono48kHz_HighLevel_NoDrift_StreamDelay0) {
+#else
+TEST(EchoCancellationBitExactnessTest,
+     DISABLED_Mono48kHz_HighLevel_NoDrift_StreamDelay0) {
+#endif
+  const float kOutputReference[] = {-0.001403f, -0.001411f, -0.000755f};
+  RunBitexactnessTest(48000, 1, 0, false, 0,
+                      EchoCancellation::SuppressionLevel::kHighSuppression,
+                      
kStreamHasEchoReference, kOutputReference); +} + +#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \ + defined(WEBRTC_ANDROID)) +TEST(EchoCancellationBitExactnessTest, + Mono16kHz_LowLevel_NoDrift_StreamDelay0) { +#else +TEST(EchoCancellationBitExactnessTest, + DISABLED_Mono16kHz_LowLevel_NoDrift_StreamDelay0) { +#endif +#if defined(WEBRTC_MAC) + const float kOutputReference[] = {-0.000145f, 0.000179f, 0.000917f}; +#else + const float kOutputReference[] = {-0.000009f, 0.000363f, 0.001094f}; +#endif + RunBitexactnessTest(16000, 1, 0, false, 0, + EchoCancellation::SuppressionLevel::kLowSuppression, + kStreamHasEchoReference, kOutputReference); +} + +#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \ + defined(WEBRTC_ANDROID)) +TEST(EchoCancellationBitExactnessTest, + Mono16kHz_ModerateLevel_NoDrift_StreamDelay0) { +#else +TEST(EchoCancellationBitExactnessTest, + DISABLED_Mono16kHz_ModerateLevel_NoDrift_StreamDelay0) { +#endif + const float kOutputReference[] = {0.000055f, 0.000421f, 0.001149f}; + RunBitexactnessTest(16000, 1, 0, false, 0, + EchoCancellation::SuppressionLevel::kModerateSuppression, + kStreamHasEchoReference, kOutputReference); +} + +#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \ + defined(WEBRTC_ANDROID)) +TEST(EchoCancellationBitExactnessTest, + Mono16kHz_HighLevel_NoDrift_StreamDelay10) { +#else +TEST(EchoCancellationBitExactnessTest, + DISABLED_Mono16kHz_HighLevel_NoDrift_StreamDelay10) { +#endif + const float kOutputReference[] = {0.000055f, 0.000421f, 0.001149f}; + RunBitexactnessTest(16000, 1, 10, false, 0, + EchoCancellation::SuppressionLevel::kHighSuppression, + kStreamHasEchoReference, kOutputReference); +} + +#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \ + defined(WEBRTC_ANDROID)) +TEST(EchoCancellationBitExactnessTest, + Mono16kHz_HighLevel_NoDrift_StreamDelay20) { +#else +TEST(EchoCancellationBitExactnessTest, + DISABLED_Mono16kHz_HighLevel_NoDrift_StreamDelay20) { +#endif + const float kOutputReference[] = {0.000055f, 0.000421f, 0.001149f}; + RunBitexactnessTest(16000, 1, 20, false, 0, + EchoCancellation::SuppressionLevel::kHighSuppression, + kStreamHasEchoReference, kOutputReference); +} + +#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \ + defined(WEBRTC_ANDROID)) +TEST(EchoCancellationBitExactnessTest, + Mono16kHz_HighLevel_Drift0_StreamDelay0) { +#else +TEST(EchoCancellationBitExactnessTest, + DISABLED_Mono16kHz_HighLevel_Drift0_StreamDelay0) { +#endif + const float kOutputReference[] = {0.000055f, 0.000421f, 0.001149f}; + RunBitexactnessTest(16000, 1, 0, true, 0, + EchoCancellation::SuppressionLevel::kHighSuppression, + kStreamHasEchoReference, kOutputReference); +} + +#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \ + defined(WEBRTC_ANDROID)) +TEST(EchoCancellationBitExactnessTest, + Mono16kHz_HighLevel_Drift5_StreamDelay0) { +#else +TEST(EchoCancellationBitExactnessTest, + DISABLED_Mono16kHz_HighLevel_Drift5_StreamDelay0) { +#endif + const float kOutputReference[] = {0.000055f, 0.000421f, 0.001149f}; + RunBitexactnessTest(16000, 1, 0, true, 5, + EchoCancellation::SuppressionLevel::kHighSuppression, + kStreamHasEchoReference, kOutputReference); +} + +#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \ + defined(WEBRTC_ANDROID)) +TEST(EchoCancellationBitExactnessTest, + Stereo8kHz_HighLevel_NoDrift_StreamDelay0) { +#else +TEST(EchoCancellationBitExactnessTest, + DISABLED_Stereo8kHz_HighLevel_NoDrift_StreamDelay0) { +#endif +#if 
defined(WEBRTC_MAC) + const float kOutputReference[] = {-0.000392f, -0.001449f, 0.003004f, + -0.000392f, -0.001449f, 0.003004f}; +#else + const float kOutputReference[] = {-0.000464f, -0.001525f, 0.002933f, + -0.000464f, -0.001525f, 0.002933f}; +#endif + RunBitexactnessTest(8000, 2, 0, false, 0, + EchoCancellation::SuppressionLevel::kHighSuppression, + kStreamHasEchoReference, kOutputReference); +} + +#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \ + defined(WEBRTC_ANDROID)) +TEST(EchoCancellationBitExactnessTest, + Stereo16kHz_HighLevel_NoDrift_StreamDelay0) { +#else +TEST(EchoCancellationBitExactnessTest, + DISABLED_Stereo16kHz_HighLevel_NoDrift_StreamDelay0) { +#endif + const float kOutputReference[] = {0.000166f, 0.000735f, 0.000841f, + 0.000166f, 0.000735f, 0.000841f}; + RunBitexactnessTest(16000, 2, 0, false, 0, + EchoCancellation::SuppressionLevel::kHighSuppression, + kStreamHasEchoReference, kOutputReference); +} + +#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \ + defined(WEBRTC_ANDROID)) +TEST(EchoCancellationBitExactnessTest, + Stereo32kHz_HighLevel_NoDrift_StreamDelay0) { +#else +TEST(EchoCancellationBitExactnessTest, + DISABLED_Stereo32kHz_HighLevel_NoDrift_StreamDelay0) { +#endif +#if defined(WEBRTC_MAC) + const float kOutputReference[] = {-0.000458f, 0.000244f, 0.000153f, + -0.000458f, 0.000244f, 0.000153f}; +#else + const float kOutputReference[] = {-0.000427f, 0.000183f, 0.000183f, + -0.000427f, 0.000183f, 0.000183f}; +#endif + RunBitexactnessTest(32000, 2, 0, false, 0, + EchoCancellation::SuppressionLevel::kHighSuppression, + kStreamHasEchoReference, kOutputReference); +} + +#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \ + defined(WEBRTC_ANDROID)) +TEST(EchoCancellationBitExactnessTest, + Stereo48kHz_HighLevel_NoDrift_StreamDelay0) { +#else +TEST(EchoCancellationBitExactnessTest, + DISABLED_Stereo48kHz_HighLevel_NoDrift_StreamDelay0) { +#endif + const float kOutputReference[] = {-0.001101f, -0.001101f, -0.000449f, + -0.001101f, -0.001101f, -0.000449f}; + RunBitexactnessTest(48000, 2, 0, false, 0, + EchoCancellation::SuppressionLevel::kHighSuppression, + kStreamHasEchoReference, kOutputReference); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/echo_cancellation_impl.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/echo_cancellation_impl.cc new file mode 100644 index 0000000000..99f676c562 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/echo_cancellation_impl.cc @@ -0,0 +1,516 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/echo_cancellation_impl.h" + +#include <string.h> + +#include "modules/audio_processing/aec/aec_core.h" +#include "modules/audio_processing/aec/echo_cancellation.h" +#include "modules/audio_processing/audio_buffer.h" +#include "rtc_base/checks.h" +#include "system_wrappers/include/field_trial.h" + +namespace webrtc { + +namespace { +int16_t MapSetting(EchoCancellation::SuppressionLevel level) { + switch (level) { + case EchoCancellation::kLowSuppression: + return kAecNlpConservative; + case EchoCancellation::kModerateSuppression: + return kAecNlpModerate; + case EchoCancellation::kHighSuppression: + return kAecNlpAggressive; + } + RTC_NOTREACHED(); + return -1; +} + +AudioProcessing::Error MapError(int err) { + switch (err) { + case AEC_UNSUPPORTED_FUNCTION_ERROR: + return AudioProcessing::kUnsupportedFunctionError; + case AEC_BAD_PARAMETER_ERROR: + return AudioProcessing::kBadParameterError; + case AEC_BAD_PARAMETER_WARNING: + return AudioProcessing::kBadStreamParameterWarning; + default: + // AEC_UNSPECIFIED_ERROR + // AEC_UNINITIALIZED_ERROR + // AEC_NULL_POINTER_ERROR + return AudioProcessing::kUnspecifiedError; + } +} + +bool EnforceZeroStreamDelay() { +#if defined(CHROMEOS) + return !field_trial::IsEnabled("WebRTC-Aec2ZeroStreamDelayKillSwitch"); +#else + return false; +#endif +} + +} // namespace + +struct EchoCancellationImpl::StreamProperties { + StreamProperties() = delete; + StreamProperties(int sample_rate_hz, + size_t num_reverse_channels, + size_t num_output_channels, + size_t num_proc_channels) + : sample_rate_hz(sample_rate_hz), + num_reverse_channels(num_reverse_channels), + num_output_channels(num_output_channels), + num_proc_channels(num_proc_channels) {} + + const int sample_rate_hz; + const size_t num_reverse_channels; + const size_t num_output_channels; + const size_t num_proc_channels; +}; + +class EchoCancellationImpl::Canceller { + public: + Canceller() { + state_ = WebRtcAec_Create(); + RTC_DCHECK(state_); + } + + ~Canceller() { + RTC_CHECK(state_); + WebRtcAec_Free(state_); + } + + void* state() { return state_; } + + void Initialize(int sample_rate_hz) { + // TODO(ajm): Drift compensation is disabled in practice. If restored, it + // should be managed internally and not depend on the hardware sample rate. + // For now, just hardcode a 48 kHz value. 
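+    // (WebRtcAec_Init() takes the AEC sample rate plus a sound-card clock
+    // rate that is only meaningful for drift compensation; it returns 0 on
+    // success, which the DCHECK below relies on.)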
+ const int error = WebRtcAec_Init(state_, sample_rate_hz, 48000); + RTC_DCHECK_EQ(0, error); + } + + private: + void* state_; +}; + +EchoCancellationImpl::EchoCancellationImpl(rtc::CriticalSection* crit_render, + rtc::CriticalSection* crit_capture) + : crit_render_(crit_render), + crit_capture_(crit_capture), + drift_compensation_enabled_(false), + metrics_enabled_(false), + suppression_level_(kModerateSuppression), + stream_drift_samples_(0), + was_stream_drift_set_(false), + stream_has_echo_(false), + delay_logging_enabled_(false), + extended_filter_enabled_(false), + delay_agnostic_enabled_(false), + enforce_zero_stream_delay_(EnforceZeroStreamDelay()) { + RTC_DCHECK(crit_render); + RTC_DCHECK(crit_capture); +} + +EchoCancellationImpl::~EchoCancellationImpl() = default; + +void EchoCancellationImpl::ProcessRenderAudio( + rtc::ArrayView<const float> packed_render_audio) { + rtc::CritScope cs_capture(crit_capture_); + if (!enabled_) { + return; + } + + RTC_DCHECK(stream_properties_); + size_t handle_index = 0; + size_t buffer_index = 0; + const size_t num_frames_per_band = + packed_render_audio.size() / (stream_properties_->num_output_channels * + stream_properties_->num_reverse_channels); + for (size_t i = 0; i < stream_properties_->num_output_channels; i++) { + for (size_t j = 0; j < stream_properties_->num_reverse_channels; j++) { + WebRtcAec_BufferFarend(cancellers_[handle_index++]->state(), + &packed_render_audio[buffer_index], + num_frames_per_band); + + buffer_index += num_frames_per_band; + } + } +} + + +int EchoCancellationImpl::ProcessCaptureAudio(AudioBuffer* audio, + int stream_delay_ms) { + rtc::CritScope cs_capture(crit_capture_); + if (!enabled_) { + return AudioProcessing::kNoError; + } + + const int stream_delay_ms_use = + enforce_zero_stream_delay_ ? 0 : stream_delay_ms; + + if (drift_compensation_enabled_ && !was_stream_drift_set_) { + return AudioProcessing::kStreamParameterNotSetError; + } + + RTC_DCHECK(stream_properties_); + RTC_DCHECK_GE(160, audio->num_frames_per_band()); + RTC_DCHECK_EQ(audio->num_channels(), stream_properties_->num_proc_channels); + + int err = AudioProcessing::kNoError; + + // The ordering convention must be followed to pass to the correct AEC. + size_t handle_index = 0; + stream_has_echo_ = false; + for (size_t i = 0; i < audio->num_channels(); i++) { + for (size_t j = 0; j < stream_properties_->num_reverse_channels; j++) { + err = WebRtcAec_Process(cancellers_[handle_index]->state(), + audio->split_bands_const_f(i), audio->num_bands(), + audio->split_bands_f(i), + audio->num_frames_per_band(), stream_delay_ms_use, + stream_drift_samples_); + + if (err != AudioProcessing::kNoError) { + err = MapError(err); + // TODO(ajm): Figure out how to return warnings properly. + if (err != AudioProcessing::kBadStreamParameterWarning) { + return err; + } + } + + int status = 0; + err = WebRtcAec_get_echo_status(cancellers_[handle_index]->state(), + &status); + if (err != AudioProcessing::kNoError) { + return MapError(err); + } + + if (status == 1) { + stream_has_echo_ = true; + } + + handle_index++; + } + } + + was_stream_drift_set_ = false; + return AudioProcessing::kNoError; +} + +int EchoCancellationImpl::Enable(bool enable) { + // Run in a single-threaded manner. + rtc::CritScope cs_render(crit_render_); + rtc::CritScope cs_capture(crit_capture_); + + if (enable && !enabled_) { + enabled_ = enable; // Must be set before Initialize() is called. + + // TODO(peah): Simplify once the Enable function has been removed from + // the public APM API. 
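+    // (Note the condition above: only an off-to-on transition reaches this
+    // branch, so calling Enable(true) while already enabled does not trigger
+    // a re-initialization.)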
+    RTC_DCHECK(stream_properties_);
+    Initialize(stream_properties_->sample_rate_hz,
+               stream_properties_->num_reverse_channels,
+               stream_properties_->num_output_channels,
+               stream_properties_->num_proc_channels);
+  } else {
+    enabled_ = enable;
+  }
+  return AudioProcessing::kNoError;
+}
+
+bool EchoCancellationImpl::is_enabled() const {
+  rtc::CritScope cs(crit_capture_);
+  return enabled_;
+}
+
+int EchoCancellationImpl::set_suppression_level(SuppressionLevel level) {
+  {
+    if (MapSetting(level) == -1) {
+      return AudioProcessing::kBadParameterError;
+    }
+    rtc::CritScope cs(crit_capture_);
+    suppression_level_ = level;
+  }
+  return Configure();
+}
+
+EchoCancellation::SuppressionLevel EchoCancellationImpl::suppression_level()
+    const {
+  rtc::CritScope cs(crit_capture_);
+  return suppression_level_;
+}
+
+int EchoCancellationImpl::enable_drift_compensation(bool enable) {
+  {
+    rtc::CritScope cs(crit_capture_);
+    drift_compensation_enabled_ = enable;
+  }
+  return Configure();
+}
+
+bool EchoCancellationImpl::is_drift_compensation_enabled() const {
+  rtc::CritScope cs(crit_capture_);
+  return drift_compensation_enabled_;
+}
+
+void EchoCancellationImpl::set_stream_drift_samples(int drift) {
+  rtc::CritScope cs(crit_capture_);
+  was_stream_drift_set_ = true;
+  stream_drift_samples_ = drift;
+}
+
+int EchoCancellationImpl::stream_drift_samples() const {
+  rtc::CritScope cs(crit_capture_);
+  return stream_drift_samples_;
+}
+
+int EchoCancellationImpl::enable_metrics(bool enable) {
+  {
+    rtc::CritScope cs(crit_capture_);
+    metrics_enabled_ = enable;
+  }
+  return Configure();
+}
+
+bool EchoCancellationImpl::are_metrics_enabled() const {
+  rtc::CritScope cs(crit_capture_);
+  return enabled_ && metrics_enabled_;
+}
+
+// TODO(ajm): We currently use only the metrics from the first AEC. Think more
+// about the best way to extend this to multi-channel.
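+// For illustration only, a hypothetical caller could consume these metrics
+// roughly as follows (assuming |apm| is an AudioProcessing instance with the
+// AEC enabled and metrics turned on):
+//
+//   EchoCancellation::Metrics metrics;
+//   if (apm->echo_cancellation()->GetMetrics(&metrics) ==
+//       AudioProcessing::kNoError) {
+//     const int average_erle_db = metrics.echo_return_loss_enhancement.average;
+//   }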
+int EchoCancellationImpl::GetMetrics(Metrics* metrics) { + rtc::CritScope cs(crit_capture_); + if (metrics == NULL) { + return AudioProcessing::kNullPointerError; + } + + if (!enabled_ || !metrics_enabled_) { + return AudioProcessing::kNotEnabledError; + } + + AecMetrics my_metrics; + memset(&my_metrics, 0, sizeof(my_metrics)); + memset(metrics, 0, sizeof(Metrics)); + + const int err = WebRtcAec_GetMetrics(cancellers_[0]->state(), &my_metrics); + if (err != AudioProcessing::kNoError) { + return MapError(err); + } + + metrics->residual_echo_return_loss.instant = my_metrics.rerl.instant; + metrics->residual_echo_return_loss.average = my_metrics.rerl.average; + metrics->residual_echo_return_loss.maximum = my_metrics.rerl.max; + metrics->residual_echo_return_loss.minimum = my_metrics.rerl.min; + + metrics->echo_return_loss.instant = my_metrics.erl.instant; + metrics->echo_return_loss.average = my_metrics.erl.average; + metrics->echo_return_loss.maximum = my_metrics.erl.max; + metrics->echo_return_loss.minimum = my_metrics.erl.min; + + metrics->echo_return_loss_enhancement.instant = my_metrics.erle.instant; + metrics->echo_return_loss_enhancement.average = my_metrics.erle.average; + metrics->echo_return_loss_enhancement.maximum = my_metrics.erle.max; + metrics->echo_return_loss_enhancement.minimum = my_metrics.erle.min; + + metrics->a_nlp.instant = my_metrics.aNlp.instant; + metrics->a_nlp.average = my_metrics.aNlp.average; + metrics->a_nlp.maximum = my_metrics.aNlp.max; + metrics->a_nlp.minimum = my_metrics.aNlp.min; + + metrics->divergent_filter_fraction = my_metrics.divergent_filter_fraction; + return AudioProcessing::kNoError; +} + +bool EchoCancellationImpl::stream_has_echo() const { + rtc::CritScope cs(crit_capture_); + return stream_has_echo_; +} + +int EchoCancellationImpl::enable_delay_logging(bool enable) { + { + rtc::CritScope cs(crit_capture_); + delay_logging_enabled_ = enable; + } + return Configure(); +} + +bool EchoCancellationImpl::is_delay_logging_enabled() const { + rtc::CritScope cs(crit_capture_); + return enabled_ && delay_logging_enabled_; +} + +bool EchoCancellationImpl::is_delay_agnostic_enabled() const { + rtc::CritScope cs(crit_capture_); + return delay_agnostic_enabled_; +} + +std::string EchoCancellationImpl::GetExperimentsDescription() { + rtc::CritScope cs(crit_capture_); + return refined_adaptive_filter_enabled_ ? "RefinedAdaptiveFilter;" : ""; +} + +bool EchoCancellationImpl::is_refined_adaptive_filter_enabled() const { + rtc::CritScope cs(crit_capture_); + return refined_adaptive_filter_enabled_; +} + +bool EchoCancellationImpl::is_extended_filter_enabled() const { + rtc::CritScope cs(crit_capture_); + return extended_filter_enabled_; +} + +// TODO(bjornv): How should we handle the multi-channel case? 
+int EchoCancellationImpl::GetDelayMetrics(int* median, int* std) { + rtc::CritScope cs(crit_capture_); + float fraction_poor_delays = 0; + return GetDelayMetrics(median, std, &fraction_poor_delays); +} + +int EchoCancellationImpl::GetDelayMetrics(int* median, int* std, + float* fraction_poor_delays) { + rtc::CritScope cs(crit_capture_); + if (median == NULL) { + return AudioProcessing::kNullPointerError; + } + if (std == NULL) { + return AudioProcessing::kNullPointerError; + } + + if (!enabled_ || !delay_logging_enabled_) { + return AudioProcessing::kNotEnabledError; + } + + const int err = WebRtcAec_GetDelayMetrics(cancellers_[0]->state(), median, + std, fraction_poor_delays); + if (err != AudioProcessing::kNoError) { + return MapError(err); + } + + return AudioProcessing::kNoError; +} + +struct AecCore* EchoCancellationImpl::aec_core() const { + rtc::CritScope cs(crit_capture_); + if (!enabled_) { + return NULL; + } + return WebRtcAec_aec_core(cancellers_[0]->state()); +} + +void EchoCancellationImpl::Initialize(int sample_rate_hz, + size_t num_reverse_channels, + size_t num_output_channels, + size_t num_proc_channels) { + rtc::CritScope cs_render(crit_render_); + rtc::CritScope cs_capture(crit_capture_); + + stream_properties_.reset( + new StreamProperties(sample_rate_hz, num_reverse_channels, + num_output_channels, num_proc_channels)); + + if (!enabled_) { + return; + } + + const size_t num_cancellers_required = + NumCancellersRequired(stream_properties_->num_output_channels, + stream_properties_->num_reverse_channels); + if (num_cancellers_required > cancellers_.size()) { + const size_t cancellers_old_size = cancellers_.size(); + cancellers_.resize(num_cancellers_required); + + for (size_t i = cancellers_old_size; i < cancellers_.size(); ++i) { + cancellers_[i].reset(new Canceller()); + } + } + + for (auto& canceller : cancellers_) { + canceller->Initialize(sample_rate_hz); + } + + Configure(); +} + +int EchoCancellationImpl::GetSystemDelayInSamples() const { + rtc::CritScope cs(crit_capture_); + RTC_DCHECK(enabled_); + // Report the delay for the first AEC component. + return WebRtcAec_system_delay( + WebRtcAec_aec_core(cancellers_[0]->state())); +} + +void EchoCancellationImpl::PackRenderAudioBuffer( + const AudioBuffer* audio, + size_t num_output_channels, + size_t num_channels, + std::vector<float>* packed_buffer) { + RTC_DCHECK_GE(160, audio->num_frames_per_band()); + RTC_DCHECK_EQ(num_channels, audio->num_channels()); + + packed_buffer->clear(); + // The ordering convention must be followed to pass the correct data. + for (size_t i = 0; i < num_output_channels; i++) { + for (size_t j = 0; j < audio->num_channels(); j++) { + // Buffer the samples in the render queue. 
+      packed_buffer->insert(packed_buffer->end(),
+                            audio->split_bands_const_f(j)[kBand0To8kHz],
+                            (audio->split_bands_const_f(j)[kBand0To8kHz] +
+                             audio->num_frames_per_band()));
+    }
+  }
+}
+
+void EchoCancellationImpl::SetExtraOptions(const webrtc::Config& config) {
+  {
+    rtc::CritScope cs(crit_capture_);
+    extended_filter_enabled_ = config.Get<ExtendedFilter>().enabled;
+    delay_agnostic_enabled_ = config.Get<DelayAgnostic>().enabled;
+    refined_adaptive_filter_enabled_ =
+        config.Get<RefinedAdaptiveFilter>().enabled;
+  }
+  Configure();
+}
+
+int EchoCancellationImpl::Configure() {
+  rtc::CritScope cs_render(crit_render_);
+  rtc::CritScope cs_capture(crit_capture_);
+  AecConfig config;
+  config.metricsMode = metrics_enabled_;
+  config.nlpMode = MapSetting(suppression_level_);
+  config.skewMode = drift_compensation_enabled_;
+  config.delay_logging = delay_logging_enabled_;
+
+  int error = AudioProcessing::kNoError;
+  for (auto& canceller : cancellers_) {
+    WebRtcAec_enable_extended_filter(WebRtcAec_aec_core(canceller->state()),
+                                     extended_filter_enabled_ ? 1 : 0);
+    WebRtcAec_enable_delay_agnostic(WebRtcAec_aec_core(canceller->state()),
+                                    delay_agnostic_enabled_ ? 1 : 0);
+    WebRtcAec_enable_refined_adaptive_filter(
+        WebRtcAec_aec_core(canceller->state()),
+        refined_adaptive_filter_enabled_);
+    const int handle_error = WebRtcAec_set_config(canceller->state(), config);
+    if (handle_error != AudioProcessing::kNoError) {
+      error = handle_error;
+    }
+  }
+  return error;
+}
+
+size_t EchoCancellationImpl::NumCancellersRequired(
+    size_t num_output_channels,
+    size_t num_reverse_channels) {
+  return num_output_channels * num_reverse_channels;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/echo_cancellation_impl.h b/third_party/libwebrtc/webrtc/modules/audio_processing/echo_cancellation_impl.h
new file mode 100644
index 0000000000..6700249da0
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/echo_cancellation_impl.h
@@ -0,0 +1,119 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_ECHO_CANCELLATION_IMPL_H_
+#define MODULES_AUDIO_PROCESSING_ECHO_CANCELLATION_IMPL_H_
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "modules/audio_processing/include/audio_processing.h"
+#include "rtc_base/constructormagic.h"
+#include "rtc_base/criticalsection.h"
+
+namespace webrtc {
+
+class AudioBuffer;
+
+class EchoCancellationImpl : public EchoCancellation {
+ public:
+  EchoCancellationImpl(rtc::CriticalSection* crit_render,
+                       rtc::CriticalSection* crit_capture);
+  ~EchoCancellationImpl() override;
+
+  void ProcessRenderAudio(rtc::ArrayView<const float> packed_render_audio);
+  int ProcessCaptureAudio(AudioBuffer* audio, int stream_delay_ms);
+
+  // EchoCancellation implementation.
+  bool is_enabled() const override;
+  int stream_drift_samples() const override;
+  SuppressionLevel suppression_level() const override;
+  bool is_drift_compensation_enabled() const override;
+
+  void Initialize(int sample_rate_hz,
+                  size_t num_reverse_channels,
+                  size_t num_output_channels,
+                  size_t num_proc_channels);
+  void SetExtraOptions(const webrtc::Config& config);
+  bool is_delay_agnostic_enabled() const;
+  bool is_extended_filter_enabled() const;
+  std::string GetExperimentsDescription();
+  bool is_refined_adaptive_filter_enabled() const;
+
+  // Returns the system delay of the first AEC component.
+  int GetSystemDelayInSamples() const;
+
+  static void PackRenderAudioBuffer(const AudioBuffer* audio,
+                                    size_t num_output_channels,
+                                    size_t num_channels,
+                                    std::vector<float>* packed_buffer);
+  static size_t NumCancellersRequired(size_t num_output_channels,
+                                      size_t num_reverse_channels);
+
+  // Enable logging of various AEC statistics.
+  int enable_metrics(bool enable) override;
+
+  // Provides various statistics about the AEC.
+  int GetMetrics(Metrics* metrics) override;
+
+  // Enable logging of delay metrics.
+  int enable_delay_logging(bool enable) override;
+
+  // Provides delay metrics.
+  int GetDelayMetrics(int* median,
+                      int* std,
+                      float* fraction_poor_delays) override;
+
+ private:
+  class Canceller;
+  struct StreamProperties;
+
+  // EchoCancellation implementation.
+  int Enable(bool enable) override;
+  int enable_drift_compensation(bool enable) override;
+  void set_stream_drift_samples(int drift) override;
+  int set_suppression_level(SuppressionLevel level) override;
+  bool are_metrics_enabled() const override;
+  bool stream_has_echo() const override;
+  bool is_delay_logging_enabled() const override;
+  int GetDelayMetrics(int* median, int* std) override;
+
+  struct AecCore* aec_core() const override;
+
+  void AllocateRenderQueue();
+  int Configure();
+
+  rtc::CriticalSection* const crit_render_ RTC_ACQUIRED_BEFORE(crit_capture_);
+  rtc::CriticalSection* const crit_capture_;
+
+  bool enabled_ = false;
+  bool drift_compensation_enabled_ RTC_GUARDED_BY(crit_capture_);
+  bool metrics_enabled_ RTC_GUARDED_BY(crit_capture_);
+  SuppressionLevel suppression_level_ RTC_GUARDED_BY(crit_capture_);
+  int stream_drift_samples_ RTC_GUARDED_BY(crit_capture_);
+  bool was_stream_drift_set_ RTC_GUARDED_BY(crit_capture_);
+  bool stream_has_echo_ RTC_GUARDED_BY(crit_capture_);
+  bool delay_logging_enabled_ RTC_GUARDED_BY(crit_capture_);
+  bool extended_filter_enabled_ RTC_GUARDED_BY(crit_capture_);
+  bool delay_agnostic_enabled_ RTC_GUARDED_BY(crit_capture_);
+  bool refined_adaptive_filter_enabled_ RTC_GUARDED_BY(crit_capture_) = false;
+
+  // Only active on Chrome OS devices.
+  const bool enforce_zero_stream_delay_ RTC_GUARDED_BY(crit_capture_);
+
+  std::vector<std::unique_ptr<Canceller>> cancellers_;
+  std::unique_ptr<StreamProperties> stream_properties_;
+
+  RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(EchoCancellationImpl);
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_ECHO_CANCELLATION_IMPL_H_
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/echo_cancellation_impl_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/echo_cancellation_impl_unittest.cc
new file mode 100644
index 0000000000..2b9e5c5626
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/echo_cancellation_impl_unittest.cc
@@ -0,0 +1,79 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <memory>
+
+#include "modules/audio_processing/aec/aec_core.h"
+#include "modules/audio_processing/include/audio_processing.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+
+TEST(EchoCancellationInternalTest, ExtendedFilter) {
+  std::unique_ptr<AudioProcessing> ap(AudioProcessing::Create());
+  EXPECT_TRUE(ap->echo_cancellation()->aec_core() == NULL);
+
+  EXPECT_EQ(ap->kNoError, ap->echo_cancellation()->Enable(true));
+  EXPECT_TRUE(ap->echo_cancellation()->is_enabled());
+
+  AecCore* aec_core = ap->echo_cancellation()->aec_core();
+  ASSERT_TRUE(aec_core != NULL);
+  // Disabled by default.
+  EXPECT_EQ(0, WebRtcAec_extended_filter_enabled(aec_core));
+
+  Config config;
+  config.Set<ExtendedFilter>(new ExtendedFilter(true));
+  ap->SetExtraOptions(config);
+  EXPECT_EQ(1, WebRtcAec_extended_filter_enabled(aec_core));
+
+  // Retains setting after initialization.
+  EXPECT_EQ(ap->kNoError, ap->Initialize());
+  EXPECT_EQ(1, WebRtcAec_extended_filter_enabled(aec_core));
+
+  config.Set<ExtendedFilter>(new ExtendedFilter(false));
+  ap->SetExtraOptions(config);
+  EXPECT_EQ(0, WebRtcAec_extended_filter_enabled(aec_core));
+
+  // Retains setting after initialization.
+  EXPECT_EQ(ap->kNoError, ap->Initialize());
+  EXPECT_EQ(0, WebRtcAec_extended_filter_enabled(aec_core));
+}
+
+TEST(EchoCancellationInternalTest, DelayAgnostic) {
+  std::unique_ptr<AudioProcessing> ap(AudioProcessing::Create());
+  EXPECT_TRUE(ap->echo_cancellation()->aec_core() == NULL);
+
+  EXPECT_EQ(ap->kNoError, ap->echo_cancellation()->Enable(true));
+  EXPECT_TRUE(ap->echo_cancellation()->is_enabled());
+
+  AecCore* aec_core = ap->echo_cancellation()->aec_core();
+  ASSERT_TRUE(aec_core != NULL);
+  // Disabled by default.
+  EXPECT_EQ(0, WebRtcAec_delay_agnostic_enabled(aec_core));
+
+  Config config;
+  config.Set<DelayAgnostic>(new DelayAgnostic(true));
+  ap->SetExtraOptions(config);
+  EXPECT_EQ(1, WebRtcAec_delay_agnostic_enabled(aec_core));
+
+  // Retains setting after initialization.
+  EXPECT_EQ(ap->kNoError, ap->Initialize());
+  EXPECT_EQ(1, WebRtcAec_delay_agnostic_enabled(aec_core));
+
+  config.Set<DelayAgnostic>(new DelayAgnostic(false));
+  ap->SetExtraOptions(config);
+  EXPECT_EQ(0, WebRtcAec_delay_agnostic_enabled(aec_core));
+
+  // Retains setting after initialization.
+  EXPECT_EQ(ap->kNoError, ap->Initialize());
+  EXPECT_EQ(0, WebRtcAec_delay_agnostic_enabled(aec_core));
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/echo_control_mobile_impl.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/echo_control_mobile_impl.cc
new file mode 100644
index 0000000000..ecb1810666
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/echo_control_mobile_impl.cc
@@ -0,0 +1,391 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */ + +#include "modules/audio_processing/echo_control_mobile_impl.h" + +#include <string.h> + +#include "modules/audio_processing/aecm/echo_control_mobile.h" +#include "modules/audio_processing/audio_buffer.h" +#include "rtc_base/constructormagic.h" +#include "rtc_base/logging.h" + +namespace webrtc { + +namespace { +int16_t MapSetting(EchoControlMobile::RoutingMode mode) { + switch (mode) { + case EchoControlMobile::kQuietEarpieceOrHeadset: + return 0; + case EchoControlMobile::kEarpiece: + return 1; + case EchoControlMobile::kLoudEarpiece: + return 2; + case EchoControlMobile::kSpeakerphone: + return 3; + case EchoControlMobile::kLoudSpeakerphone: + return 4; + } + RTC_NOTREACHED(); + return -1; +} + +AudioProcessing::Error MapError(int err) { + switch (err) { + case AECM_UNSUPPORTED_FUNCTION_ERROR: + return AudioProcessing::kUnsupportedFunctionError; + case AECM_NULL_POINTER_ERROR: + return AudioProcessing::kNullPointerError; + case AECM_BAD_PARAMETER_ERROR: + return AudioProcessing::kBadParameterError; + case AECM_BAD_PARAMETER_WARNING: + return AudioProcessing::kBadStreamParameterWarning; + default: + // AECM_UNSPECIFIED_ERROR + // AECM_UNINITIALIZED_ERROR + return AudioProcessing::kUnspecifiedError; + } +} +} // namespace + +size_t EchoControlMobile::echo_path_size_bytes() { + return WebRtcAecm_echo_path_size_bytes(); +} + +struct EchoControlMobileImpl::StreamProperties { + StreamProperties() = delete; + StreamProperties(int sample_rate_hz, + size_t num_reverse_channels, + size_t num_output_channels) + : sample_rate_hz(sample_rate_hz), + num_reverse_channels(num_reverse_channels), + num_output_channels(num_output_channels) {} + + int sample_rate_hz; + size_t num_reverse_channels; + size_t num_output_channels; +}; + +class EchoControlMobileImpl::Canceller { + public: + Canceller() { + state_ = WebRtcAecm_Create(); + RTC_CHECK(state_); + } + + ~Canceller() { + RTC_DCHECK(state_); + WebRtcAecm_Free(state_); + } + + void* state() { + RTC_DCHECK(state_); + return state_; + } + + void Initialize(int sample_rate_hz, + unsigned char* external_echo_path, + size_t echo_path_size_bytes) { + RTC_DCHECK(state_); + int error = WebRtcAecm_Init(state_, sample_rate_hz); + RTC_DCHECK_EQ(AudioProcessing::kNoError, error); + if (external_echo_path != NULL) { + error = WebRtcAecm_InitEchoPath(state_, external_echo_path, + echo_path_size_bytes); + RTC_DCHECK_EQ(AudioProcessing::kNoError, error); + } + } + + private: + void* state_; + RTC_DISALLOW_COPY_AND_ASSIGN(Canceller); +}; + +EchoControlMobileImpl::EchoControlMobileImpl(rtc::CriticalSection* crit_render, + rtc::CriticalSection* crit_capture) + : crit_render_(crit_render), + crit_capture_(crit_capture), + routing_mode_(kSpeakerphone), + comfort_noise_enabled_(true), + external_echo_path_(NULL) { + RTC_DCHECK(crit_render); + RTC_DCHECK(crit_capture); +} + +EchoControlMobileImpl::~EchoControlMobileImpl() { + if (external_echo_path_ != NULL) { + delete [] external_echo_path_; + external_echo_path_ = NULL; + } +} + +void EchoControlMobileImpl::ProcessRenderAudio( + rtc::ArrayView<const int16_t> packed_render_audio) { + rtc::CritScope cs_capture(crit_capture_); + if (!enabled_) { + return; + } + + RTC_DCHECK(stream_properties_); + + size_t buffer_index = 0; + size_t num_frames_per_band = + packed_render_audio.size() / (stream_properties_->num_output_channels * + stream_properties_->num_reverse_channels); + + for (auto& canceller : cancellers_) { + WebRtcAecm_BufferFarend(canceller->state(), + &packed_render_audio[buffer_index], + num_frames_per_band); + 
+ buffer_index += num_frames_per_band; + } +} + +void EchoControlMobileImpl::PackRenderAudioBuffer( + const AudioBuffer* audio, + size_t num_output_channels, + size_t num_channels, + std::vector<int16_t>* packed_buffer) { + RTC_DCHECK_GE(160, audio->num_frames_per_band()); + RTC_DCHECK_EQ(num_channels, audio->num_channels()); + + // The ordering convention must be followed to pass to the correct AECM. + packed_buffer->clear(); + int render_channel = 0; + for (size_t i = 0; i < num_output_channels; i++) { + for (size_t j = 0; j < audio->num_channels(); j++) { + // Buffer the samples in the render queue. + packed_buffer->insert( + packed_buffer->end(), + audio->split_bands_const(render_channel)[kBand0To8kHz], + (audio->split_bands_const(render_channel)[kBand0To8kHz] + + audio->num_frames_per_band())); + render_channel = (render_channel + 1) % audio->num_channels(); + } + } +} + +size_t EchoControlMobileImpl::NumCancellersRequired( + size_t num_output_channels, + size_t num_reverse_channels) { + return num_output_channels * num_reverse_channels; +} + +int EchoControlMobileImpl::ProcessCaptureAudio(AudioBuffer* audio, + int stream_delay_ms) { + rtc::CritScope cs_capture(crit_capture_); + if (!enabled_) { + return AudioProcessing::kNoError; + } + + RTC_DCHECK(stream_properties_); + RTC_DCHECK_GE(160, audio->num_frames_per_band()); + RTC_DCHECK_EQ(audio->num_channels(), stream_properties_->num_output_channels); + RTC_DCHECK_GE(cancellers_.size(), stream_properties_->num_reverse_channels * + audio->num_channels()); + + int err = AudioProcessing::kNoError; + + // The ordering convention must be followed to pass to the correct AECM. + size_t handle_index = 0; + for (size_t capture = 0; capture < audio->num_channels(); ++capture) { + // TODO(ajm): improve how this works, possibly inside AECM. + // This is kind of hacked up. + const int16_t* noisy = audio->low_pass_reference(capture); + const int16_t* clean = audio->split_bands_const(capture)[kBand0To8kHz]; + if (noisy == NULL) { + noisy = clean; + clean = NULL; + } + for (size_t render = 0; render < stream_properties_->num_reverse_channels; + ++render) { + err = WebRtcAecm_Process(cancellers_[handle_index]->state(), noisy, clean, + audio->split_bands(capture)[kBand0To8kHz], + audio->num_frames_per_band(), stream_delay_ms); + + if (err != AudioProcessing::kNoError) { + return MapError(err); + } + + ++handle_index; + } + for (size_t band = 1u; band < audio->num_bands(); ++band) { + memset(audio->split_bands(capture)[band], + 0, + audio->num_frames_per_band() * + sizeof(audio->split_bands(capture)[band][0])); + } + } + return AudioProcessing::kNoError; +} + +int EchoControlMobileImpl::Enable(bool enable) { + // Ensure AEC and AECM are not both enabled. + rtc::CritScope cs_render(crit_render_); + rtc::CritScope cs_capture(crit_capture_); + RTC_DCHECK(stream_properties_); + + if (enable && + stream_properties_->sample_rate_hz > AudioProcessing::kSampleRate16kHz) { + return AudioProcessing::kBadSampleRateError; + } + + if (enable && !enabled_) { + enabled_ = enable; // Must be set before Initialize() is called. + + // TODO(peah): Simplify once the Enable function has been removed from + // the public APM API. 
+ Initialize(stream_properties_->sample_rate_hz, + stream_properties_->num_reverse_channels, + stream_properties_->num_output_channels); + } else { + enabled_ = enable; + } + return AudioProcessing::kNoError; +} + +bool EchoControlMobileImpl::is_enabled() const { + rtc::CritScope cs(crit_capture_); + return enabled_; +} + +int EchoControlMobileImpl::set_routing_mode(RoutingMode mode) { + if (MapSetting(mode) == -1) { + return AudioProcessing::kBadParameterError; + } + + { + rtc::CritScope cs(crit_capture_); + routing_mode_ = mode; + } + return Configure(); +} + +EchoControlMobile::RoutingMode EchoControlMobileImpl::routing_mode() + const { + rtc::CritScope cs(crit_capture_); + return routing_mode_; +} + +int EchoControlMobileImpl::enable_comfort_noise(bool enable) { + { + rtc::CritScope cs(crit_capture_); + comfort_noise_enabled_ = enable; + } + return Configure(); +} + +bool EchoControlMobileImpl::is_comfort_noise_enabled() const { + rtc::CritScope cs(crit_capture_); + return comfort_noise_enabled_; +} + +int EchoControlMobileImpl::SetEchoPath(const void* echo_path, + size_t size_bytes) { + { + rtc::CritScope cs_render(crit_render_); + rtc::CritScope cs_capture(crit_capture_); + if (echo_path == NULL) { + return AudioProcessing::kNullPointerError; + } + if (size_bytes != echo_path_size_bytes()) { + // Size mismatch + return AudioProcessing::kBadParameterError; + } + + if (external_echo_path_ == NULL) { + external_echo_path_ = new unsigned char[size_bytes]; + } + memcpy(external_echo_path_, echo_path, size_bytes); + } + + // TODO(peah): Simplify once the Enable function has been removed from + // the public APM API. + RTC_DCHECK(stream_properties_); + Initialize(stream_properties_->sample_rate_hz, + stream_properties_->num_reverse_channels, + stream_properties_->num_output_channels); + return AudioProcessing::kNoError; +} + +int EchoControlMobileImpl::GetEchoPath(void* echo_path, + size_t size_bytes) const { + rtc::CritScope cs(crit_capture_); + if (echo_path == NULL) { + return AudioProcessing::kNullPointerError; + } + if (size_bytes != echo_path_size_bytes()) { + // Size mismatch + return AudioProcessing::kBadParameterError; + } + if (!enabled_) { + return AudioProcessing::kNotEnabledError; + } + + // Get the echo path from the first channel + int32_t err = + WebRtcAecm_GetEchoPath(cancellers_[0]->state(), echo_path, size_bytes); + if (err != 0) { + return MapError(err); + } + + return AudioProcessing::kNoError; +} + +void EchoControlMobileImpl::Initialize(int sample_rate_hz, + size_t num_reverse_channels, + size_t num_output_channels) { + rtc::CritScope cs_render(crit_render_); + rtc::CritScope cs_capture(crit_capture_); + + stream_properties_.reset(new StreamProperties( + sample_rate_hz, num_reverse_channels, num_output_channels)); + + if (!enabled_) { + return; + } + + if (stream_properties_->sample_rate_hz > AudioProcessing::kSampleRate16kHz) { + RTC_LOG(LS_ERROR) << "AECM only supports 16 kHz or lower sample rates"; + } + + cancellers_.resize( + NumCancellersRequired(stream_properties_->num_output_channels, + stream_properties_->num_reverse_channels)); + + for (auto& canceller : cancellers_) { + if (!canceller) { + canceller.reset(new Canceller()); + } + canceller->Initialize(sample_rate_hz, external_echo_path_, + echo_path_size_bytes()); + } + + Configure(); +} + +int EchoControlMobileImpl::Configure() { + rtc::CritScope cs_render(crit_render_); + rtc::CritScope cs_capture(crit_capture_); + AecmConfig config; + config.cngMode = comfort_noise_enabled_; + config.echoMode = 
MapSetting(routing_mode_); + int error = AudioProcessing::kNoError; + for (auto& canceller : cancellers_) { + int handle_error = WebRtcAecm_set_config(canceller->state(), config); + if (handle_error != AudioProcessing::kNoError) { + error = handle_error; + } + } + return error; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/echo_control_mobile_impl.h b/third_party/libwebrtc/webrtc/modules/audio_processing/echo_control_mobile_impl.h new file mode 100644 index 0000000000..a03ab4d486 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/echo_control_mobile_impl.h @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_ECHO_CONTROL_MOBILE_IMPL_H_ +#define MODULES_AUDIO_PROCESSING_ECHO_CONTROL_MOBILE_IMPL_H_ + +#include <memory> +#include <vector> + +#include "modules/audio_processing/include/audio_processing.h" +#include "modules/audio_processing/render_queue_item_verifier.h" +#include "rtc_base/constructormagic.h" +#include "rtc_base/criticalsection.h" +#include "rtc_base/swap_queue.h" + +namespace webrtc { + +class AudioBuffer; + +class EchoControlMobileImpl : public EchoControlMobile { + public: + EchoControlMobileImpl(rtc::CriticalSection* crit_render, + rtc::CriticalSection* crit_capture); + + ~EchoControlMobileImpl() override; + + void ProcessRenderAudio(rtc::ArrayView<const int16_t> packed_render_audio); + int ProcessCaptureAudio(AudioBuffer* audio, int stream_delay_ms); + + // EchoControlMobile implementation. + bool is_enabled() const override; + RoutingMode routing_mode() const override; + bool is_comfort_noise_enabled() const override; + + void Initialize(int sample_rate_hz, + size_t num_reverse_channels, + size_t num_output_channels); + + static void PackRenderAudioBuffer(const AudioBuffer* audio, + size_t num_output_channels, + size_t num_channels, + std::vector<int16_t>* packed_buffer); + + static size_t NumCancellersRequired(size_t num_output_channels, + size_t num_reverse_channels); + + private: + class Canceller; + struct StreamProperties; + + // EchoControlMobile implementation. 
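+  // (These overrides are private; external code reaches them through the
+  // EchoControlMobile pointer returned by AudioProcessing::echo_control_mobile().)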
+  int Enable(bool enable) override;
+  int set_routing_mode(RoutingMode mode) override;
+  int enable_comfort_noise(bool enable) override;
+  int SetEchoPath(const void* echo_path, size_t size_bytes) override;
+  int GetEchoPath(void* echo_path, size_t size_bytes) const override;
+
+  int Configure();
+
+  rtc::CriticalSection* const crit_render_ RTC_ACQUIRED_BEFORE(crit_capture_);
+  rtc::CriticalSection* const crit_capture_;
+
+  bool enabled_ = false;
+
+  RoutingMode routing_mode_ RTC_GUARDED_BY(crit_capture_);
+  bool comfort_noise_enabled_ RTC_GUARDED_BY(crit_capture_);
+  unsigned char* external_echo_path_ RTC_GUARDED_BY(crit_render_)
+      RTC_GUARDED_BY(crit_capture_);
+
+  std::vector<std::unique_ptr<Canceller>> cancellers_;
+  std::unique_ptr<StreamProperties> stream_properties_;
+
+  RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(EchoControlMobileImpl);
+};
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_ECHO_CONTROL_MOBILE_IMPL_H_
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/echo_control_mobile_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/echo_control_mobile_unittest.cc
new file mode 100644
index 0000000000..fb58a5b870
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/echo_control_mobile_unittest.cc
@@ -0,0 +1,224 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#include <vector>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/audio_buffer.h"
+#include "modules/audio_processing/echo_control_mobile_impl.h"
+#include "modules/audio_processing/test/audio_buffer_tools.h"
+#include "modules/audio_processing/test/bitexactness_tools.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace {
+
+// TODO(peah): Increase the number of frames to process once the issue of
+// non-repeatable test results has been resolved.
+const int kNumFramesToProcess = 200;
+
+void SetupComponent(int sample_rate_hz,
+                    EchoControlMobile::RoutingMode routing_mode,
+                    bool comfort_noise_enabled,
+                    EchoControlMobileImpl* echo_control_mobile) {
+  echo_control_mobile->Initialize(
+      sample_rate_hz > 16000 ? 16000 : sample_rate_hz, 1, 1);
+  EchoControlMobile* ec = static_cast<EchoControlMobile*>(echo_control_mobile);
+  ec->Enable(true);
+  ec->set_routing_mode(routing_mode);
+  ec->enable_comfort_noise(comfort_noise_enabled);
+}
+
+void ProcessOneFrame(int sample_rate_hz,
+                     int stream_delay_ms,
+                     AudioBuffer* render_audio_buffer,
+                     AudioBuffer* capture_audio_buffer,
+                     EchoControlMobileImpl* echo_control_mobile) {
+  if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) {
+    render_audio_buffer->SplitIntoFrequencyBands();
+    capture_audio_buffer->SplitIntoFrequencyBands();
+  }
+
+  std::vector<int16_t> render_audio;
+  EchoControlMobileImpl::PackRenderAudioBuffer(
+      render_audio_buffer, 1, render_audio_buffer->num_channels(),
+      &render_audio);
+  echo_control_mobile->ProcessRenderAudio(render_audio);
+
+  echo_control_mobile->ProcessCaptureAudio(capture_audio_buffer,
+                                           stream_delay_ms);
+
+  if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) {
+    capture_audio_buffer->MergeFrequencyBands();
+  }
+}
+
+void RunBitexactnessTest(int sample_rate_hz,
+                         size_t num_channels,
+                         int stream_delay_ms,
+                         EchoControlMobile::RoutingMode routing_mode,
+                         bool comfort_noise_enabled,
+                         const rtc::ArrayView<const float>& output_reference) {
+  rtc::CriticalSection crit_render;
+  rtc::CriticalSection crit_capture;
+  EchoControlMobileImpl echo_control_mobile(&crit_render, &crit_capture);
+  SetupComponent(sample_rate_hz, routing_mode, comfort_noise_enabled,
+                 &echo_control_mobile);
+
+  const int samples_per_channel = rtc::CheckedDivExact(sample_rate_hz, 100);
+  const StreamConfig render_config(sample_rate_hz, num_channels, false);
+  AudioBuffer render_buffer(
+      render_config.num_frames(), render_config.num_channels(),
+      render_config.num_frames(), 1, render_config.num_frames());
+  test::InputAudioFile render_file(
+      test::GetApmRenderTestVectorFileName(sample_rate_hz));
+  std::vector<float> render_input(samples_per_channel * num_channels);
+
+  const StreamConfig capture_config(sample_rate_hz, num_channels, false);
+  AudioBuffer capture_buffer(
+      capture_config.num_frames(), capture_config.num_channels(),
+      capture_config.num_frames(), 1, capture_config.num_frames());
+  test::InputAudioFile capture_file(
+      test::GetApmCaptureTestVectorFileName(sample_rate_hz));
+  std::vector<float> capture_input(samples_per_channel * num_channels);
+
+  for (int frame_no = 0; frame_no < kNumFramesToProcess; ++frame_no) {
+    ReadFloatSamplesFromStereoFile(samples_per_channel, num_channels,
+                                   &render_file, render_input);
+    ReadFloatSamplesFromStereoFile(samples_per_channel, num_channels,
+                                   &capture_file, capture_input);
+
+    test::CopyVectorToAudioBuffer(render_config, render_input, &render_buffer);
+    test::CopyVectorToAudioBuffer(capture_config, capture_input,
+                                  &capture_buffer);
+
+    ProcessOneFrame(sample_rate_hz, stream_delay_ms, &render_buffer,
+                    &capture_buffer, &echo_control_mobile);
+  }
+
+  // Extract and verify the test results.
+  std::vector<float> capture_output;
+  test::ExtractVectorFromAudioBuffer(capture_config, &capture_buffer,
+                                     &capture_output);
+
+  // Compare the output with the reference. Only the first values of the
+  // output from the last processed frame are compared, to avoid having to
+  // specify all preceding frames as test vectors. Since the algorithm under
+  // test has memory, checking the last frame implicitly also exercises the
+  // preceding frames.
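+  // The bound below is one LSB of 16-bit PCM (1/32768) on the [-1, 1) float
+  // scale, i.e. only rounding-level deviations from the reference are
+  // tolerated.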
+  const float kElementErrorBound = 1.0f / 32768.0f;
+  EXPECT_TRUE(test::VerifyDeinterleavedArray(
+      capture_config.num_frames(), capture_config.num_channels(),
+      output_reference, capture_output, kElementErrorBound));
+}
+
+}  // namespace
+
+// TODO(peah): Re-enable once the integer overflow issue in aecm_core.c:932:69
+// has been solved.
+TEST(EchoControlMobileBitExactnessTest,
+     DISABLED_Mono8kHz_LoudSpeakerPhone_CngOn_StreamDelay0) {
+  const float kOutputReference[] = {0.005280f, 0.002380f, -0.000427f};
+
+  RunBitexactnessTest(8000, 1, 0,
+                      EchoControlMobile::RoutingMode::kLoudSpeakerphone, true,
+                      kOutputReference);
+}
+
+TEST(EchoControlMobileBitExactnessTest,
+     DISABLED_Mono16kHz_LoudSpeakerPhone_CngOn_StreamDelay0) {
+  const float kOutputReference[] = {0.003601f, 0.002991f, 0.001923f};
+  RunBitexactnessTest(16000, 1, 0,
+                      EchoControlMobile::RoutingMode::kLoudSpeakerphone, true,
+                      kOutputReference);
+}
+
+TEST(EchoControlMobileBitExactnessTest,
+     DISABLED_Mono32kHz_LoudSpeakerPhone_CngOn_StreamDelay0) {
+  const float kOutputReference[] = {0.002258f, 0.002899f, 0.003906f};
+
+  RunBitexactnessTest(32000, 1, 0,
+                      EchoControlMobile::RoutingMode::kLoudSpeakerphone, true,
+                      kOutputReference);
+}
+
+TEST(EchoControlMobileBitExactnessTest,
+     DISABLED_Mono48kHz_LoudSpeakerPhone_CngOn_StreamDelay0) {
+  const float kOutputReference[] = {-0.000046f, 0.000041f, 0.000249f};
+
+  RunBitexactnessTest(48000, 1, 0,
+                      EchoControlMobile::RoutingMode::kLoudSpeakerphone, true,
+                      kOutputReference);
+}
+
+TEST(EchoControlMobileBitExactnessTest,
+     DISABLED_Mono16kHz_LoudSpeakerPhone_CngOff_StreamDelay0) {
+  const float kOutputReference[] = {0.000000f, 0.000000f, 0.000000f};
+
+  RunBitexactnessTest(16000, 1, 0,
+                      EchoControlMobile::RoutingMode::kLoudSpeakerphone, false,
+                      kOutputReference);
+}
+
+// TODO(peah): Re-enable once the integer overflow issue in aecm_core.c:932:69
+// has been solved.
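+// (gtest compiles but skips any test whose name carries the DISABLED_ prefix,
+// so the reference values in these tests are retained without being verified.)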
+TEST(EchoControlMobileBitExactnessTest, + DISABLED_Mono16kHz_LoudSpeakerPhone_CngOn_StreamDelay5) { + const float kOutputReference[] = {0.003693f, 0.002930f, 0.001801f}; + + RunBitexactnessTest(16000, 1, 5, + EchoControlMobile::RoutingMode::kLoudSpeakerphone, true, + kOutputReference); +} + +TEST(EchoControlMobileBitExactnessTest, + Mono16kHz_LoudSpeakerPhone_CngOn_StreamDelay10) { + const float kOutputReference[] = {-0.002380f, -0.002533f, -0.002563f}; + + RunBitexactnessTest(16000, 1, 10, + EchoControlMobile::RoutingMode::kLoudSpeakerphone, true, + kOutputReference); +} + +TEST(EchoControlMobileBitExactnessTest, + DISABLED_Mono16kHz_QuietEarpieceOrHeadset_CngOn_StreamDelay0) { + const float kOutputReference[] = {0.000397f, 0.000000f, -0.000305f}; + + RunBitexactnessTest(16000, 1, 0, + EchoControlMobile::RoutingMode::kQuietEarpieceOrHeadset, + true, kOutputReference); +} + +TEST(EchoControlMobileBitExactnessTest, + DISABLED_Mono16kHz_Earpiece_CngOn_StreamDelay0) { + const float kOutputReference[] = {0.002167f, 0.001617f, 0.001038f}; + + RunBitexactnessTest(16000, 1, 0, EchoControlMobile::RoutingMode::kEarpiece, + true, kOutputReference); +} + +TEST(EchoControlMobileBitExactnessTest, + DISABLED_Mono16kHz_LoudEarpiece_CngOn_StreamDelay0) { + const float kOutputReference[] = {0.003540f, 0.002899f, 0.001862f}; + + RunBitexactnessTest(16000, 1, 0, + EchoControlMobile::RoutingMode::kLoudEarpiece, true, + kOutputReference); +} + +TEST(EchoControlMobileBitExactnessTest, + DISABLED_Mono16kHz_SpeakerPhone_CngOn_StreamDelay0) { + const float kOutputReference[] = {0.003632f, 0.003052f, 0.001984f}; + + RunBitexactnessTest(16000, 1, 0, + EchoControlMobile::RoutingMode::kSpeakerphone, true, + kOutputReference); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/echo_detector/circular_buffer.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/echo_detector/circular_buffer.cc new file mode 100644 index 0000000000..0c6cc8a933 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/echo_detector/circular_buffer.cc @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/echo_detector/circular_buffer.h" + +#include <algorithm> + +#include "rtc_base/checks.h" + +namespace webrtc { + +CircularBuffer::CircularBuffer(size_t size) : buffer_(size) {} +CircularBuffer::~CircularBuffer() = default; + +void CircularBuffer::Push(float value) { + buffer_[next_insertion_index_] = value; + ++next_insertion_index_; + next_insertion_index_ %= buffer_.size(); + RTC_DCHECK_LT(next_insertion_index_, buffer_.size()); + nr_elements_in_buffer_ = std::min(nr_elements_in_buffer_ + 1, buffer_.size()); + RTC_DCHECK_LE(nr_elements_in_buffer_, buffer_.size()); +} + +rtc::Optional<float> CircularBuffer::Pop() { + if (nr_elements_in_buffer_ == 0) { + return rtc::nullopt; + } + const size_t index = + (buffer_.size() + next_insertion_index_ - nr_elements_in_buffer_) % + buffer_.size(); + RTC_DCHECK_LT(index, buffer_.size()); + --nr_elements_in_buffer_; + return buffer_[index]; +} + +void CircularBuffer::Clear() { + std::fill(buffer_.begin(), buffer_.end(), 0.f); + next_insertion_index_ = 0; + nr_elements_in_buffer_ = 0; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/echo_detector/circular_buffer.h b/third_party/libwebrtc/webrtc/modules/audio_processing/echo_detector/circular_buffer.h new file mode 100644 index 0000000000..53d4afb6d5 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/echo_detector/circular_buffer.h @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_ECHO_DETECTOR_CIRCULAR_BUFFER_H_ +#define MODULES_AUDIO_PROCESSING_ECHO_DETECTOR_CIRCULAR_BUFFER_H_ + +#include <vector> + +#include "api/optional.h" + +namespace webrtc { + +// Ring buffer containing floating point values. +struct CircularBuffer { + public: + explicit CircularBuffer(size_t size); + ~CircularBuffer(); + + void Push(float value); + rtc::Optional<float> Pop(); + size_t Size() const { return nr_elements_in_buffer_; } + // This function fills the buffer with zeros, but does not change its size. + void Clear(); + + private: + std::vector<float> buffer_; + size_t next_insertion_index_ = 0; + // This is the number of elements that have been pushed into the circular + // buffer, not the allocated buffer size. + size_t nr_elements_in_buffer_ = 0; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_ECHO_DETECTOR_CIRCULAR_BUFFER_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/echo_detector/circular_buffer_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/echo_detector/circular_buffer_unittest.cc new file mode 100644 index 0000000000..657bd05888 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/echo_detector/circular_buffer_unittest.cc @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/echo_detector/circular_buffer.h" +#include "test/gtest.h" + +namespace webrtc { + +TEST(CircularBufferTests, LessThanMaxTest) { + CircularBuffer test_buffer(3); + test_buffer.Push(1.f); + test_buffer.Push(2.f); + EXPECT_EQ(1.f, test_buffer.Pop()); + EXPECT_EQ(2.f, test_buffer.Pop()); +} + +TEST(CircularBufferTests, FillTest) { + CircularBuffer test_buffer(3); + test_buffer.Push(1.f); + test_buffer.Push(2.f); + test_buffer.Push(3.f); + EXPECT_EQ(1.f, test_buffer.Pop()); + EXPECT_EQ(2.f, test_buffer.Pop()); + EXPECT_EQ(3.f, test_buffer.Pop()); +} + +TEST(CircularBufferTests, OverflowTest) { + CircularBuffer test_buffer(3); + test_buffer.Push(1.f); + test_buffer.Push(2.f); + test_buffer.Push(3.f); + test_buffer.Push(4.f); + // Because the circular buffer has a size of 3, the first insert should have + // been forgotten. + EXPECT_EQ(2.f, test_buffer.Pop()); + EXPECT_EQ(3.f, test_buffer.Pop()); + EXPECT_EQ(4.f, test_buffer.Pop()); +} + +TEST(CircularBufferTests, ReadFromEmpty) { + CircularBuffer test_buffer(3); + EXPECT_EQ(rtc::nullopt, test_buffer.Pop()); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/echo_detector/mean_variance_estimator.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/echo_detector/mean_variance_estimator.cc new file mode 100644 index 0000000000..10853f3d9c --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/echo_detector/mean_variance_estimator.cc @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/echo_detector/mean_variance_estimator.h" + +#include <math.h> + +#include "rtc_base/checks.h" + +namespace webrtc { +namespace { + +// Parameter controlling the adaptation speed. +constexpr float kAlpha = 0.001f; + +} // namespace + +void MeanVarianceEstimator::Update(float value) { + mean_ = (1.f - kAlpha) * mean_ + kAlpha * value; + variance_ = + (1.f - kAlpha) * variance_ + kAlpha * (value - mean_) * (value - mean_); + RTC_DCHECK(std::isfinite(mean_)); + RTC_DCHECK(std::isfinite(variance_)); +} + +float MeanVarianceEstimator::std_deviation() const { + RTC_DCHECK_GE(variance_, 0.f); + return sqrtf(variance_); +} + +float MeanVarianceEstimator::mean() const { + return mean_; +} + +void MeanVarianceEstimator::Clear() { + mean_ = 0.f; + variance_ = 0.f; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/echo_detector/mean_variance_estimator.h b/third_party/libwebrtc/webrtc/modules/audio_processing/echo_detector/mean_variance_estimator.h new file mode 100644 index 0000000000..7f793df1e8 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/echo_detector/mean_variance_estimator.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_ECHO_DETECTOR_MEAN_VARIANCE_ESTIMATOR_H_ +#define MODULES_AUDIO_PROCESSING_ECHO_DETECTOR_MEAN_VARIANCE_ESTIMATOR_H_ + +namespace webrtc { + +// This class iteratively estimates the mean and variance of a signal. +class MeanVarianceEstimator { + public: + void Update(float value); + float std_deviation() const; + float mean() const; + void Clear(); + + private: + // Estimate of the expected value of the input values. + float mean_ = 0.f; + // Estimate of the variance of the input values. + float variance_ = 0.f; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_ECHO_DETECTOR_MEAN_VARIANCE_ESTIMATOR_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/echo_detector/mean_variance_estimator_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/echo_detector/mean_variance_estimator_unittest.cc new file mode 100644 index 0000000000..f8efc3a799 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/echo_detector/mean_variance_estimator_unittest.cc @@ -0,0 +1,64 @@ + +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/echo_detector/mean_variance_estimator.h" +#include "test/gtest.h" + +namespace webrtc { + +TEST(MeanVarianceEstimatorTests, InsertTwoValues) { + MeanVarianceEstimator test_estimator; + // Insert two values. + test_estimator.Update(3.f); + test_estimator.Update(5.f); + + EXPECT_GT(test_estimator.mean(), 0.f); + EXPECT_GT(test_estimator.std_deviation(), 0.f); + // Test Clear method + test_estimator.Clear(); + EXPECT_EQ(test_estimator.mean(), 0.f); + EXPECT_EQ(test_estimator.std_deviation(), 0.f); +} + +TEST(MeanVarianceEstimatorTests, InsertZeroes) { + MeanVarianceEstimator test_estimator; + // Insert the same value many times. + for (size_t i = 0; i < 20000; i++) { + test_estimator.Update(0.f); + } + EXPECT_EQ(test_estimator.mean(), 0.f); + EXPECT_EQ(test_estimator.std_deviation(), 0.f); +} + +TEST(MeanVarianceEstimatorTests, ConstantValueTest) { + MeanVarianceEstimator test_estimator; + for (size_t i = 0; i < 20000; i++) { + test_estimator.Update(3.f); + } + // The mean should be close to three, and the standard deviation should be + // close to zero. + EXPECT_NEAR(3.0f, test_estimator.mean(), 0.01f); + EXPECT_NEAR(0.0f, test_estimator.std_deviation(), 0.01f); +} + +TEST(MeanVarianceEstimatorTests, AlternatingValueTest) { + MeanVarianceEstimator test_estimator; + for (size_t i = 0; i < 20000; i++) { + test_estimator.Update(1.f); + test_estimator.Update(-1.f); + } + // The mean should be close to zero, and the standard deviation should be + // close to one. 
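+  // (For the alternating +1/-1 input, E[x] = 0 and E[x^2] = 1, so the
+  // estimator's standard deviation converges toward sqrt(1 - 0) = 1.)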
+ EXPECT_NEAR(0.0f, test_estimator.mean(), 0.01f); + EXPECT_NEAR(1.0f, test_estimator.std_deviation(), 0.01f); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/echo_detector/moving_max.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/echo_detector/moving_max.cc new file mode 100644 index 0000000000..3054e98bd3 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/echo_detector/moving_max.cc @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/echo_detector/moving_max.h" + +#include "rtc_base/checks.h" + +namespace webrtc { +namespace { + +// Parameter for controlling how fast the estimated maximum decays after the +// previous maximum is no longer valid. With a value of 0.99, the maximum will +// decay to 1% of its former value after 460 updates. +constexpr float kDecayFactor = 0.99f; + +} // namespace + +MovingMax::MovingMax(size_t window_size) : window_size_(window_size) { + RTC_DCHECK_GT(window_size, 0); +} + +MovingMax::~MovingMax() {} + +void MovingMax::Update(float value) { + if (counter_ >= window_size_ - 1) { + max_value_ *= kDecayFactor; + } else { + ++counter_; + } + if (value > max_value_) { + max_value_ = value; + counter_ = 0; + } +} + +float MovingMax::max() const { + return max_value_; +} + +void MovingMax::Clear() { + max_value_ = 0.f; + counter_ = 0; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/echo_detector/moving_max.h b/third_party/libwebrtc/webrtc/modules/audio_processing/echo_detector/moving_max.h new file mode 100644 index 0000000000..f7d8ee8137 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/echo_detector/moving_max.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_ECHO_DETECTOR_MOVING_MAX_H_ +#define MODULES_AUDIO_PROCESSING_ECHO_DETECTOR_MOVING_MAX_H_ + +#include <stddef.h> + +namespace webrtc { + +class MovingMax { + public: + explicit MovingMax(size_t window_size); + ~MovingMax(); + + void Update(float value); + float max() const; + // Reset all of the state in this class. + void Clear(); + + private: + float max_value_ = 0.f; + size_t counter_ = 0; + size_t window_size_ = 1; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_ECHO_DETECTOR_MOVING_MAX_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/echo_detector/moving_max_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/echo_detector/moving_max_unittest.cc new file mode 100644 index 0000000000..b67b86ffc3 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/echo_detector/moving_max_unittest.cc @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. 
All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/echo_detector/moving_max.h" +#include "test/gtest.h" + +namespace webrtc { + +// Test if the maximum is correctly found. +TEST(MovingMaxTests, SimpleTest) { + MovingMax test_moving_max(5); + test_moving_max.Update(1.0f); + test_moving_max.Update(1.1f); + test_moving_max.Update(1.9f); + test_moving_max.Update(1.87f); + test_moving_max.Update(1.89f); + EXPECT_EQ(1.9f, test_moving_max.max()); +} + +// Test if values fall out of the window when expected. +TEST(MovingMaxTests, SlidingWindowTest) { + MovingMax test_moving_max(5); + test_moving_max.Update(1.0f); + test_moving_max.Update(1.9f); + test_moving_max.Update(1.7f); + test_moving_max.Update(1.87f); + test_moving_max.Update(1.89f); + test_moving_max.Update(1.3f); + test_moving_max.Update(1.2f); + EXPECT_LT(test_moving_max.max(), 1.9f); +} + +// Test if Clear() works as expected. +TEST(MovingMaxTests, ClearTest) { + MovingMax test_moving_max(5); + test_moving_max.Update(1.0f); + test_moving_max.Update(1.1f); + test_moving_max.Update(1.9f); + test_moving_max.Update(1.87f); + test_moving_max.Update(1.89f); + EXPECT_EQ(1.9f, test_moving_max.max()); + test_moving_max.Clear(); + EXPECT_EQ(0.f, test_moving_max.max()); +} + +// Test the decay of the estimated maximum. +TEST(MovingMaxTests, DecayTest) { + MovingMax test_moving_max(1); + test_moving_max.Update(1.0f); + float previous_value = 1.0f; + for (int i = 0; i < 500; i++) { + test_moving_max.Update(0.0f); + EXPECT_LT(test_moving_max.max(), previous_value); + EXPECT_GT(test_moving_max.max(), 0.0f); + previous_value = test_moving_max.max(); + } + EXPECT_LT(test_moving_max.max(), 0.01f); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/echo_detector/normalized_covariance_estimator.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/echo_detector/normalized_covariance_estimator.cc new file mode 100644 index 0000000000..8ec9fe9f0b --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/echo_detector/normalized_covariance_estimator.cc @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/echo_detector/normalized_covariance_estimator.h" + +#include <math.h> + +#include "rtc_base/checks.h" + +namespace webrtc { +namespace { + +// Parameter controlling the adaptation speed. 
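+// With kAlpha = 0.001, each update behaves like an exponentially weighted
+// moving average with an effective memory of roughly 1 / kAlpha = 1000
+// samples.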
+constexpr float kAlpha = 0.001f;
+
+}  // namespace
+
+void NormalizedCovarianceEstimator::Update(float x,
+                                           float x_mean,
+                                           float x_sigma,
+                                           float y,
+                                           float y_mean,
+                                           float y_sigma) {
+  covariance_ =
+      (1.f - kAlpha) * covariance_ + kAlpha * (x - x_mean) * (y - y_mean);
+  normalized_cross_correlation_ = covariance_ / (x_sigma * y_sigma + .0001f);
+  RTC_DCHECK(isfinite(covariance_));
+  RTC_DCHECK(isfinite(normalized_cross_correlation_));
+}
+
+void NormalizedCovarianceEstimator::Clear() {
+  covariance_ = 0.f;
+  normalized_cross_correlation_ = 0.f;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/echo_detector/normalized_covariance_estimator.h b/third_party/libwebrtc/webrtc/modules/audio_processing/echo_detector/normalized_covariance_estimator.h
new file mode 100644
index 0000000000..e3c36d88ba
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/echo_detector/normalized_covariance_estimator.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_ECHO_DETECTOR_NORMALIZED_COVARIANCE_ESTIMATOR_H_
+#define MODULES_AUDIO_PROCESSING_ECHO_DETECTOR_NORMALIZED_COVARIANCE_ESTIMATOR_H_
+
+namespace webrtc {
+
+// This class iteratively estimates the normalized covariance between two
+// signals.
+class NormalizedCovarianceEstimator {
+ public:
+  void Update(float x,
+              float x_mean,
+              float x_sigma,
+              float y,
+              float y_mean,
+              float y_sigma);
+  // This function returns an estimate of the Pearson product-moment
+  // correlation coefficient of the two signals.
+  float normalized_cross_correlation() const {
+    return normalized_cross_correlation_;
+  }
+  float covariance() const { return covariance_; }
+  // This function resets the estimated values to zero.
+  void Clear();
+
+ private:
+  float normalized_cross_correlation_ = 0.f;
+  // Estimate of the covariance value.
+  float covariance_ = 0.f;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_ECHO_DETECTOR_NORMALIZED_COVARIANCE_ESTIMATOR_H_
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/echo_detector/normalized_covariance_estimator_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/echo_detector/normalized_covariance_estimator_unittest.cc
new file mode 100644
index 0000000000..7e0512ee58
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/echo_detector/normalized_covariance_estimator_unittest.cc
@@ -0,0 +1,40 @@
+
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/echo_detector/normalized_covariance_estimator.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+
+TEST(NormalizedCovarianceEstimatorTests, IdenticalSignalTest) {
+  NormalizedCovarianceEstimator test_estimator;
+  for (size_t i = 0; i < 10000; i++) {
+    test_estimator.Update(1.f, 0.f, 1.f, 1.f, 0.f, 1.f);
+    test_estimator.Update(-1.f, 0.f, 1.f, -1.f, 0.f, 1.f);
+  }
+  // A normalized covariance value close to 1 is expected.
+  EXPECT_NEAR(1.f, test_estimator.normalized_cross_correlation(), 0.01f);
+  test_estimator.Clear();
+  EXPECT_EQ(0.f, test_estimator.normalized_cross_correlation());
+}
+
+TEST(NormalizedCovarianceEstimatorTests, OppositeSignalTest) {
+  NormalizedCovarianceEstimator test_estimator;
+  // Insert pairs of opposite values in the two signals many times.
+  for (size_t i = 0; i < 10000; i++) {
+    test_estimator.Update(1.f, 0.f, 1.f, -1.f, 0.f, 1.f);
+    test_estimator.Update(-1.f, 0.f, 1.f, 1.f, 0.f, 1.f);
+  }
+  // A normalized covariance value close to -1 is expected.
+  EXPECT_NEAR(-1.f, test_estimator.normalized_cross_correlation(), 0.01f);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/gain_control_for_experimental_agc.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/gain_control_for_experimental_agc.cc
new file mode 100644
index 0000000000..d5d978c2e0
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/gain_control_for_experimental_agc.cc
@@ -0,0 +1,120 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */ + +#include "modules/audio_processing/gain_control_for_experimental_agc.h" + +#include "modules/audio_processing/include/audio_processing.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/checks.h" +#include "rtc_base/criticalsection.h" + +namespace webrtc { + +int GainControlForExperimentalAgc::instance_counter_ = 0; + +GainControlForExperimentalAgc::GainControlForExperimentalAgc( + GainControl* gain_control, + rtc::CriticalSection* crit_capture) + : data_dumper_(new ApmDataDumper(instance_counter_)), + real_gain_control_(gain_control), + volume_(0), + crit_capture_(crit_capture) { + instance_counter_++; +} + +GainControlForExperimentalAgc::~GainControlForExperimentalAgc() = default; + +int GainControlForExperimentalAgc::Enable(bool enable) { + return real_gain_control_->Enable(enable); +} + +bool GainControlForExperimentalAgc::is_enabled() const { + return real_gain_control_->is_enabled(); +} + +int GainControlForExperimentalAgc::set_stream_analog_level(int level) { + rtc::CritScope cs_capture(crit_capture_); + data_dumper_->DumpRaw("experimental_gain_control_set_stream_analog_level", 1, + &level); + volume_ = level; + return AudioProcessing::kNoError; +} + +int GainControlForExperimentalAgc::stream_analog_level() { + rtc::CritScope cs_capture(crit_capture_); + data_dumper_->DumpRaw("experimental_gain_control_stream_analog_level", 1, + &volume_); + return volume_; +} + +int GainControlForExperimentalAgc::set_mode(Mode mode) { + return AudioProcessing::kNoError; +} + +GainControl::Mode GainControlForExperimentalAgc::mode() const { + return GainControl::kAdaptiveAnalog; +} + +int GainControlForExperimentalAgc::set_target_level_dbfs(int level) { + return AudioProcessing::kNoError; +} + +int GainControlForExperimentalAgc::target_level_dbfs() const { + return real_gain_control_->target_level_dbfs(); +} + +int GainControlForExperimentalAgc::set_compression_gain_db(int gain) { + return AudioProcessing::kNoError; +} + +int GainControlForExperimentalAgc::compression_gain_db() const { + return real_gain_control_->compression_gain_db(); +} + +int GainControlForExperimentalAgc::enable_limiter(bool enable) { + return AudioProcessing::kNoError; +} + +bool GainControlForExperimentalAgc::is_limiter_enabled() const { + return real_gain_control_->is_limiter_enabled(); +} + +int GainControlForExperimentalAgc::set_analog_level_limits(int minimum, + int maximum) { + return AudioProcessing::kNoError; +} + +int GainControlForExperimentalAgc::analog_level_minimum() const { + return real_gain_control_->analog_level_minimum(); +} + +int GainControlForExperimentalAgc::analog_level_maximum() const { + return real_gain_control_->analog_level_maximum(); +} + +bool GainControlForExperimentalAgc::stream_is_saturated() const { + return real_gain_control_->stream_is_saturated(); +} + +void GainControlForExperimentalAgc::SetMicVolume(int volume) { + rtc::CritScope cs_capture(crit_capture_); + volume_ = volume; +} + +int GainControlForExperimentalAgc::GetMicVolume() { + rtc::CritScope cs_capture(crit_capture_); + return volume_; +} + +void GainControlForExperimentalAgc::Initialize() { + data_dumper_->InitiateNewSetOfRecordings(); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/gain_control_for_experimental_agc.h b/third_party/libwebrtc/webrtc/modules/audio_processing/gain_control_for_experimental_agc.h new file mode 100644 index 0000000000..0894a0e30e --- /dev/null +++ 
b/third_party/libwebrtc/webrtc/modules/audio_processing/gain_control_for_experimental_agc.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_GAIN_CONTROL_FOR_EXPERIMENTAL_AGC_H_ +#define MODULES_AUDIO_PROCESSING_GAIN_CONTROL_FOR_EXPERIMENTAL_AGC_H_ + +#include "modules/audio_processing/agc/agc_manager_direct.h" +#include "modules/audio_processing/include/audio_processing.h" +#include "rtc_base/constructormagic.h" +#include "rtc_base/criticalsection.h" +#include "rtc_base/thread_checker.h" + +namespace webrtc { + +class ApmDataDumper; + +// This class has two main purposes: +// +// 1) It is returned instead of the real GainControl after the new AGC has been +// enabled in order to prevent an outside user from overriding compression +// settings. It doesn't do anything in its implementation, except for +// delegating the const methods and Enable calls to the real GainControl, so +// AGC can still be disabled. +// +// 2) It is injected into AgcManagerDirect and implements volume callbacks for +// getting and setting the volume level. It just caches this value to be used +// in VoiceEngine later. +class GainControlForExperimentalAgc : public GainControl, + public VolumeCallbacks { + public: + GainControlForExperimentalAgc(GainControl* gain_control, + rtc::CriticalSection* crit_capture); + ~GainControlForExperimentalAgc() override; + + // GainControl implementation. + int Enable(bool enable) override; + bool is_enabled() const override; + int set_stream_analog_level(int level) override; + int stream_analog_level() override; + int set_mode(Mode mode) override; + Mode mode() const override; + int set_target_level_dbfs(int level) override; + int target_level_dbfs() const override; + int set_compression_gain_db(int gain) override; + int compression_gain_db() const override; + int enable_limiter(bool enable) override; + bool is_limiter_enabled() const override; + int set_analog_level_limits(int minimum, int maximum) override; + int analog_level_minimum() const override; + int analog_level_maximum() const override; + bool stream_is_saturated() const override; + + // VolumeCallbacks implementation. + void SetMicVolume(int volume) override; + int GetMicVolume() override; + + void Initialize(); + + private: + std::unique_ptr<ApmDataDumper> data_dumper_; + GainControl* real_gain_control_; + int volume_; + rtc::CriticalSection* crit_capture_; + static int instance_counter_; + RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(GainControlForExperimentalAgc); +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_GAIN_CONTROL_FOR_EXPERIMENTAL_AGC_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/gain_control_impl.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/gain_control_impl.cc new file mode 100644 index 0000000000..e550ebbfad --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/gain_control_impl.cc @@ -0,0 +1,439 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/gain_control_impl.h" + +#include "api/optional.h" +#include "modules/audio_processing/agc/legacy/gain_control.h" +#include "modules/audio_processing/audio_buffer.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/constructormagic.h" + +namespace webrtc { + +typedef void Handle; + +namespace { +int16_t MapSetting(GainControl::Mode mode) { + switch (mode) { + case GainControl::kAdaptiveAnalog: + return kAgcModeAdaptiveAnalog; + case GainControl::kAdaptiveDigital: + return kAgcModeAdaptiveDigital; + case GainControl::kFixedDigital: + return kAgcModeFixedDigital; + } + RTC_NOTREACHED(); + return -1; +} + +} // namespace + +class GainControlImpl::GainController { + public: + explicit GainController() { + state_ = WebRtcAgc_Create(); + RTC_CHECK(state_); + } + + ~GainController() { + RTC_DCHECK(state_); + WebRtcAgc_Free(state_); + } + + Handle* state() { + RTC_DCHECK(state_); + return state_; + } + + void Initialize(int minimum_capture_level, + int maximum_capture_level, + Mode mode, + int sample_rate_hz, + int capture_level) { + RTC_DCHECK(state_); + int error = + WebRtcAgc_Init(state_, minimum_capture_level, maximum_capture_level, + MapSetting(mode), sample_rate_hz); + RTC_DCHECK_EQ(0, error); + + set_capture_level(capture_level); + } + + void set_capture_level(int capture_level) { + capture_level_ = capture_level; + } + + int get_capture_level() { + RTC_DCHECK(capture_level_); + return *capture_level_; + } + + private: + Handle* state_; + // TODO(peah): Remove the optional once the initialization is moved into the + // ctor. 
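+  // Unset until Initialize() (via set_capture_level()) supplies a value;
+  // get_capture_level() DCHECKs that a value is present before use.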
+  rtc::Optional<int> capture_level_;
+
+  RTC_DISALLOW_COPY_AND_ASSIGN(GainController);
+};
+
+int GainControlImpl::instance_counter_ = 0;
+
+GainControlImpl::GainControlImpl(rtc::CriticalSection* crit_render,
+                                 rtc::CriticalSection* crit_capture)
+    : crit_render_(crit_render),
+      crit_capture_(crit_capture),
+      data_dumper_(new ApmDataDumper(instance_counter_)),
+      mode_(kAdaptiveAnalog),
+      minimum_capture_level_(0),
+      maximum_capture_level_(255),
+      limiter_enabled_(true),
+      target_level_dbfs_(3),
+      compression_gain_db_(9),
+      analog_capture_level_(0),
+      was_analog_level_set_(false),
+      stream_is_saturated_(false) {
+  RTC_DCHECK(crit_render);
+  RTC_DCHECK(crit_capture);
+}
+
+GainControlImpl::~GainControlImpl() {}
+
+void GainControlImpl::ProcessRenderAudio(
+    rtc::ArrayView<const int16_t> packed_render_audio) {
+  rtc::CritScope cs_capture(crit_capture_);
+  if (!enabled_) {
+    return;
+  }
+
+  for (auto& gain_controller : gain_controllers_) {
+    WebRtcAgc_AddFarend(gain_controller->state(), packed_render_audio.data(),
+                        packed_render_audio.size());
+  }
+}
+
+void GainControlImpl::PackRenderAudioBuffer(
+    AudioBuffer* audio,
+    std::vector<int16_t>* packed_buffer) {
+  RTC_DCHECK_GE(160, audio->num_frames_per_band());
+
+  packed_buffer->clear();
+  packed_buffer->insert(
+      packed_buffer->end(), audio->mixed_low_pass_data(),
+      (audio->mixed_low_pass_data() + audio->num_frames_per_band()));
+}
+
+int GainControlImpl::AnalyzeCaptureAudio(AudioBuffer* audio) {
+  rtc::CritScope cs(crit_capture_);
+
+  if (!enabled_) {
+    return AudioProcessing::kNoError;
+  }
+
+  RTC_DCHECK(num_proc_channels_);
+  RTC_DCHECK_GE(160, audio->num_frames_per_band());
+  RTC_DCHECK_EQ(audio->num_channels(), *num_proc_channels_);
+  RTC_DCHECK_LE(*num_proc_channels_, gain_controllers_.size());
+
+  if (mode_ == kAdaptiveAnalog) {
+    int capture_channel = 0;
+    for (auto& gain_controller : gain_controllers_) {
+      gain_controller->set_capture_level(analog_capture_level_);
+      int err = WebRtcAgc_AddMic(
+          gain_controller->state(), audio->split_bands(capture_channel),
+          audio->num_bands(), audio->num_frames_per_band());
+
+      if (err != AudioProcessing::kNoError) {
+        return AudioProcessing::kUnspecifiedError;
+      }
+      ++capture_channel;
+    }
+  } else if (mode_ == kAdaptiveDigital) {
+    int capture_channel = 0;
+    for (auto& gain_controller : gain_controllers_) {
+      int32_t capture_level_out = 0;
+      int err = WebRtcAgc_VirtualMic(
+          gain_controller->state(), audio->split_bands(capture_channel),
+          audio->num_bands(), audio->num_frames_per_band(),
+          analog_capture_level_, &capture_level_out);
+
+      gain_controller->set_capture_level(capture_level_out);
+
+      if (err != AudioProcessing::kNoError) {
+        return AudioProcessing::kUnspecifiedError;
+      }
+      ++capture_channel;
+    }
+  }
+
+  return AudioProcessing::kNoError;
+}
+
+int GainControlImpl::ProcessCaptureAudio(AudioBuffer* audio,
+                                         bool stream_has_echo) {
+  rtc::CritScope cs(crit_capture_);
+
+  if (!enabled_) {
+    return AudioProcessing::kNoError;
+  }
+
+  if (mode_ == kAdaptiveAnalog && !was_analog_level_set_) {
+    return AudioProcessing::kStreamParameterNotSetError;
+  }
+
+  RTC_DCHECK(num_proc_channels_);
+  RTC_DCHECK_GE(160, audio->num_frames_per_band());
+  RTC_DCHECK_EQ(audio->num_channels(), *num_proc_channels_);
+
+  stream_is_saturated_ = false;
+  int capture_channel = 0;
+  for (auto& gain_controller : gain_controllers_) {
+    int32_t capture_level_out = 0;
+    uint8_t saturation_warning = 0;
+
+    // The call to stream_has_echo() is ok from a deadlock perspective
+    // as the capture lock is already held.
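+    // Process one 10 ms band-split frame for this channel: the AGC consumes
+    // the current capture level and the echo indication, and produces an
+    // updated capture level plus a saturation warning.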
+ int err = WebRtcAgc_Process( + gain_controller->state(), audio->split_bands_const(capture_channel), + audio->num_bands(), audio->num_frames_per_band(), + audio->split_bands(capture_channel), + gain_controller->get_capture_level(), &capture_level_out, + stream_has_echo, &saturation_warning); + + if (err != AudioProcessing::kNoError) { + return AudioProcessing::kUnspecifiedError; + } + + gain_controller->set_capture_level(capture_level_out); + if (saturation_warning == 1) { + stream_is_saturated_ = true; + } + + ++capture_channel; + } + + RTC_DCHECK_LT(0ul, *num_proc_channels_); + if (mode_ == kAdaptiveAnalog) { + // Take the analog level to be the average across the handles. + analog_capture_level_ = 0; + for (auto& gain_controller : gain_controllers_) { + analog_capture_level_ += gain_controller->get_capture_level(); + } + + analog_capture_level_ /= (*num_proc_channels_); + } + + was_analog_level_set_ = false; + return AudioProcessing::kNoError; +} + +int GainControlImpl::compression_gain_db() const { + rtc::CritScope cs(crit_capture_); + return compression_gain_db_; +} + +// TODO(ajm): ensure this is called under kAdaptiveAnalog. +int GainControlImpl::set_stream_analog_level(int level) { + rtc::CritScope cs(crit_capture_); + data_dumper_->DumpRaw("gain_control_set_stream_analog_level", 1, &level); + + was_analog_level_set_ = true; + if (level < minimum_capture_level_ || level > maximum_capture_level_) { + return AudioProcessing::kBadParameterError; + } + analog_capture_level_ = level; + + return AudioProcessing::kNoError; +} + +int GainControlImpl::stream_analog_level() { + rtc::CritScope cs(crit_capture_); + data_dumper_->DumpRaw("gain_control_stream_analog_level", 1, + &analog_capture_level_); + // TODO(ajm): enable this assertion? + //RTC_DCHECK_EQ(kAdaptiveAnalog, mode_); + + return analog_capture_level_; +} + +int GainControlImpl::Enable(bool enable) { + rtc::CritScope cs_render(crit_render_); + rtc::CritScope cs_capture(crit_capture_); + if (enable && !enabled_) { + enabled_ = enable; // Must be set before Initialize() is called. 
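+    // Initialize() returns early when enabled_ is false, so the flag must be
+    // set first for the per-channel gain controllers to be allocated.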
+ + RTC_DCHECK(num_proc_channels_); + RTC_DCHECK(sample_rate_hz_); + Initialize(*num_proc_channels_, *sample_rate_hz_); + } else { + enabled_ = enable; + } + return AudioProcessing::kNoError; +} + +bool GainControlImpl::is_enabled() const { + rtc::CritScope cs(crit_capture_); + return enabled_; +} + +int GainControlImpl::set_mode(Mode mode) { + rtc::CritScope cs_render(crit_render_); + rtc::CritScope cs_capture(crit_capture_); + if (MapSetting(mode) == -1) { + return AudioProcessing::kBadParameterError; + } + + mode_ = mode; + RTC_DCHECK(num_proc_channels_); + RTC_DCHECK(sample_rate_hz_); + Initialize(*num_proc_channels_, *sample_rate_hz_); + return AudioProcessing::kNoError; +} + +GainControl::Mode GainControlImpl::mode() const { + rtc::CritScope cs(crit_capture_); + return mode_; +} + +int GainControlImpl::set_analog_level_limits(int minimum, + int maximum) { + if (minimum < 0) { + return AudioProcessing::kBadParameterError; + } + + if (maximum > 65535) { + return AudioProcessing::kBadParameterError; + } + + if (maximum < minimum) { + return AudioProcessing::kBadParameterError; + } + + size_t num_proc_channels_local = 0u; + int sample_rate_hz_local = 0; + { + rtc::CritScope cs(crit_capture_); + + minimum_capture_level_ = minimum; + maximum_capture_level_ = maximum; + + RTC_DCHECK(num_proc_channels_); + RTC_DCHECK(sample_rate_hz_); + num_proc_channels_local = *num_proc_channels_; + sample_rate_hz_local = *sample_rate_hz_; + } + Initialize(num_proc_channels_local, sample_rate_hz_local); + return AudioProcessing::kNoError; +} + +int GainControlImpl::analog_level_minimum() const { + rtc::CritScope cs(crit_capture_); + return minimum_capture_level_; +} + +int GainControlImpl::analog_level_maximum() const { + rtc::CritScope cs(crit_capture_); + return maximum_capture_level_; +} + +bool GainControlImpl::stream_is_saturated() const { + rtc::CritScope cs(crit_capture_); + return stream_is_saturated_; +} + +int GainControlImpl::set_target_level_dbfs(int level) { + if (level > 31 || level < 0) { + return AudioProcessing::kBadParameterError; + } + { + rtc::CritScope cs(crit_capture_); + target_level_dbfs_ = level; + } + return Configure(); +} + +int GainControlImpl::target_level_dbfs() const { + rtc::CritScope cs(crit_capture_); + return target_level_dbfs_; +} + +int GainControlImpl::set_compression_gain_db(int gain) { + if (gain < 0 || gain > 90) { + return AudioProcessing::kBadParameterError; + } + { + rtc::CritScope cs(crit_capture_); + compression_gain_db_ = gain; + } + return Configure(); +} + +int GainControlImpl::enable_limiter(bool enable) { + { + rtc::CritScope cs(crit_capture_); + limiter_enabled_ = enable; + } + return Configure(); +} + +bool GainControlImpl::is_limiter_enabled() const { + rtc::CritScope cs(crit_capture_); + return limiter_enabled_; +} + +void GainControlImpl::Initialize(size_t num_proc_channels, int sample_rate_hz) { + rtc::CritScope cs_render(crit_render_); + rtc::CritScope cs_capture(crit_capture_); + data_dumper_->InitiateNewSetOfRecordings(); + + num_proc_channels_ = num_proc_channels; + sample_rate_hz_ = sample_rate_hz; + + if (!enabled_) { + return; + } + + gain_controllers_.resize(*num_proc_channels_); + for (auto& gain_controller : gain_controllers_) { + if (!gain_controller) { + gain_controller.reset(new GainController()); + } + gain_controller->Initialize(minimum_capture_level_, maximum_capture_level_, + mode_, *sample_rate_hz_, analog_capture_level_); + } + + Configure(); +} + +int GainControlImpl::Configure() { + rtc::CritScope cs_render(crit_render_); + 
rtc::CritScope cs_capture(crit_capture_); + WebRtcAgcConfig config; + // TODO(ajm): Flip the sign here (since AGC expects a positive value) if we + // change the interface. + //RTC_DCHECK_LE(target_level_dbfs_, 0); + //config.targetLevelDbfs = static_cast<int16_t>(-target_level_dbfs_); + config.targetLevelDbfs = static_cast<int16_t>(target_level_dbfs_); + config.compressionGaindB = + static_cast<int16_t>(compression_gain_db_); + config.limiterEnable = limiter_enabled_; + + int error = AudioProcessing::kNoError; + for (auto& gain_controller : gain_controllers_) { + const int handle_error = + WebRtcAgc_set_config(gain_controller->state(), config); + if (handle_error != AudioProcessing::kNoError) { + error = handle_error; + } + } + return error; +} +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/gain_control_impl.h b/third_party/libwebrtc/webrtc/modules/audio_processing/gain_control_impl.h new file mode 100644 index 0000000000..26745065cf --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/gain_control_impl.h @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_GAIN_CONTROL_IMPL_H_ +#define MODULES_AUDIO_PROCESSING_GAIN_CONTROL_IMPL_H_ + +#include <memory> +#include <vector> + +#include "modules/audio_processing/include/audio_processing.h" +#include "modules/audio_processing/render_queue_item_verifier.h" +#include "rtc_base/constructormagic.h" +#include "rtc_base/criticalsection.h" +#include "rtc_base/swap_queue.h" +#include "rtc_base/thread_annotations.h" + +namespace webrtc { + +class ApmDataDumper; +class AudioBuffer; + +class GainControlImpl : public GainControl { + public: + GainControlImpl(rtc::CriticalSection* crit_render, + rtc::CriticalSection* crit_capture); + ~GainControlImpl() override; + + void ProcessRenderAudio(rtc::ArrayView<const int16_t> packed_render_audio); + int AnalyzeCaptureAudio(AudioBuffer* audio); + int ProcessCaptureAudio(AudioBuffer* audio, bool stream_has_echo); + + void Initialize(size_t num_proc_channels, int sample_rate_hz); + + static void PackRenderAudioBuffer(AudioBuffer* audio, + std::vector<int16_t>* packed_buffer); + + // GainControl implementation. + bool is_enabled() const override; + int stream_analog_level() override; + bool is_limiter_enabled() const override; + Mode mode() const override; + + int compression_gain_db() const override; + + private: + class GainController; + + // GainControl implementation. 
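+  // These mutating methods are private; clients reach them only through the
+  // GainControl interface pointer handed out by AudioProcessing.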
+ int Enable(bool enable) override; + int set_stream_analog_level(int level) override; + int set_mode(Mode mode) override; + int set_target_level_dbfs(int level) override; + int target_level_dbfs() const override; + int set_compression_gain_db(int gain) override; + int enable_limiter(bool enable) override; + int set_analog_level_limits(int minimum, int maximum) override; + int analog_level_minimum() const override; + int analog_level_maximum() const override; + bool stream_is_saturated() const override; + + int Configure(); + + rtc::CriticalSection* const crit_render_ RTC_ACQUIRED_BEFORE(crit_capture_); + rtc::CriticalSection* const crit_capture_; + + std::unique_ptr<ApmDataDumper> data_dumper_; + + bool enabled_ = false; + + Mode mode_ RTC_GUARDED_BY(crit_capture_); + int minimum_capture_level_ RTC_GUARDED_BY(crit_capture_); + int maximum_capture_level_ RTC_GUARDED_BY(crit_capture_); + bool limiter_enabled_ RTC_GUARDED_BY(crit_capture_); + int target_level_dbfs_ RTC_GUARDED_BY(crit_capture_); + int compression_gain_db_ RTC_GUARDED_BY(crit_capture_); + int analog_capture_level_ RTC_GUARDED_BY(crit_capture_); + bool was_analog_level_set_ RTC_GUARDED_BY(crit_capture_); + bool stream_is_saturated_ RTC_GUARDED_BY(crit_capture_); + + std::vector<std::unique_ptr<GainController>> gain_controllers_; + + rtc::Optional<size_t> num_proc_channels_ RTC_GUARDED_BY(crit_capture_); + rtc::Optional<int> sample_rate_hz_ RTC_GUARDED_BY(crit_capture_); + + static int instance_counter_; + RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(GainControlImpl); +}; +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_GAIN_CONTROL_IMPL_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/gain_control_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/gain_control_unittest.cc new file mode 100644 index 0000000000..62908c7aec --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/gain_control_unittest.cc @@ -0,0 +1,441 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ +#include <vector> + +#include "api/array_view.h" +#include "modules/audio_processing/audio_buffer.h" +#include "modules/audio_processing/gain_control_impl.h" +#include "modules/audio_processing/test/audio_buffer_tools.h" +#include "modules/audio_processing/test/bitexactness_tools.h" +#include "test/gtest.h" + +namespace webrtc { +namespace { + +const int kNumFramesToProcess = 100; + +void ProcessOneFrame(int sample_rate_hz, + AudioBuffer* render_audio_buffer, + AudioBuffer* capture_audio_buffer, + GainControlImpl* gain_controller) { + if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) { + render_audio_buffer->SplitIntoFrequencyBands(); + capture_audio_buffer->SplitIntoFrequencyBands(); + } + + std::vector<int16_t> render_audio; + GainControlImpl::PackRenderAudioBuffer(render_audio_buffer, &render_audio); + gain_controller->ProcessRenderAudio(render_audio); + gain_controller->AnalyzeCaptureAudio(capture_audio_buffer); + gain_controller->ProcessCaptureAudio(capture_audio_buffer, false); + + if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) { + capture_audio_buffer->MergeFrequencyBands(); + } +} + +void SetupComponent(int sample_rate_hz, + GainControl::Mode mode, + int target_level_dbfs, + int stream_analog_level, + int compression_gain_db, + bool enable_limiter, + int analog_level_min, + int analog_level_max, + GainControlImpl* gain_controller) { + gain_controller->Initialize(1, sample_rate_hz); + GainControl* gc = static_cast<GainControl*>(gain_controller); + gc->Enable(true); + gc->set_mode(mode); + gc->set_stream_analog_level(stream_analog_level); + gc->set_target_level_dbfs(target_level_dbfs); + gc->set_compression_gain_db(compression_gain_db); + gc->enable_limiter(enable_limiter); + gc->set_analog_level_limits(analog_level_min, analog_level_max); +} + +void RunBitExactnessTest(int sample_rate_hz, + size_t num_channels, + GainControl::Mode mode, + int target_level_dbfs, + int stream_analog_level, + int compression_gain_db, + bool enable_limiter, + int analog_level_min, + int analog_level_max, + int achieved_stream_analog_level_reference, + rtc::ArrayView<const float> output_reference) { + rtc::CriticalSection crit_render; + rtc::CriticalSection crit_capture; + GainControlImpl gain_controller(&crit_render, &crit_capture); + SetupComponent(sample_rate_hz, mode, target_level_dbfs, stream_analog_level, + compression_gain_db, enable_limiter, analog_level_min, + analog_level_max, &gain_controller); + + const int samples_per_channel = rtc::CheckedDivExact(sample_rate_hz, 100); + const StreamConfig render_config(sample_rate_hz, num_channels, false); + AudioBuffer render_buffer( + render_config.num_frames(), render_config.num_channels(), + render_config.num_frames(), 1, render_config.num_frames()); + test::InputAudioFile render_file( + test::GetApmRenderTestVectorFileName(sample_rate_hz)); + std::vector<float> render_input(samples_per_channel * num_channels); + + const StreamConfig capture_config(sample_rate_hz, num_channels, false); + AudioBuffer capture_buffer( + capture_config.num_frames(), capture_config.num_channels(), + capture_config.num_frames(), 1, capture_config.num_frames()); + test::InputAudioFile capture_file( + test::GetApmCaptureTestVectorFileName(sample_rate_hz)); + std::vector<float> capture_input(samples_per_channel * num_channels); + + for (int frame_no = 0; frame_no < kNumFramesToProcess; ++frame_no) { + ReadFloatSamplesFromStereoFile(samples_per_channel, num_channels, + &render_file, render_input); + ReadFloatSamplesFromStereoFile(samples_per_channel, 
num_channels,
+                                   &capture_file, capture_input);
+
+    test::CopyVectorToAudioBuffer(render_config, render_input, &render_buffer);
+    test::CopyVectorToAudioBuffer(capture_config, capture_input,
+                                  &capture_buffer);
+
+    ProcessOneFrame(sample_rate_hz, &render_buffer, &capture_buffer,
+                    &gain_controller);
+  }
+
+  // Extract and verify the test results.
+  std::vector<float> capture_output;
+  test::ExtractVectorFromAudioBuffer(capture_config, &capture_buffer,
+                                     &capture_output);
+
+  EXPECT_EQ(achieved_stream_analog_level_reference,
+            gain_controller.stream_analog_level());
+
+  // Compare the output with the reference. Only the first values of the
+  // output from the last processed frame are compared, to avoid having to
+  // specify all preceding frames as test vectors. As the algorithm being
+  // tested has a memory, testing only the last frame implicitly also tests
+  // the preceding frames.
+  const float kElementErrorBound = 1.0f / 32768.0f;
+  EXPECT_TRUE(test::VerifyDeinterleavedArray(
+      capture_config.num_frames(), capture_config.num_channels(),
+      output_reference, capture_output, kElementErrorBound));
+}
+
+}  // namespace
+
+// TODO(peah): Activate all these tests for ARM and ARM64 once the issue on
+// the Chromium ARM and ARM64 bots has been identified. This is tracked in the
+// issue https://bugs.chromium.org/p/webrtc/issues/detail?id=5711.
+
+#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \
+      defined(WEBRTC_ANDROID))
+TEST(GainControlBitExactnessTest,
+     Mono8kHz_AdaptiveAnalog_Tl10_SL50_CG5_Lim_AL0_100) {
+#else
+TEST(GainControlBitExactnessTest,
+     DISABLED_Mono8kHz_AdaptiveAnalog_Tl10_SL50_CG5_Lim_AL0_100) {
+#endif
+  const int kStreamAnalogLevelReference = 50;
+  const float kOutputReference[] = {-0.006622f, -0.002747f, 0.001587f};
+  RunBitExactnessTest(8000, 1, GainControl::Mode::kAdaptiveAnalog, 10, 50, 5,
+                      true, 0, 100, kStreamAnalogLevelReference,
+                      kOutputReference);
+}
+
+#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \
+      defined(WEBRTC_ANDROID))
+TEST(GainControlBitExactnessTest,
+     Mono16kHz_AdaptiveAnalog_Tl10_SL50_CG5_Lim_AL0_100) {
+#else
+TEST(GainControlBitExactnessTest,
+     DISABLED_Mono16kHz_AdaptiveAnalog_Tl10_SL50_CG5_Lim_AL0_100) {
+#endif
+  const int kStreamAnalogLevelReference = 50;
+  const float kOutputReference[] = {-0.006561f, -0.004608f, -0.002899f};
+  RunBitExactnessTest(16000, 1, GainControl::Mode::kAdaptiveAnalog, 10, 50, 5,
+                      true, 0, 100, kStreamAnalogLevelReference,
+                      kOutputReference);
+}
+
+#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \
+      defined(WEBRTC_ANDROID))
+TEST(GainControlBitExactnessTest,
+     Stereo16kHz_AdaptiveAnalog_Tl10_SL50_CG5_Lim_AL0_100) {
+#else
+TEST(GainControlBitExactnessTest,
+     DISABLED_Stereo16kHz_AdaptiveAnalog_Tl10_SL50_CG5_Lim_AL0_100) {
+#endif
+  const int kStreamAnalogLevelReference = 50;
+  const float kOutputReference[] = {-0.027313f, -0.015900f, -0.028107f,
+                                    -0.027313f, -0.015900f, -0.028107f};
+  RunBitExactnessTest(16000, 2, GainControl::Mode::kAdaptiveAnalog, 10, 50, 5,
+                      true, 0, 100, kStreamAnalogLevelReference,
+                      kOutputReference);
+}
+
+#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \
+      defined(WEBRTC_ANDROID))
+TEST(GainControlBitExactnessTest,
+     Mono32kHz_AdaptiveAnalog_Tl10_SL50_CG5_Lim_AL0_100) {
+#else
+TEST(GainControlBitExactnessTest,
+     DISABLED_Mono32kHz_AdaptiveAnalog_Tl10_SL50_CG5_Lim_AL0_100) {
+#endif
+  const int kStreamAnalogLevelReference = 50;
+  const float kOutputReference[] = {-0.010162f, -0.009155f, -0.008301f};
+
RunBitExactnessTest(32000, 1, GainControl::Mode::kAdaptiveAnalog, 10, 50, 5, + true, 0, 100, kStreamAnalogLevelReference, + kOutputReference); +} + +#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \ + defined(WEBRTC_ANDROID)) +TEST(GainControlBitExactnessTest, + Mono48kHz_AdaptiveAnalog_Tl10_SL50_CG5_Lim_AL0_100) { +#else +TEST(GainControlBitExactnessTest, + DISABLED_Mono48kHz_AdaptiveAnalog_Tl10_SL50_CG5_Lim_AL0_100) { +#endif + const int kStreamAnalogLevelReference = 50; + const float kOutputReference[] = {-0.010162f, -0.009155f, -0.008301f}; + RunBitExactnessTest(32000, 1, GainControl::Mode::kAdaptiveAnalog, 10, 50, 5, + true, 0, 100, kStreamAnalogLevelReference, + kOutputReference); +} + +#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \ + defined(WEBRTC_ANDROID)) +TEST(GainControlBitExactnessTest, + Mono8kHz_AdaptiveDigital_Tl10_SL50_CG5_Lim_AL0_100) { +#else +TEST(GainControlBitExactnessTest, + DISABLED_Mono8kHz_AdaptiveDigital_Tl10_SL50_CG5_Lim_AL0_100) { +#endif + const int kStreamAnalogLevelReference = 50; + const float kOutputReference[] = {-0.004028f, -0.001678f, 0.000946f}; + RunBitExactnessTest(8000, 1, GainControl::Mode::kAdaptiveDigital, 10, 50, 5, + true, 0, 100, kStreamAnalogLevelReference, + kOutputReference); +} + +#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \ + defined(WEBRTC_ANDROID)) +TEST(GainControlBitExactnessTest, + Mono16kHz_AdaptiveDigital_Tl10_SL50_CG5_Lim_AL0_100) { +#else +TEST(GainControlBitExactnessTest, + DISABLED_Mono16kHz_AdaptiveDigital_Tl10_SL50_CG5_Lim_AL0_100) { +#endif + const int kStreamAnalogLevelReference = 50; + const float kOutputReference[] = {-0.003967f, -0.002777f, -0.001770f}; + RunBitExactnessTest(16000, 1, GainControl::Mode::kAdaptiveDigital, 10, 50, 5, + true, 0, 100, kStreamAnalogLevelReference, + kOutputReference); +} + +#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \ + defined(WEBRTC_ANDROID)) +TEST(GainControlBitExactnessTest, + Stereo16kHz_AdaptiveDigital_Tl10_SL50_CG5_Lim_AL0_100) { +#else +TEST(GainControlBitExactnessTest, + DISABLED_Stereo16kHz_AdaptiveDigital_Tl10_SL50_CG5_Lim_AL0_100) { +#endif + const int kStreamAnalogLevelReference = 50; + const float kOutputReference[] = {-0.015411f, -0.008972f, -0.015839f, + -0.015411f, -0.008972f, -0.015839f}; + RunBitExactnessTest(16000, 2, GainControl::Mode::kAdaptiveDigital, 10, 50, 5, + true, 0, 100, kStreamAnalogLevelReference, + kOutputReference); +} + +#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \ + defined(WEBRTC_ANDROID)) +TEST(GainControlBitExactnessTest, + Mono32kHz_AdaptiveDigital_Tl10_SL50_CG5_Lim_AL0_100) { +#else +TEST(GainControlBitExactnessTest, + DISABLED_Mono32kHz_AdaptiveDigital_Tl10_SL50_CG5_Lim_AL0_100) { +#endif + const int kStreamAnalogLevelReference = 50; + const float kOutputReference[] = {-0.006104f, -0.005524f, -0.004974f}; + RunBitExactnessTest(32000, 1, GainControl::Mode::kAdaptiveDigital, 10, 50, 5, + true, 0, 100, kStreamAnalogLevelReference, + kOutputReference); +} + +#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \ + defined(WEBRTC_ANDROID)) +TEST(GainControlBitExactnessTest, + Mono48kHz_AdaptiveDigital_Tl10_SL50_CG5_Lim_AL0_100) { +#else +TEST(GainControlBitExactnessTest, + DISABLED_Mono48kHz_AdaptiveDigital_Tl10_SL50_CG5_Lim_AL0_100) { +#endif + const int kStreamAnalogLevelReference = 50; + const float kOutputReference[] = {-0.006104f, -0.005524f, -0.004974f}; + RunBitExactnessTest(32000, 1, GainControl::Mode::kAdaptiveDigital, 10, 50, 5, + true, 0, 
100, kStreamAnalogLevelReference, + kOutputReference); +} + +#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \ + defined(WEBRTC_ANDROID)) +TEST(GainControlBitExactnessTest, + Mono8kHz_FixedDigital_Tl10_SL50_CG5_Lim_AL0_100) { +#else +TEST(GainControlBitExactnessTest, + DISABLED_Mono8kHz_FixedDigital_Tl10_SL50_CG5_Lim_AL0_100) { +#endif + const int kStreamAnalogLevelReference = 50; + const float kOutputReference[] = {-0.011871f, -0.004944f, 0.002838f}; + RunBitExactnessTest(8000, 1, GainControl::Mode::kFixedDigital, 10, 50, 5, + true, 0, 100, kStreamAnalogLevelReference, + kOutputReference); +} + +#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \ + defined(WEBRTC_ANDROID)) +TEST(GainControlBitExactnessTest, + Mono16kHz_FixedDigital_Tl10_SL50_CG5_Lim_AL0_100) { +#else +TEST(GainControlBitExactnessTest, + DISABLED_Mono16kHz_FixedDigital_Tl10_SL50_CG5_Lim_AL0_100) { +#endif + const int kStreamAnalogLevelReference = 50; + const float kOutputReference[] = {-0.011749f, -0.008270f, -0.005219f}; + RunBitExactnessTest(16000, 1, GainControl::Mode::kFixedDigital, 10, 50, 5, + true, 0, 100, kStreamAnalogLevelReference, + kOutputReference); +} + +#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \ + defined(WEBRTC_ANDROID)) +TEST(GainControlBitExactnessTest, + Stereo16kHz_FixedDigital_Tl10_SL50_CG5_Lim_AL0_100) { +#else +TEST(GainControlBitExactnessTest, + DISABLED_Stereo16kHz_FixedDigital_Tl10_SL50_CG5_Lim_AL0_100) { +#endif + const int kStreamAnalogLevelReference = 50; + const float kOutputReference[] = {-0.048950f, -0.028503f, -0.050354f, + -0.048950f, -0.028503f, -0.050354f}; + RunBitExactnessTest(16000, 2, GainControl::Mode::kFixedDigital, 10, 50, 5, + true, 0, 100, kStreamAnalogLevelReference, + kOutputReference); +} + +#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \ + defined(WEBRTC_ANDROID)) +TEST(GainControlBitExactnessTest, + Mono32kHz_FixedDigital_Tl10_SL50_CG5_Lim_AL0_100) { +#else +TEST(GainControlBitExactnessTest, + DISABLED_Mono32kHz_FixedDigital_Tl10_SL50_CG5_Lim_AL0_100) { +#endif + const int kStreamAnalogLevelReference = 50; + const float kOutputReference[] = {-0.018188f, -0.016418f, -0.014862f}; + RunBitExactnessTest(32000, 1, GainControl::Mode::kFixedDigital, 10, 50, 5, + true, 0, 100, kStreamAnalogLevelReference, + kOutputReference); +} + +#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \ + defined(WEBRTC_ANDROID)) +TEST(GainControlBitExactnessTest, + Mono48kHz_FixedDigital_Tl10_SL50_CG5_Lim_AL0_100) { +#else +TEST(GainControlBitExactnessTest, + DISABLED_Mono48kHz_FixedDigital_Tl10_SL50_CG5_Lim_AL0_100) { +#endif + const int kStreamAnalogLevelReference = 50; + const float kOutputReference[] = {-0.018188f, -0.016418f, -0.014862f}; + RunBitExactnessTest(32000, 1, GainControl::Mode::kFixedDigital, 10, 50, 5, + true, 0, 100, kStreamAnalogLevelReference, + kOutputReference); +} + +#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \ + defined(WEBRTC_ANDROID)) +TEST(GainControlBitExactnessTest, + Mono16kHz_AdaptiveAnalog_Tl10_SL10_CG5_Lim_AL0_100) { +#else +TEST(GainControlBitExactnessTest, + DISABLED_Mono16kHz_AdaptiveAnalog_Tl10_SL10_CG5_Lim_AL0_100) { +#endif + const int kStreamAnalogLevelReference = 12; + const float kOutputReference[] = {-0.006561f, -0.004608f, -0.002899f}; + RunBitExactnessTest(16000, 1, GainControl::Mode::kAdaptiveAnalog, 10, 10, 5, + true, 0, 100, kStreamAnalogLevelReference, + kOutputReference); +} + +#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \ + 
defined(WEBRTC_ANDROID)) +TEST(GainControlBitExactnessTest, + Mono16kHz_AdaptiveAnalog_Tl10_SL100_CG5_Lim_AL70_80) { +#else +TEST(GainControlBitExactnessTest, + DISABLED_Mono16kHz_AdaptiveAnalog_Tl10_SL100_CG5_Lim_AL70_80) { +#endif + const int kStreamAnalogLevelReference = 100; + const float kOutputReference[] = {-0.003998f, -0.002808f, -0.001770f}; + RunBitExactnessTest(16000, 1, GainControl::Mode::kAdaptiveAnalog, 10, 100, 5, + true, 70, 80, kStreamAnalogLevelReference, + kOutputReference); +} + +#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \ + defined(WEBRTC_ANDROID)) +TEST(GainControlBitExactnessTest, + Mono16kHz_AdaptiveDigital_Tl10_SL100_CG5_NoLim_AL0_100) { +#else +TEST(GainControlBitExactnessTest, + DISABLED_Mono16kHz_AdaptiveDigital_Tl10_SL100_CG5_NoLim_AL0_100) { +#endif + const int kStreamAnalogLevelReference = 100; + const float kOutputReference[] = {-0.004028f, -0.002838f, -0.001770f}; + RunBitExactnessTest(16000, 1, GainControl::Mode::kAdaptiveDigital, 10, 100, 5, + false, 0, 100, kStreamAnalogLevelReference, + kOutputReference); +} + +#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \ + defined(WEBRTC_ANDROID)) +TEST(GainControlBitExactnessTest, + Mono16kHz_AdaptiveDigital_Tl40_SL100_CG5_Lim_AL0_100) { +#else +TEST(GainControlBitExactnessTest, + DISABLED_Mono16kHz_AdaptiveDigital_Tl40_SL100_CG5_Lim_AL0_100) { +#endif + const int kStreamAnalogLevelReference = 100; + const float kOutputReference[] = {-0.008728f, -0.006134f, -0.003845f}; + RunBitExactnessTest(16000, 1, GainControl::Mode::kAdaptiveDigital, 40, 100, 5, + true, 0, 100, kStreamAnalogLevelReference, + kOutputReference); +} + +#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \ + defined(WEBRTC_ANDROID)) +TEST(GainControlBitExactnessTest, + Mono16kHz_AdaptiveDigital_Tl10_SL100_CG30_Lim_AL0_100) { +#else +TEST(GainControlBitExactnessTest, + DISABLED_Mono16kHz_AdaptiveDigital_Tl10_SL100_CG30_Lim_AL0_100) { +#endif + const int kStreamAnalogLevelReference = 100; + const float kOutputReference[] = {-0.005859f, -0.004120f, -0.002594f}; + RunBitExactnessTest(16000, 1, GainControl::Mode::kAdaptiveDigital, 10, 100, + 30, true, 0, 100, kStreamAnalogLevelReference, + kOutputReference); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/include/aec_dump.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/include/aec_dump.cc new file mode 100644 index 0000000000..365d01510e --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/include/aec_dump.cc @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/include/aec_dump.h" + +namespace webrtc { +InternalAPMConfig::InternalAPMConfig() = default; +InternalAPMConfig::InternalAPMConfig(const InternalAPMConfig&) = default; +InternalAPMConfig::InternalAPMConfig(InternalAPMConfig&&) = default; +InternalAPMConfig& InternalAPMConfig::operator=(const InternalAPMConfig&) = + default; + +bool InternalAPMConfig::operator==(const InternalAPMConfig& other) { + return aec_enabled == other.aec_enabled && + aec_delay_agnostic_enabled == other.aec_delay_agnostic_enabled && + aec_drift_compensation_enabled == + other.aec_drift_compensation_enabled && + aec_extended_filter_enabled == other.aec_extended_filter_enabled && + aec_suppression_level == other.aec_suppression_level && + aecm_enabled == other.aecm_enabled && + aecm_comfort_noise_enabled == other.aecm_comfort_noise_enabled && + aecm_routing_mode == other.aecm_routing_mode && + agc_enabled == other.agc_enabled && agc_mode == other.agc_mode && + agc_limiter_enabled == other.agc_limiter_enabled && + hpf_enabled == other.hpf_enabled && ns_enabled == other.ns_enabled && + ns_level == other.ns_level && + transient_suppression_enabled == other.transient_suppression_enabled && + intelligibility_enhancer_enabled == + other.intelligibility_enhancer_enabled && + noise_robust_agc_enabled == other.noise_robust_agc_enabled && + experiments_description == other.experiments_description; +} +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/include/aec_dump.h b/third_party/libwebrtc/webrtc/modules/audio_processing/include/aec_dump.h new file mode 100644 index 0000000000..0c8d2271cc --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/include/aec_dump.h @@ -0,0 +1,141 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_INCLUDE_AEC_DUMP_H_ +#define MODULES_AUDIO_PROCESSING_INCLUDE_AEC_DUMP_H_ + +#include <memory> +#include <string> +#include <vector> + +#include "api/array_view.h" + +namespace webrtc { + +class AudioFrame; + +// Struct for passing current config from APM without having to +// include protobuf headers. 
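+// The fields mirror the toggles and modes of the individual APM submodules
+// (AEC, AECM, AGC, high-pass filter, noise suppression, etc.) so a recorder
+// can log the active configuration without the protobuf dependency.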
+struct InternalAPMConfig {
+  InternalAPMConfig();
+  InternalAPMConfig(const InternalAPMConfig&);
+  InternalAPMConfig(InternalAPMConfig&&);
+
+  InternalAPMConfig& operator=(const InternalAPMConfig&);
+  InternalAPMConfig& operator=(InternalAPMConfig&&) = delete;
+
+  bool operator==(const InternalAPMConfig& other);
+
+  bool aec_enabled = false;
+  bool aec_delay_agnostic_enabled = false;
+  bool aec_drift_compensation_enabled = false;
+  bool aec_extended_filter_enabled = false;
+  int aec_suppression_level = 0;
+  bool aecm_enabled = false;
+  bool aecm_comfort_noise_enabled = false;
+  int aecm_routing_mode = 0;
+  bool agc_enabled = false;
+  int agc_mode = 0;
+  bool agc_limiter_enabled = false;
+  bool hpf_enabled = false;
+  bool ns_enabled = false;
+  int ns_level = 0;
+  bool transient_suppression_enabled = false;
+  bool intelligibility_enhancer_enabled = false;
+  bool noise_robust_agc_enabled = false;
+  std::string experiments_description = "";
+};
+
+struct InternalAPMStreamsConfig {
+  int input_sample_rate = 0;
+  int output_sample_rate = 0;
+  int render_input_sample_rate = 0;
+  int render_output_sample_rate = 0;
+
+  size_t input_num_channels = 0;
+  size_t output_num_channels = 0;
+  size_t render_input_num_channels = 0;
+  size_t render_output_num_channels = 0;
+};
+
+// Class to pass audio data in float** format. This is to avoid
+// dependence on AudioBuffer, and avoid problems associated with
+// rtc::ArrayView<rtc::ArrayView>.
+class FloatAudioFrame {
+ public:
+  // |num_channels| and |channel_size| describe the float**
+  // |audio_samples|. |audio_samples| is assumed to point to a
+  // two-dimensional |num_channels * channel_size| array of floats.
+  FloatAudioFrame(const float* const* audio_samples,
+                  size_t num_channels,
+                  size_t channel_size)
+      : audio_samples_(audio_samples),
+        num_channels_(num_channels),
+        channel_size_(channel_size) {}
+
+  FloatAudioFrame() = delete;
+
+  size_t num_channels() const { return num_channels_; }
+
+  rtc::ArrayView<const float> channel(size_t idx) const {
+    RTC_DCHECK_LE(0, idx);
+    // A valid channel index must be strictly smaller than the channel count.
+    RTC_DCHECK_LT(idx, num_channels_);
+    return rtc::ArrayView<const float>(audio_samples_[idx], channel_size_);
+  }
+
+ private:
+  const float* const* audio_samples_;
+  size_t num_channels_;
+  size_t channel_size_;
+};
+
+// An interface for recording configuration and input/output streams
+// of the Audio Processing Module. The recordings are called
+// 'aec-dumps' and are stored in a protobuf format defined in
+// debug.proto.
+// The Write* methods are always safe to call concurrently or
+// otherwise for all implementing subclasses. The intended mode of
+// operation is to create a protobuf object from the input, and send
+// it away to be written to file asynchronously.
+class AecDump {
+ public:
+  struct AudioProcessingState {
+    int delay;
+    int drift;
+    int level;
+    bool keypress;
+  };
+
+  virtual ~AecDump() = default;
+
+  // Logs Event::Type INIT message.
+  virtual void WriteInitMessage(
+      const InternalAPMStreamsConfig& streams_config) = 0;
+
+  // Logs Event::Type STREAM message. To log an input/output pair,
+  // call the AddCapture* and AddAudioProcessingState methods followed
+  // by a WriteCaptureStreamMessage call.
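+  // For example, a capture-side logging sequence might look like this
+  // (sketch; |aec_dump|, |input_frame|, |state| and |output_frame| are
+  // placeholder names):
+  //   aec_dump->AddCaptureStreamInput(input_frame);
+  //   aec_dump->AddAudioProcessingState(state);
+  //   aec_dump->AddCaptureStreamOutput(output_frame);
+  //   aec_dump->WriteCaptureStreamMessage();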
+ virtual void AddCaptureStreamInput(const FloatAudioFrame& src) = 0; + virtual void AddCaptureStreamOutput(const FloatAudioFrame& src) = 0; + virtual void AddCaptureStreamInput(const AudioFrame& frame) = 0; + virtual void AddCaptureStreamOutput(const AudioFrame& frame) = 0; + virtual void AddAudioProcessingState(const AudioProcessingState& state) = 0; + virtual void WriteCaptureStreamMessage() = 0; + + // Logs Event::Type REVERSE_STREAM message. + virtual void WriteRenderStreamMessage(const AudioFrame& frame) = 0; + virtual void WriteRenderStreamMessage(const FloatAudioFrame& src) = 0; + + // Logs Event::Type CONFIG message. + virtual void WriteConfig(const InternalAPMConfig& config) = 0; +}; +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_INCLUDE_AEC_DUMP_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/include/audio_processing.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/include/audio_processing.cc new file mode 100644 index 0000000000..8410a3dfbb --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/include/audio_processing.cc @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/include/audio_processing.h" + +#include "rtc_base/checks.h" + +namespace webrtc { + +Beamforming::Beamforming() + : enabled(false), + array_geometry(), + target_direction( + SphericalPointf(static_cast<float>(M_PI) / 2.f, 0.f, 1.f)) {} +Beamforming::Beamforming(bool enabled, const std::vector<Point>& array_geometry) + : Beamforming(enabled, + array_geometry, + SphericalPointf(static_cast<float>(M_PI) / 2.f, 0.f, 1.f)) {} + +Beamforming::Beamforming(bool enabled, + const std::vector<Point>& array_geometry, + SphericalPointf target_direction) + : enabled(enabled), + array_geometry(array_geometry), + target_direction(target_direction) {} + +Beamforming::~Beamforming() {} +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/include/audio_processing.h b/third_party/libwebrtc/webrtc/modules/audio_processing/include/audio_processing.h new file mode 100644 index 0000000000..df397b1407 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/include/audio_processing.h @@ -0,0 +1,1228 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_H_ +#define MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_H_ + +// MSVC++ requires this to be set before any other includes to get M_PI. 
+// MOZILLA: this is already defined in mozilla-config.h +// #define _USE_MATH_DEFINES + +#include <math.h> +#include <stddef.h> // size_t +#include <stdio.h> // FILE +#include <string.h> +#include <vector> + +#include "api/optional.h" +#include "modules/audio_processing/beamformer/array_util.h" +#include "modules/audio_processing/include/audio_processing_statistics.h" +#include "modules/audio_processing/include/config.h" +#include "rtc_base/arraysize.h" +#include "rtc_base/deprecation.h" +#include "rtc_base/platform_file.h" +#include "rtc_base/refcount.h" +#include "typedefs.h" // NOLINT(build/include) + +namespace webrtc { + +struct AecCore; + +class AecDump; +class AudioBuffer; +class AudioFrame; + +class NonlinearBeamformer; + +class StreamConfig; +class ProcessingConfig; + +class EchoCancellation; +class EchoControlMobile; +class EchoControlFactory; +class GainControl; +class HighPassFilter; +class LevelEstimator; +class NoiseSuppression; +class PostProcessing; +class VoiceDetection; + +// Use to enable the extended filter mode in the AEC, along with robustness +// measures around the reported system delays. It comes with a significant +// increase in AEC complexity, but is much more robust to unreliable reported +// delays. +// +// Detailed changes to the algorithm: +// - The filter length is changed from 48 to 128 ms. This comes with tuning of +// several parameters: i) filter adaptation stepsize and error threshold; +// ii) non-linear processing smoothing and overdrive. +// - Option to ignore the reported delays on platforms which we deem +// sufficiently unreliable. See WEBRTC_UNTRUSTED_DELAY in echo_cancellation.c. +// - Faster startup times by removing the excessive "startup phase" processing +// of reported delays. +// - Much more conservative adjustments to the far-end read pointer. We smooth +// the delay difference more heavily, and back off from the difference more. +// Adjustments force a readaptation of the filter, so they should be avoided +// except when really necessary. +struct ExtendedFilter { + ExtendedFilter() : enabled(false) {} + explicit ExtendedFilter(bool enabled) : enabled(enabled) {} + static const ConfigOptionID identifier = ConfigOptionID::kExtendedFilter; + bool enabled; +}; + +// Enables the refined linear filter adaptation in the echo canceller. +// This configuration only applies to EchoCancellation and not +// EchoControlMobile. It can be set in the constructor +// or using AudioProcessing::SetExtraOptions(). +struct RefinedAdaptiveFilter { + RefinedAdaptiveFilter() : enabled(false) {} + explicit RefinedAdaptiveFilter(bool enabled) : enabled(enabled) {} + static const ConfigOptionID identifier = + ConfigOptionID::kAecRefinedAdaptiveFilter; + bool enabled; +}; + +// Enables delay-agnostic echo cancellation. This feature relies on internally +// estimated delays between the process and reverse streams, thus not relying +// on reported system delays. This configuration only applies to +// EchoCancellation and not EchoControlMobile. It can be set in the constructor +// or using AudioProcessing::SetExtraOptions(). +struct DelayAgnostic { + DelayAgnostic() : enabled(false) {} + explicit DelayAgnostic(bool enabled) : enabled(enabled) {} + static const ConfigOptionID identifier = ConfigOptionID::kDelayAgnostic; + bool enabled; +}; + +// Use to enable experimental gain control (AGC). At startup the experimental +// AGC moves the microphone volume up to |startup_min_volume| if the current +// microphone volume is set too low. 
The value is clamped to its operating range +// [12, 255]. Here, 255 maps to 100%. +// +// Must be provided through AudioProcessing::Create(Config&). +#if defined(WEBRTC_CHROMIUM_BUILD) +static const int kAgcStartupMinVolume = 85; +#else +static const int kAgcStartupMinVolume = 0; +#endif // defined(WEBRTC_CHROMIUM_BUILD) +static constexpr int kClippedLevelMin = 70; +struct ExperimentalAgc { + ExperimentalAgc() = default; + explicit ExperimentalAgc(bool enabled) : enabled(enabled) {} + ExperimentalAgc(bool enabled, int startup_min_volume) + : enabled(enabled), startup_min_volume(startup_min_volume) {} + ExperimentalAgc(bool enabled, int startup_min_volume, int clipped_level_min) + : enabled(enabled), + startup_min_volume(startup_min_volume), + clipped_level_min(clipped_level_min) {} + static const ConfigOptionID identifier = ConfigOptionID::kExperimentalAgc; + bool enabled = true; + int startup_min_volume = kAgcStartupMinVolume; + // Lowest microphone level that will be applied in response to clipping. + int clipped_level_min = kClippedLevelMin; +}; + +// Use to enable experimental noise suppression. It can be set in the +// constructor or using AudioProcessing::SetExtraOptions(). +struct ExperimentalNs { + ExperimentalNs() : enabled(false) {} + explicit ExperimentalNs(bool enabled) : enabled(enabled) {} + static const ConfigOptionID identifier = ConfigOptionID::kExperimentalNs; + bool enabled; +}; + +// Use to enable beamforming. Must be provided through the constructor. It will +// have no impact if used with AudioProcessing::SetExtraOptions(). +struct Beamforming { + Beamforming(); + Beamforming(bool enabled, const std::vector<Point>& array_geometry); + Beamforming(bool enabled, + const std::vector<Point>& array_geometry, + SphericalPointf target_direction); + ~Beamforming(); + + static const ConfigOptionID identifier = ConfigOptionID::kBeamforming; + const bool enabled; + const std::vector<Point> array_geometry; + const SphericalPointf target_direction; +}; + +// Use to enable the intelligibility enhancer in audio processing. +// +// Note: If enabled and the reverse stream has more than one output channel, +// the reverse stream will become an upmixed mono signal. +struct Intelligibility { + Intelligibility() : enabled(false) {} + explicit Intelligibility(bool enabled) : enabled(enabled) {} + static const ConfigOptionID identifier = ConfigOptionID::kIntelligibility; + bool enabled; +}; + +// The Audio Processing Module (APM) provides a collection of voice processing +// components designed for real-time communications software. +// +// APM operates on two audio streams on a frame-by-frame basis. Frames of the +// primary stream, on which all processing is applied, are passed to +// |ProcessStream()|. Frames of the reverse direction stream are passed to +// |ProcessReverseStream()|. On the client-side, this will typically be the +// near-end (capture) and far-end (render) streams, respectively. APM should be +// placed in the signal chain as close to the audio hardware abstraction layer +// (HAL) as possible. +// +// On the server-side, the reverse stream will normally not be used, with +// processing occurring on each incoming stream. +// +// Component interfaces follow a similar pattern and are accessed through +// corresponding getters in APM. All components are disabled at create-time, +// with default settings that are recommended for most situations. New settings +// can be applied without enabling a component.
Enabling a component triggers +// memory allocation and initialization to allow it to start processing the +// streams. +// +// Thread safety is provided with the following assumptions to reduce locking +// overhead: +// 1. The stream getters and setters are called from the same thread as +// ProcessStream(). More precisely, stream functions are never called +// concurrently with ProcessStream(). +// 2. Parameter getters are never called concurrently with the corresponding +// setter. +// +// APM accepts only linear PCM audio data in chunks of 10 ms. The int16 +// interfaces use interleaved data, while the float interfaces use deinterleaved +// data. +// +// Usage example, omitting error checking: +// AudioProcessing* apm = AudioProcessing::Create(); +// +// AudioProcessing::Config config; +// config.level_controller.enabled = true; +// config.high_pass_filter.enabled = true; +// apm->ApplyConfig(config); +// +// apm->echo_cancellation()->enable_drift_compensation(false); +// apm->echo_cancellation()->Enable(true); +// +// apm->noise_suppression()->set_level(NoiseSuppression::kHigh); +// apm->noise_suppression()->Enable(true); +// +// apm->gain_control()->set_analog_level_limits(0, 255); +// apm->gain_control()->set_mode(kAdaptiveAnalog); +// apm->gain_control()->Enable(true); +// +// apm->voice_detection()->Enable(true); +// +// // Start a voice call... +// +// // ... Render frame arrives bound for the audio HAL ... +// apm->ProcessReverseStream(render_frame); +// +// // ... Capture frame arrives from the audio HAL ... +// // Call required set_stream_ functions. +// apm->set_stream_delay_ms(delay_ms); +// apm->gain_control()->set_stream_analog_level(analog_level); +// +// apm->ProcessStream(capture_frame); +// +// // Call required stream_ functions. +// analog_level = apm->gain_control()->stream_analog_level(); +// has_voice = apm->voice_detection()->stream_has_voice(); +// +// // Repeat render and capture processing for the duration of the call... +// // Start a new call... +// apm->Initialize(); +// +// // Close the application... +// delete apm; +// +class AudioProcessing : public rtc::RefCountInterface { + public: + // The struct below constitutes the new parameter scheme for the audio + // processing. It is being introduced gradually and until it is fully + // introduced, it is prone to change. + // TODO(peah): Remove this comment once the new config scheme is fully rolled + // out. + // + // The parameters and behavior of the audio processing module are controlled + // by changing the default values in the AudioProcessing::Config struct. + // The config is applied by passing the struct to the ApplyConfig method. + struct Config { + struct LevelController { + bool enabled = false; + + // Sets the initial peak level to use inside the level controller in order + // to compute the signal gain. The unit for the peak level is dBFS and + // the allowed range is [-100, 0]. + float initial_peak_level_dbfs = -6.0206f; + } level_controller; + struct ResidualEchoDetector { + bool enabled = true; + } residual_echo_detector; + + struct HighPassFilter { + bool enabled = false; + } high_pass_filter; + + // Deprecated way of activating AEC3. + // TODO(gustaf): Remove when possible. + struct EchoCanceller3 { + bool enabled = false; + } echo_canceller3; + + // Enables the next generation AGC functionality. This feature replaces the + // standard methods of gain control in the previous AGC. + // The functionality is not yet activated in the code and turning this on + // does not yet have the desired behavior.
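+ // As an illustrative sketch (not part of the original header), such a + // setting would be applied like any other Config field, assuming |apm| + // points to an initialized AudioProcessing instance: + // AudioProcessing::Config apm_config; + // apm_config.gain_controller2.enabled = true; + // apm_config.gain_controller2.fixed_gain_db = 6.f; + // apm->ApplyConfig(apm_config);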
+ struct GainController2 { + bool enabled = false; + float fixed_gain_db = 0.f; + } gain_controller2; + + // Explicit copy assignment implementation to avoid issues with memory + // sanitizer complaints in case of self-assignment. + // TODO(peah): Add buildflag to ensure that this is only included for memory + // sanitizer builds. + Config& operator=(const Config& config) { + if (this != &config) { + memcpy(this, &config, sizeof(*this)); + } + return *this; + } + }; + + // TODO(mgraczyk): Remove once all methods that use ChannelLayout are gone. + enum ChannelLayout { + kMono, + // Left, right. + kStereo, + // Mono, keyboard, and mic. + kMonoAndKeyboard, + // Left, right, keyboard, and mic. + kStereoAndKeyboard + }; + + // Creates an APM instance. Use one instance for every primary audio stream + // requiring processing. On the client-side, this would typically be one + // instance for the near-end stream, and additional instances for each far-end + // stream which requires processing. On the server-side, this would typically + // be one instance for every incoming stream. + static AudioProcessing* Create(); + // Allows passing in an optional configuration at create-time. + static AudioProcessing* Create(const webrtc::Config& config); + // Deprecated. Use the Create below, with nullptr PostProcessing. + RTC_DEPRECATED + static AudioProcessing* Create(const webrtc::Config& config, + NonlinearBeamformer* beamformer); + // Allows passing in optional user-defined processing modules. + static AudioProcessing* Create( + const webrtc::Config& config, + std::unique_ptr<PostProcessing> capture_post_processor, + std::unique_ptr<EchoControlFactory> echo_control_factory, + NonlinearBeamformer* beamformer); + ~AudioProcessing() override {} + + // Initializes internal states, while retaining all user settings. This + // should be called before beginning to process a new audio stream. However, + // it is not necessary to call before processing the first stream after + // creation. + // + // It is also not necessary to call if the audio parameters (sample + // rate and number of channels) have changed. Passing updated parameters + // directly to |ProcessStream()| and |ProcessReverseStream()| is permissible. + // If the parameters are known at init-time though, they may be provided. + virtual int Initialize() = 0; + + // The int16 interfaces require: + // - only |NativeRate|s be used + // - that the input, output and reverse rates must match + // - that |processing_config.output_stream()| matches + // |processing_config.input_stream()|. + // + // The float interfaces accept arbitrary rates and support differing input and + // output layouts, but the output must have either one channel or the same + // number of channels as the input. + virtual int Initialize(const ProcessingConfig& processing_config) = 0; + + // Initialize with unpacked parameters. See Initialize() above for details. + // + // TODO(mgraczyk): Remove once clients are updated to use the new interface. + virtual int Initialize(int capture_input_sample_rate_hz, + int capture_output_sample_rate_hz, + int render_sample_rate_hz, + ChannelLayout capture_input_layout, + ChannelLayout capture_output_layout, + ChannelLayout render_input_layout) = 0; + + // TODO(peah): This method is a temporary solution used to take control + // over the parameters in the audio processing module and is likely to change. + virtual void ApplyConfig(const Config& config) = 0; + + // Pass down additional options which don't have explicit setters. 
This + // ensures the options are applied immediately. + virtual void SetExtraOptions(const webrtc::Config& config) = 0; + + // TODO(ajm): Only intended for internal use. Make private and friend the + // necessary classes? + virtual int proc_sample_rate_hz() const = 0; + virtual int proc_split_sample_rate_hz() const = 0; + virtual size_t num_input_channels() const = 0; + virtual size_t num_proc_channels() const = 0; + virtual size_t num_output_channels() const = 0; + virtual size_t num_reverse_channels() const = 0; + + // Set to true when the output of AudioProcessing will be muted or in some + // other way not used. Ideally, the captured audio would still be processed, + // but some components may change behavior based on this information. + // Default false. + virtual void set_output_will_be_muted(bool muted) = 0; + + // Processes a 10 ms |frame| of the primary audio stream. On the client-side, + // this is the near-end (or captured) audio. + // + // If needed for enabled functionality, any function with the set_stream_ tag + // must be called prior to processing the current frame. Any getter function + // with the stream_ tag which is needed should be called after processing. + // + // The |sample_rate_hz_|, |num_channels_|, and |samples_per_channel_| + // members of |frame| must be valid. If changed from the previous call to this + // method, it will trigger an initialization. + virtual int ProcessStream(AudioFrame* frame) = 0; + + // Accepts deinterleaved float audio with the range [-1, 1]. Each element + // of |src| points to a channel buffer, arranged according to + // |input_layout|. At output, the channels will be arranged according to + // |output_layout| at |output_sample_rate_hz| in |dest|. + // + // The output layout must have one channel or as many channels as the input. + // |src| and |dest| may use the same memory, if desired. + // + // TODO(mgraczyk): Remove once clients are updated to use the new interface. + virtual int ProcessStream(const float* const* src, + size_t samples_per_channel, + int input_sample_rate_hz, + ChannelLayout input_layout, + int output_sample_rate_hz, + ChannelLayout output_layout, + float* const* dest) = 0; + + // Accepts deinterleaved float audio with the range [-1, 1]. Each element of + // |src| points to a channel buffer, arranged according to |input_stream|. At + // output, the channels will be arranged according to |output_stream| in + // |dest|. + // + // The output must have one channel or as many channels as the input. |src| + // and |dest| may use the same memory, if desired. + virtual int ProcessStream(const float* const* src, + const StreamConfig& input_config, + const StreamConfig& output_config, + float* const* dest) = 0; + + // Processes a 10 ms |frame| of the reverse direction audio stream. The frame + // may be modified. On the client-side, this is the far-end (or to be + // rendered) audio. + // + // It is necessary to provide this if echo processing is enabled, as the + // reverse stream forms the echo reference signal. It is recommended, but not + // necessary, to provide if gain control is enabled. On the server-side this + // typically will not be used. If you're not sure what to pass in here, + // chances are you don't need to use it. + // + // The |sample_rate_hz_|, |num_channels_|, and |samples_per_channel_| + // members of |frame| must be valid. + virtual int ProcessReverseStream(AudioFrame* frame) = 0; + + // Accepts deinterleaved float audio with the range [-1, 1]. 
Each element + // of |data| points to a channel buffer, arranged according to |layout|. + // TODO(mgraczyk): Remove once clients are updated to use the new interface. + virtual int AnalyzeReverseStream(const float* const* data, + size_t samples_per_channel, + int sample_rate_hz, + ChannelLayout layout) = 0; + + // Accepts deinterleaved float audio with the range [-1, 1]. Each element of + // |data| points to a channel buffer, arranged according to |reverse_config|. + virtual int ProcessReverseStream(const float* const* src, + const StreamConfig& input_config, + const StreamConfig& output_config, + float* const* dest) = 0; + + // This must be called if and only if echo processing is enabled. + // + // Sets the |delay| in ms between ProcessReverseStream() receiving a far-end + // frame and ProcessStream() receiving a near-end frame containing the + // corresponding echo. On the client-side this can be expressed as + // delay = (t_render - t_analyze) + (t_process - t_capture) + // where: + // - t_analyze is the time a frame is passed to ProcessReverseStream() and + // t_render is the time the first sample of the same frame is rendered by + // the audio hardware. + // - t_capture is the time the first sample of a frame is captured by the + // audio hardware and t_process is the time the same frame is passed to + // ProcessStream(). + virtual int set_stream_delay_ms(int delay) = 0; + virtual int stream_delay_ms() const = 0; + virtual bool was_stream_delay_set() const = 0; + + // Call to signal that a key press occurred (true) or did not occur (false) + // with this chunk of audio. + virtual void set_stream_key_pressed(bool key_pressed) = 0; + + // Sets a delay |offset| in ms to add to the values passed in through + // set_stream_delay_ms(). May be positive or negative. + // + // Note that this could cause an otherwise valid value passed to + // set_stream_delay_ms() to return an error. + virtual void set_delay_offset_ms(int offset) = 0; + virtual int delay_offset_ms() const = 0; + + // Attaches the provided webrtc::AecDump for recording debugging + // information. Log file and maximum file size logic is supposed to + // be handled by the implementing instance of AecDump. Calling this + // method when another AecDump is attached replaces the active AecDump + // with the new one. This causes the d-tor of the earlier AecDump to + // be called. The d-tor call may block until all pending logging + // tasks are completed. + virtual void AttachAecDump(std::unique_ptr<AecDump> aec_dump) = 0; + + // If no AecDump is attached, this has no effect. If an AecDump is + // attached, its destructor is called. The d-tor may block until + // all pending logging tasks are completed. + virtual void DetachAecDump() = 0; + + // Use to send UMA histograms at the end of a call. Note that all + // histogram-specific member variables are reset. + virtual void UpdateHistogramsOnCallEnd() = 0; + + // TODO(ivoc): Remove when the calling code no longer uses the old Statistics + // API. + struct Statistic { + int instant = 0; // Instantaneous value. + int average = 0; // Long-term average. + int maximum = 0; // Long-term maximum. + int minimum = 0; // Long-term minimum.
+ }; + + struct Stat { + void Set(const Statistic& other) { + Set(other.instant, other.average, other.maximum, other.minimum); + } + void Set(float instant, float average, float maximum, float minimum) { + instant_ = instant; + average_ = average; + maximum_ = maximum; + minimum_ = minimum; + } + float instant() const { return instant_; } + float average() const { return average_; } + float maximum() const { return maximum_; } + float minimum() const { return minimum_; } + + private: + float instant_ = 0.0f; // Instantaneous value. + float average_ = 0.0f; // Long-term average. + float maximum_ = 0.0f; // Long-term maximum. + float minimum_ = 0.0f; // Long-term minimum. + }; + + struct AudioProcessingStatistics { + AudioProcessingStatistics(); + AudioProcessingStatistics(const AudioProcessingStatistics& other); + ~AudioProcessingStatistics(); + + // AEC Statistics. + // RERL = ERL + ERLE + Stat residual_echo_return_loss; + // ERL = 10log_10(P_far / P_echo) + Stat echo_return_loss; + // ERLE = 10log_10(P_echo / P_out) + Stat echo_return_loss_enhancement; + // (Pre non-linear processing suppression) A_NLP = 10log_10(P_echo / P_a) + Stat a_nlp; + // Fraction of time that the AEC linear filter is divergent, in a 1-second + // non-overlapped aggregation window. + float divergent_filter_fraction = -1.0f; + + // The delay metrics consist of the delay median and standard deviation. They + // also include the fraction of delay estimates that can make the echo + // cancellation perform poorly. The values are aggregated until the first + // call to |GetStatistics()| and afterwards aggregated and updated every + // second. Note that if there are several clients pulling metrics from + // |GetStatistics()| during a session the first call from any of them will + // switch to a one-second aggregation window for all. + int delay_median = -1; + int delay_standard_deviation = -1; + float fraction_poor_delays = -1.0f; + + // Residual echo detector likelihood. + float residual_echo_likelihood = -1.0f; + // Maximum residual echo likelihood from the last time period. + float residual_echo_likelihood_recent_max = -1.0f; + }; + + // TODO(ivoc): Make this pure virtual when all subclasses have been updated. + virtual AudioProcessingStatistics GetStatistics() const; + + // This returns the stats as optionals and will replace the regular + // GetStatistics. + virtual AudioProcessingStats GetStatistics(bool has_remote_tracks) const; + + // These provide access to the component interfaces and should never return + // NULL. The pointers will be valid for the lifetime of the APM instance. + // The memory for these objects is entirely managed internally. + virtual EchoCancellation* echo_cancellation() const = 0; + virtual EchoControlMobile* echo_control_mobile() const = 0; + virtual GainControl* gain_control() const = 0; + // TODO(peah): Deprecate this API call. + virtual HighPassFilter* high_pass_filter() const = 0; + virtual LevelEstimator* level_estimator() const = 0; + virtual NoiseSuppression* noise_suppression() const = 0; + virtual VoiceDetection* voice_detection() const = 0; + + // Returns the last applied configuration. + virtual AudioProcessing::Config GetConfig() const = 0; + + enum Error { + // Fatal errors.
+ kNoError = 0, + kUnspecifiedError = -1, + kCreationFailedError = -2, + kUnsupportedComponentError = -3, + kUnsupportedFunctionError = -4, + kNullPointerError = -5, + kBadParameterError = -6, + kBadSampleRateError = -7, + kBadDataLengthError = -8, + kBadNumberChannelsError = -9, + kFileError = -10, + kStreamParameterNotSetError = -11, + kNotEnabledError = -12, + + // Warnings are non-fatal. + // This results when a set_stream_ parameter is out of range. Processing + // will continue, but the parameter may have been truncated. + kBadStreamParameterWarning = -13 + }; + + enum NativeRate { + kSampleRate8kHz = 8000, + kSampleRate16kHz = 16000, + kSampleRate32kHz = 32000, + kSampleRate44_1kHz = 44100, + kSampleRate48kHz = 48000 + }; + + // TODO(kwiberg): We currently need to support a compiler (Visual C++) that + // complains if we don't explicitly state the size of the array here. Remove + // the size when that's no longer the case. + static constexpr int kNativeSampleRatesHz[4] = { + kSampleRate8kHz, kSampleRate16kHz, kSampleRate32kHz, kSampleRate48kHz}; + static constexpr size_t kNumNativeSampleRates = + arraysize(kNativeSampleRatesHz); + static constexpr int kMaxNativeSampleRateHz = + kNativeSampleRatesHz[kNumNativeSampleRates - 1]; + + static const int kChunkSizeMs = 10; +}; + +class StreamConfig { + public: + // sample_rate_hz: The sampling rate of the stream. + // + // num_channels: The number of audio channels in the stream, excluding the + // keyboard channel if it is present. When passing a + // StreamConfig with an array of arrays T*[N], + // + // N == {num_channels + 1 if has_keyboard + // {num_channels if !has_keyboard + // + // has_keyboard: True if the stream has a keyboard channel. When has_keyboard + // is true, the last channel in any corresponding list of + // channels is the keyboard channel. + StreamConfig(int sample_rate_hz = 0, + size_t num_channels = 0, + bool has_keyboard = false) + : sample_rate_hz_(sample_rate_hz), + num_channels_(num_channels), + has_keyboard_(has_keyboard), + num_frames_(calculate_frames(sample_rate_hz)) {} + + void set_sample_rate_hz(int value) { + sample_rate_hz_ = value; + num_frames_ = calculate_frames(value); + } + void set_num_channels(size_t value) { num_channels_ = value; } + void set_has_keyboard(bool value) { has_keyboard_ = value; } + + int sample_rate_hz() const { return sample_rate_hz_; } + + // The number of channels in the stream, not including the keyboard channel if + // present. 
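+ // As an illustrative sketch (not part of the original header): a 48 kHz + // stereo capture stream with a keyboard channel would be described by + // StreamConfig(48000, 2, true), for which num_channels() == 2 and + // num_frames() == 480 (10 ms at 48 kHz), and a corresponding float* array + // is expected to hold 2 + 1 = 3 channel pointers.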
+ size_t num_channels() const { return num_channels_; } + + bool has_keyboard() const { return has_keyboard_; } + size_t num_frames() const { return num_frames_; } + size_t num_samples() const { return num_channels_ * num_frames_; } + + bool operator==(const StreamConfig& other) const { + return sample_rate_hz_ == other.sample_rate_hz_ && + num_channels_ == other.num_channels_ && + has_keyboard_ == other.has_keyboard_; + } + + bool operator!=(const StreamConfig& other) const { return !(*this == other); } + + private: + static size_t calculate_frames(int sample_rate_hz) { + return static_cast<size_t>( + AudioProcessing::kChunkSizeMs * sample_rate_hz / 1000); + } + + int sample_rate_hz_; + size_t num_channels_; + bool has_keyboard_; + size_t num_frames_; +}; + +class ProcessingConfig { + public: + enum StreamName { + kInputStream, + kOutputStream, + kReverseInputStream, + kReverseOutputStream, + kNumStreamNames, + }; + + const StreamConfig& input_stream() const { + return streams[StreamName::kInputStream]; + } + const StreamConfig& output_stream() const { + return streams[StreamName::kOutputStream]; + } + const StreamConfig& reverse_input_stream() const { + return streams[StreamName::kReverseInputStream]; + } + const StreamConfig& reverse_output_stream() const { + return streams[StreamName::kReverseOutputStream]; + } + + StreamConfig& input_stream() { return streams[StreamName::kInputStream]; } + StreamConfig& output_stream() { return streams[StreamName::kOutputStream]; } + StreamConfig& reverse_input_stream() { + return streams[StreamName::kReverseInputStream]; + } + StreamConfig& reverse_output_stream() { + return streams[StreamName::kReverseOutputStream]; + } + + bool operator==(const ProcessingConfig& other) const { + for (int i = 0; i < StreamName::kNumStreamNames; ++i) { + if (this->streams[i] != other.streams[i]) { + return false; + } + } + return true; + } + + bool operator!=(const ProcessingConfig& other) const { + return !(*this == other); + } + + StreamConfig streams[StreamName::kNumStreamNames]; +}; + +// The acoustic echo cancellation (AEC) component provides better performance +// than AECM but also requires more processing power and is dependent on delay +// stability and reporting accuracy. As such it is well-suited and recommended +// for PC and IP phone applications. +// +// Not recommended to be enabled on the server-side. +class EchoCancellation { + public: + // EchoCancellation and EchoControlMobile may not be enabled simultaneously. + // Enabling one will disable the other. + virtual int Enable(bool enable) = 0; + virtual bool is_enabled() const = 0; + + // Differences in clock speed on the primary and reverse streams can impact + // the AEC performance. On the client-side, this could be seen when different + // render and capture devices are used, particularly with webcams. + // + // This enables a compensation mechanism, and requires that + // set_stream_drift_samples() be called. + virtual int enable_drift_compensation(bool enable) = 0; + virtual bool is_drift_compensation_enabled() const = 0; + + // Sets the difference between the number of samples rendered and captured by + // the audio devices since the last call to |ProcessStream()|. Must be called + // if drift compensation is enabled, prior to |ProcessStream()|. + virtual void set_stream_drift_samples(int drift) = 0; + virtual int stream_drift_samples() const = 0; + + enum SuppressionLevel { + kLowSuppression, + kModerateSuppression, + kHighSuppression + }; + + // Sets the aggressiveness of the suppressor. 
A higher level trades off + // double-talk performance for increased echo suppression. + virtual int set_suppression_level(SuppressionLevel level) = 0; + virtual SuppressionLevel suppression_level() const = 0; + + // Returns false if the current frame almost certainly contains no echo + // and true if it _might_ contain echo. + virtual bool stream_has_echo() const = 0; + + // Enables the computation of various echo metrics. These are obtained + // through |GetMetrics()|. + virtual int enable_metrics(bool enable) = 0; + virtual bool are_metrics_enabled() const = 0; + + // Each statistic is reported in dB. + // P_far: Far-end (render) signal power. + // P_echo: Near-end (capture) echo signal power. + // P_out: Signal power at the output of the AEC. + // P_a: Internal signal power at the point before the AEC's non-linear + // processor. + struct Metrics { + // RERL = ERL + ERLE + AudioProcessing::Statistic residual_echo_return_loss; + + // ERL = 10log_10(P_far / P_echo) + AudioProcessing::Statistic echo_return_loss; + + // ERLE = 10log_10(P_echo / P_out) + AudioProcessing::Statistic echo_return_loss_enhancement; + + // (Pre non-linear processing suppression) A_NLP = 10log_10(P_echo / P_a) + AudioProcessing::Statistic a_nlp; + + // Fraction of time that the AEC linear filter is divergent, in a 1-second + // non-overlapped aggregation window. + float divergent_filter_fraction; + }; + + // Deprecated. Use GetStatistics on the AudioProcessing interface instead. + // TODO(ajm): discuss the metrics update period. + virtual int GetMetrics(Metrics* metrics) = 0; + + // Enables computation and logging of delay values. Statistics are obtained + // through |GetDelayMetrics()|. + virtual int enable_delay_logging(bool enable) = 0; + virtual bool is_delay_logging_enabled() const = 0; + + // The delay metrics consist of the delay |median| and the delay standard + // deviation |std|. They also include the fraction of delay estimates + // |fraction_poor_delays| that can make the echo cancellation perform poorly. + // The values are aggregated until the first call to |GetDelayMetrics()| and + // afterwards aggregated and updated every second. + // Note that if there are several clients pulling metrics from + // |GetDelayMetrics()| during a session the first call from any of them will + // switch to a one-second aggregation window for all. + // Deprecated. Use GetStatistics on the AudioProcessing interface instead. + virtual int GetDelayMetrics(int* median, int* std) = 0; + // Deprecated. Use GetStatistics on the AudioProcessing interface instead. + virtual int GetDelayMetrics(int* median, int* std, + float* fraction_poor_delays) = 0; + + // Returns a pointer to the low level AEC component. In case of multiple + // channels, the pointer to the first one is returned. A NULL pointer is + // returned when the AEC component is disabled or has not been initialized + // successfully. + virtual struct AecCore* aec_core() const = 0; + + protected: + virtual ~EchoCancellation() {} +}; + +// The acoustic echo control for mobile (AECM) component is a low complexity +// robust option intended for use on mobile devices. +// +// Not recommended to be enabled on the server-side. +class EchoControlMobile { + public: + // EchoCancellation and EchoControlMobile may not be enabled simultaneously. + // Enabling one will disable the other. + virtual int Enable(bool enable) = 0; + virtual bool is_enabled() const = 0; + + // Recommended settings for particular audio routes.
In general, the louder + // the echo is expected to be, the higher this value should be set. The + // preferred setting may vary from device to device. + enum RoutingMode { + kQuietEarpieceOrHeadset, + kEarpiece, + kLoudEarpiece, + kSpeakerphone, + kLoudSpeakerphone + }; + + // Sets echo control appropriate for the audio routing |mode| on the device. + // It can and should be updated during a call if the audio routing changes. + virtual int set_routing_mode(RoutingMode mode) = 0; + virtual RoutingMode routing_mode() const = 0; + + // Comfort noise replaces suppressed background noise to maintain a + // consistent signal level. + virtual int enable_comfort_noise(bool enable) = 0; + virtual bool is_comfort_noise_enabled() const = 0; + + // A typical use case is to initialize the component with an echo path from a + // previous call. The echo path is retrieved using |GetEchoPath()|, typically + // at the end of a call. The data can then be stored for later use as an + // initializer before the next call, using |SetEchoPath()|. + // + // Controlling the echo path this way requires the data |size_bytes| to match + // the internal echo path size. This size can be acquired using + // |echo_path_size_bytes()|. |SetEchoPath()| causes an entire reset, worth + // noting if it is to be called during an ongoing call. + // + // It is possible that version incompatibilities may result in a stored echo + // path of the incorrect size. In this case, the stored path should be + // discarded. + virtual int SetEchoPath(const void* echo_path, size_t size_bytes) = 0; + virtual int GetEchoPath(void* echo_path, size_t size_bytes) const = 0; + + // The returned path size is guaranteed not to change for the lifetime of + // the application. + static size_t echo_path_size_bytes(); + + protected: + virtual ~EchoControlMobile() {} +}; + +// Interface for an acoustic echo cancellation (AEC) submodule. +class EchoControl { + public: + // Analysis (not changing) of the render signal. + virtual void AnalyzeRender(AudioBuffer* render) = 0; + + // Analysis (not changing) of the capture signal. + virtual void AnalyzeCapture(AudioBuffer* capture) = 0; + + // Processes the capture signal in order to remove the echo. + virtual void ProcessCapture(AudioBuffer* capture, bool echo_path_change) = 0; + + struct Metrics { + double echo_return_loss; + double echo_return_loss_enhancement; + int delay_ms; + }; + + // Collect current metrics from the echo controller. + virtual Metrics GetMetrics() const = 0; + + virtual ~EchoControl() {} +}; + +// Interface for a factory that creates EchoControllers. +class EchoControlFactory { + public: + virtual std::unique_ptr<EchoControl> Create(int sample_rate_hz) = 0; + virtual ~EchoControlFactory() = default; +}; + +// The automatic gain control (AGC) component brings the signal to an +// appropriate range. This is done by applying a digital gain directly and, in +// the analog mode, prescribing an analog gain to be applied at the audio HAL. +// +// Recommended to be enabled on the client-side. +class GainControl { + public: + virtual int Enable(bool enable) = 0; + virtual bool is_enabled() const = 0; + + // When an analog mode is set, this must be called prior to |ProcessStream()| + // to pass the current analog level from the audio HAL. Must be within the + // range provided to |set_analog_level_limits()|. 
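+ // An illustrative sketch of the intended call order (not part of the + // original header; |hal| is a hypothetical audio-device wrapper): + // apm->gain_control()->set_stream_analog_level(hal->MicVolume()); + // apm->ProcessStream(capture_frame); + // hal->SetMicVolume(apm->gain_control()->stream_analog_level());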
+ virtual int set_stream_analog_level(int level) = 0; + + // When an analog mode is set, this should be called after |ProcessStream()| + // to obtain the recommended new analog level for the audio HAL. It is the + // user's responsibility to apply this level. + virtual int stream_analog_level() = 0; + + enum Mode { + // Adaptive mode intended for use if an analog volume control is available + // on the capture device. It will require the user to provide coupling + // between the OS mixer controls and AGC through the |stream_analog_level()| + // functions. + // + // It consists of an analog gain prescription for the audio device and a + // digital compression stage. + kAdaptiveAnalog, + + // Adaptive mode intended for situations in which an analog volume control + // is unavailable. It operates in a similar fashion to the adaptive analog + // mode, but with scaling instead applied in the digital domain. As with + // the analog mode, it additionally uses a digital compression stage. + kAdaptiveDigital, + + // Fixed mode which enables only the digital compression stage also used by + // the two adaptive modes. + // + // It is distinguished from the adaptive modes by considering only a + // short time-window of the input signal. It applies a fixed gain through + // most of the input level range, and compresses (gradually reduces gain + // with increasing level) the input signal at higher levels. This mode is + // preferred on embedded devices where the capture signal level is + // predictable, so that a known gain can be applied. + kFixedDigital + }; + + virtual int set_mode(Mode mode) = 0; + virtual Mode mode() const = 0; + + // Sets the target peak |level| (or envelope) of the AGC in dBFs (decibels + // from digital full-scale). The convention is to use positive values. For + // instance, passing in a value of 3 corresponds to -3 dBFs, or a target + // level 3 dB below full-scale. Limited to [0, 31]. + // + // TODO(ajm): use a negative value here instead, if/when VoE will similarly + // update its interface. + virtual int set_target_level_dbfs(int level) = 0; + virtual int target_level_dbfs() const = 0; + + // Sets the maximum |gain| the digital compression stage may apply, in dB. A + // higher number corresponds to greater compression, while a value of 0 will + // leave the signal uncompressed. Limited to [0, 90]. + virtual int set_compression_gain_db(int gain) = 0; + virtual int compression_gain_db() const = 0; + + // When enabled, the compression stage will hard limit the signal to the + // target level. Otherwise, the signal will be compressed but not limited + // above the target level. + virtual int enable_limiter(bool enable) = 0; + virtual bool is_limiter_enabled() const = 0; + + // Sets the |minimum| and |maximum| analog levels of the audio capture device. + // Must be set if and only if an analog mode is used. Limited to [0, 65535]. + virtual int set_analog_level_limits(int minimum, + int maximum) = 0; + virtual int analog_level_minimum() const = 0; + virtual int analog_level_maximum() const = 0; + + // Returns true if the AGC has detected a saturation event (period where the + // signal reaches digital full-scale) in the current frame and the analog + // level cannot be reduced. + // + // This could be used as an indicator to reduce or disable analog mic gain at + // the audio HAL. + virtual bool stream_is_saturated() const = 0; + + protected: + virtual ~GainControl() {} +}; +// TODO(peah): Remove this interface.
+// A filtering component which removes DC offset and low-frequency noise. +// Recommended to be enabled on the client-side. +class HighPassFilter { + public: + virtual int Enable(bool enable) = 0; + virtual bool is_enabled() const = 0; + + virtual ~HighPassFilter() {} +}; + +// An estimation component used to retrieve level metrics. +class LevelEstimator { + public: + virtual int Enable(bool enable) = 0; + virtual bool is_enabled() const = 0; + + // Returns the root mean square (RMS) level in dBFs (decibels from digital + // full-scale), or alternatively dBov. It is computed over all primary stream + // frames since the last call to RMS(). The returned value is positive but + // should be interpreted as negative. It is constrained to [0, 127]. + // + // The computation follows: https://tools.ietf.org/html/rfc6465 + // with the intent that it can provide the RTP audio level indication. + // + // Frames passed to ProcessStream() with an |_energy| of zero are considered + // to have been muted. The RMS of the frame will be interpreted as -127. + virtual int RMS() = 0; + + protected: + virtual ~LevelEstimator() {} +}; + +// The noise suppression (NS) component attempts to remove noise while +// retaining speech. +// +// Recommended to be enabled on the client-side. +class NoiseSuppression { + public: + virtual int Enable(bool enable) = 0; + virtual bool is_enabled() const = 0; + + // Determines the aggressiveness of the suppression. Increasing the level + // will reduce the noise level at the expense of a higher speech distortion. + enum Level { + kLow, + kModerate, + kHigh, + kVeryHigh + }; + + virtual int set_level(Level level) = 0; + virtual Level level() const = 0; + + // Returns the internally computed prior speech probability of the current + // frame averaged over output channels. This is not supported in fixed point, + // for which |kUnsupportedFunctionError| is returned. + virtual float speech_probability() const = 0; + + // Returns the noise estimate per frequency bin averaged over all channels. + virtual std::vector<float> NoiseEstimate() = 0; + + protected: + virtual ~NoiseSuppression() {} +}; + +// Interface for a post processing submodule. +class PostProcessing { + public: + // (Re-)Initializes the submodule. + virtual void Initialize(int sample_rate_hz, int num_channels) = 0; + // Processes the given capture or render signal. + virtual void Process(AudioBuffer* audio) = 0; + // Returns a string representation of the module state. + virtual std::string ToString() const = 0; + + virtual ~PostProcessing() {} +}; + +// The voice activity detection (VAD) component analyzes the stream to +// determine if voice is present. A facility is also provided to pass in an +// external VAD decision. +// +// In addition to |stream_has_voice()| the VAD decision is provided through the +// |AudioFrame| passed to |ProcessStream()|. The |vad_activity_| member will be +// modified to reflect the current decision. +class VoiceDetection { + public: + virtual int Enable(bool enable) = 0; + virtual bool is_enabled() const = 0; + + // Returns true if voice is detected in the current frame. Should be called + // after |ProcessStream()|. + virtual bool stream_has_voice() const = 0; + + // Some of the APM functionality requires a VAD decision. In the case that + // a decision is externally available for the current frame, it can be passed + // in here, before |ProcessStream()| is called. + // + // VoiceDetection does _not_ need to be enabled to use this.
If it happens to + // be enabled, detection will be skipped for any frame in which an external + // VAD decision is provided. + virtual int set_stream_has_voice(bool has_voice) = 0; + + // Specifies the likelihood that a frame will be declared to contain voice. + // A higher value makes it more likely that speech will not be clipped, at + // the expense of more noise being detected as voice. + enum Likelihood { + kVeryLowLikelihood, + kLowLikelihood, + kModerateLikelihood, + kHighLikelihood + }; + + virtual int set_likelihood(Likelihood likelihood) = 0; + virtual Likelihood likelihood() const = 0; + + // Sets the |size| of the frames in ms on which the VAD will operate. Larger + // frames will improve detection accuracy, but reduce the frequency of + // updates. + // + // This does not impact the size of frames passed to |ProcessStream()|. + virtual int set_frame_size_ms(int size) = 0; + virtual int frame_size_ms() const = 0; + + protected: + virtual ~VoiceDetection() {} +}; + +// Configuration struct for EchoCanceller3 +struct EchoCanceller3Config { + struct Delay { + size_t default_delay = 5; + size_t down_sampling_factor = 4; + size_t num_filters = 4; + } delay; + + struct Erle { + float min = 1.f; + float max_l = 8.f; + float max_h = 1.5f; + } erle; + + struct EpStrength { + float lf = 10.f; + float mf = 10.f; + float hf = 10.f; + float default_len = 0.f; + bool echo_can_saturate = true; + bool bounded_erl = false; + } ep_strength; + + struct Mask { + float m1 = 0.01f; + float m2 = 0.0001f; + float m3 = 0.01f; + float m4 = 0.1f; + float m5 = 0.3f; + float m6 = 0.0001f; + float m7 = 0.01f; + float m8 = 0.0001f; + float m9 = 0.1f; + } gain_mask; + + struct EchoAudibility { + float low_render_limit = 4 * 64.f; + float normal_render_limit = 64.f; + } echo_audibility; + + struct RenderLevels { + float active_render_limit = 100.f; + float poor_excitation_render_limit = 150.f; + } render_levels; + + struct GainUpdates { + struct GainChanges { + float max_inc; + float max_dec; + float rate_inc; + float rate_dec; + float min_inc; + float min_dec; + }; + + GainChanges low_noise = {3.f, 3.f, 1.5f, 1.5f, 1.5f, 1.5f}; + GainChanges normal = {2.f, 2.f, 1.5f, 1.5f, 1.2f, 1.2f}; + GainChanges saturation = {1.2f, 1.2f, 1.5f, 1.5f, 1.f, 1.f}; + GainChanges nonlinear = {1.5f, 1.5f, 1.2f, 1.2f, 1.1f, 1.1f}; + + float floor_first_increase = 0.0001f; + } gain_updates; +}; + +class EchoCanceller3Factory : public EchoControlFactory { + public: + EchoCanceller3Factory(); + EchoCanceller3Factory(const EchoCanceller3Config& config); + std::unique_ptr<EchoControl> Create(int sample_rate_hz) override; + + private: + EchoCanceller3Config config_; +}; +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/include/audio_processing_statistics.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/include/audio_processing_statistics.cc new file mode 100644 index 0000000000..7139ee502e --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/include/audio_processing_statistics.cc @@ -0,0 +1,22 @@ +/* + * Copyright 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/include/audio_processing_statistics.h" + +namespace webrtc { + +AudioProcessingStats::AudioProcessingStats() = default; + +AudioProcessingStats::AudioProcessingStats(const AudioProcessingStats& other) = + default; + +AudioProcessingStats::~AudioProcessingStats() = default; + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/include/audio_processing_statistics.h b/third_party/libwebrtc/webrtc/modules/audio_processing/include/audio_processing_statistics.h new file mode 100644 index 0000000000..83c9d9932f --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/include/audio_processing_statistics.h @@ -0,0 +1,56 @@ +/* + * Copyright 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_STATISTICS_H_ +#define MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_STATISTICS_H_ + +#include "api/optional.h" + +namespace webrtc { +// This version of the stats uses Optionals; it will replace the regular +// AudioProcessingStatistics struct. +struct AudioProcessingStats { + AudioProcessingStats(); + AudioProcessingStats(const AudioProcessingStats& other); + ~AudioProcessingStats(); + + // AEC Statistics. + // ERL = 10log_10(P_far / P_echo) + rtc::Optional<double> echo_return_loss; + // ERLE = 10log_10(P_echo / P_out) + rtc::Optional<double> echo_return_loss_enhancement; + // Fraction of time that the AEC linear filter is divergent, in a 1-second + // non-overlapped aggregation window. + rtc::Optional<double> divergent_filter_fraction; + + // The delay metrics consist of the delay median and standard deviation. They + // also include the fraction of delay estimates that can make the echo + // cancellation perform poorly. The values are aggregated until the first + // call to |GetStatistics()| and afterwards aggregated and updated every + // second. Note that if there are several clients pulling metrics from + // |GetStatistics()| during a session the first call from any of them will + // switch to a one-second aggregation window for all. + rtc::Optional<int32_t> delay_median_ms; + rtc::Optional<int32_t> delay_standard_deviation_ms; + + // Residual echo detector likelihood. + rtc::Optional<double> residual_echo_likelihood; + // Maximum residual echo likelihood from the last time period. + rtc::Optional<double> residual_echo_likelihood_recent_max; + + // The instantaneous delay estimate produced in the AEC. The unit is in + // milliseconds and the value is the instantaneous value at the time of the + // call to |GetStatistics()|. + int delay_ms; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_STATISTICS_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/include/config.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/include/config.cc new file mode 100644 index 0000000000..14240db305 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/include/config.cc @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/include/config.h" + +namespace webrtc { + +Config::Config() {} + +Config::~Config() { + for (OptionMap::iterator it = options_.begin(); it != options_.end(); ++it) { + delete it->second; + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/include/config.h b/third_party/libwebrtc/webrtc/modules/audio_processing/include/config.h new file mode 100644 index 0000000000..16128943b3 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/include/config.h @@ -0,0 +1,134 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_INCLUDE_CONFIG_H_ +#define MODULES_AUDIO_PROCESSING_INCLUDE_CONFIG_H_ + +#include <map> + +#include "rtc_base/basictypes.h" +#include "rtc_base/constructormagic.h" + +namespace webrtc { + +// Only add new values to the end of the enumeration and never remove (only +// deprecate) to maintain binary compatibility. +enum class ConfigOptionID { + kMyExperimentForTest, + kAlgo1CostFunctionForTest, + kTemporalLayersFactory, // Deprecated + kNetEqCapacityConfig, // Deprecated + kNetEqFastAccelerate, // Deprecated + kVoicePacing, // Deprecated + kExtendedFilter, + kDelayAgnostic, + kExperimentalAgc, + kExperimentalNs, + kBeamforming, + kIntelligibility, + kEchoCanceller3, // Deprecated + kAecRefinedAdaptiveFilter, + kLevelControl, + kCaptureDeviceInfo +}; + +// Class Config is designed to ease passing a set of options across webrtc code. +// Options are identified by typename in order to avoid incorrect casts. +// +// Usage: +// * declaring an option: +// struct Algo1_CostFunction { +// virtual float cost(int x) const { return x; } +// virtual ~Algo1_CostFunction() {} +// }; +// +// * accessing an option: +// config.Get<Algo1_CostFunction>().cost(value); +// +// * setting an option: +// struct SqrCost : Algo1_CostFunction { +// virtual float cost(int x) const { return x*x; } +// }; +// config.Set<Algo1_CostFunction>(new SqrCost()); +// +// Note: This class is thread-compatible (like STL containers). +class Config { + public: + // Returns the option if set or a default constructed one. + // Callers that access options too often are encouraged to cache the result. + // Returned references are owned by this. + // + // Requires std::is_default_constructible<T> + template<typename T> const T& Get() const; + + // Set the option, deleting any previous instance of the same. + // This instance gets ownership of the newly set value. 
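+ // For example (an illustrative sketch, not part of the original header, + // using the ExperimentalNs option declared in audio_processing.h): + // webrtc::Config config; + // config.Set<ExperimentalNs>(new ExperimentalNs(true)); + // bool on = config.Get<ExperimentalNs>().enabled; // on == true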
+ template<typename T> void Set(T* value); + + Config(); + ~Config(); + + private: + struct BaseOption { + virtual ~BaseOption() {} + }; + + template<typename T> + struct Option : BaseOption { + explicit Option(T* v): value(v) {} + ~Option() { + delete value; + } + T* value; + }; + + template<typename T> + static ConfigOptionID identifier() { + return T::identifier; + } + + // Used to instantiate a default constructed object that doesn't need to be + // owned. This allows Get<T> to be implemented without requiring explicit + // locks. + template<typename T> + static const T& default_value() { + RTC_DEFINE_STATIC_LOCAL(const T, def, ()); + return def; + } + + typedef std::map<ConfigOptionID, BaseOption*> OptionMap; + OptionMap options_; + + // RTC_DISALLOW_COPY_AND_ASSIGN + Config(const Config&); + void operator=(const Config&); +}; + +template<typename T> +const T& Config::Get() const { + OptionMap::const_iterator it = options_.find(identifier<T>()); + if (it != options_.end()) { + const T* t = static_cast<Option<T>*>(it->second)->value; + if (t) { + return *t; + } + } + return default_value<T>(); +} + +template<typename T> +void Config::Set(T* value) { + BaseOption*& it = options_[identifier<T>()]; + delete it; + it = new Option<T>(value); +} +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_INCLUDE_CONFIG_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/include/mock_audio_processing.h b/third_party/libwebrtc/webrtc/modules/audio_processing/include/mock_audio_processing.h new file mode 100644 index 0000000000..f2bdc2f241 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/include/mock_audio_processing.h @@ -0,0 +1,241 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree.
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_INCLUDE_MOCK_AUDIO_PROCESSING_H_ +#define MODULES_AUDIO_PROCESSING_INCLUDE_MOCK_AUDIO_PROCESSING_H_ + +#include <memory> + +#include "modules/audio_processing/include/aec_dump.h" +#include "modules/audio_processing/include/audio_processing.h" +#include "modules/audio_processing/include/audio_processing_statistics.h" +#include "test/gmock.h" + +namespace webrtc { + +namespace test { + +class MockEchoCancellation : public EchoCancellation { + public: + virtual ~MockEchoCancellation() {} + MOCK_METHOD1(Enable, int(bool enable)); + MOCK_CONST_METHOD0(is_enabled, bool()); + MOCK_METHOD1(enable_drift_compensation, int(bool enable)); + MOCK_CONST_METHOD0(is_drift_compensation_enabled, bool()); + MOCK_METHOD1(set_stream_drift_samples, void(int drift)); + MOCK_CONST_METHOD0(stream_drift_samples, int()); + MOCK_METHOD1(set_suppression_level, int(SuppressionLevel level)); + MOCK_CONST_METHOD0(suppression_level, SuppressionLevel()); + MOCK_CONST_METHOD0(stream_has_echo, bool()); + MOCK_METHOD1(enable_metrics, int(bool enable)); + MOCK_CONST_METHOD0(are_metrics_enabled, bool()); + MOCK_METHOD1(GetMetrics, int(Metrics* metrics)); + MOCK_METHOD1(enable_delay_logging, int(bool enable)); + MOCK_CONST_METHOD0(is_delay_logging_enabled, bool()); + MOCK_METHOD2(GetDelayMetrics, int(int* median, int* std)); + MOCK_METHOD3(GetDelayMetrics, int(int* median, int* std, + float* fraction_poor_delays)); + MOCK_CONST_METHOD0(aec_core, struct AecCore*()); +}; + +class MockEchoControlMobile : public EchoControlMobile { + public: + virtual ~MockEchoControlMobile() {} + MOCK_METHOD1(Enable, int(bool enable)); + MOCK_CONST_METHOD0(is_enabled, bool()); + MOCK_METHOD1(set_routing_mode, int(RoutingMode mode)); + MOCK_CONST_METHOD0(routing_mode, RoutingMode()); + MOCK_METHOD1(enable_comfort_noise, int(bool enable)); + MOCK_CONST_METHOD0(is_comfort_noise_enabled, bool()); + MOCK_METHOD2(SetEchoPath, int(const void* echo_path, size_t size_bytes)); + MOCK_CONST_METHOD2(GetEchoPath, int(void* echo_path, size_t size_bytes)); +}; + +class MockGainControl : public GainControl { + public: + virtual ~MockGainControl() {} + MOCK_METHOD1(Enable, int(bool enable)); + MOCK_CONST_METHOD0(is_enabled, bool()); + MOCK_METHOD1(set_stream_analog_level, int(int level)); + MOCK_METHOD0(stream_analog_level, int()); + MOCK_METHOD1(set_mode, int(Mode mode)); + MOCK_CONST_METHOD0(mode, Mode()); + MOCK_METHOD1(set_target_level_dbfs, int(int level)); + MOCK_CONST_METHOD0(target_level_dbfs, int()); + MOCK_METHOD1(set_compression_gain_db, int(int gain)); + MOCK_CONST_METHOD0(compression_gain_db, int()); + MOCK_METHOD1(enable_limiter, int(bool enable)); + MOCK_CONST_METHOD0(is_limiter_enabled, bool()); + MOCK_METHOD2(set_analog_level_limits, int(int minimum, int maximum)); + MOCK_CONST_METHOD0(analog_level_minimum, int()); + MOCK_CONST_METHOD0(analog_level_maximum, int()); + MOCK_CONST_METHOD0(stream_is_saturated, bool()); +}; + +class MockHighPassFilter : public HighPassFilter { + public: + virtual ~MockHighPassFilter() {} + MOCK_METHOD1(Enable, int(bool enable)); + MOCK_CONST_METHOD0(is_enabled, bool()); +}; + +class MockLevelEstimator : public LevelEstimator { + public: + virtual ~MockLevelEstimator() {} + MOCK_METHOD1(Enable, int(bool enable)); + MOCK_CONST_METHOD0(is_enabled, bool()); + MOCK_METHOD0(RMS, int()); +}; + +class MockNoiseSuppression : public NoiseSuppression { + public: + virtual ~MockNoiseSuppression() {} + MOCK_METHOD1(Enable, int(bool enable)); + MOCK_CONST_METHOD0(is_enabled, bool()); + 
MOCK_METHOD1(set_level, int(Level level)); + MOCK_CONST_METHOD0(level, Level()); + MOCK_CONST_METHOD0(speech_probability, float()); + MOCK_METHOD0(NoiseEstimate, std::vector<float>()); +}; + +class MockPostProcessing : public PostProcessing { + public: + virtual ~MockPostProcessing() {} + MOCK_METHOD2(Initialize, void(int sample_rate_hz, int num_channels)); + MOCK_METHOD1(Process, void(AudioBuffer* audio)); + MOCK_CONST_METHOD0(ToString, std::string()); +}; + +class MockEchoControl : public EchoControl { + public: + virtual ~MockEchoControl() {} + MOCK_METHOD1(AnalyzeRender, void(AudioBuffer* render)); + MOCK_METHOD1(AnalyzeCapture, void(AudioBuffer* capture)); + MOCK_METHOD2(ProcessCapture, + void(AudioBuffer* capture, bool echo_path_change)); + MOCK_CONST_METHOD0(GetMetrics, Metrics()); +}; + +class MockVoiceDetection : public VoiceDetection { + public: + virtual ~MockVoiceDetection() {} + MOCK_METHOD1(Enable, int(bool enable)); + MOCK_CONST_METHOD0(is_enabled, bool()); + MOCK_CONST_METHOD0(stream_has_voice, bool()); + MOCK_METHOD1(set_stream_has_voice, int(bool has_voice)); + MOCK_METHOD1(set_likelihood, int(Likelihood likelihood)); + MOCK_CONST_METHOD0(likelihood, Likelihood()); + MOCK_METHOD1(set_frame_size_ms, int(int size)); + MOCK_CONST_METHOD0(frame_size_ms, int()); +}; + +class MockAudioProcessing : public AudioProcessing { + public: + MockAudioProcessing() + : echo_cancellation_(new testing::NiceMock<MockEchoCancellation>()), + echo_control_mobile_(new testing::NiceMock<MockEchoControlMobile>()), + gain_control_(new testing::NiceMock<MockGainControl>()), + high_pass_filter_(new testing::NiceMock<MockHighPassFilter>()), + level_estimator_(new testing::NiceMock<MockLevelEstimator>()), + noise_suppression_(new testing::NiceMock<MockNoiseSuppression>()), + voice_detection_(new testing::NiceMock<MockVoiceDetection>()) { + } + + virtual ~MockAudioProcessing() {} + + MOCK_METHOD0(Initialize, int()); + MOCK_METHOD6(Initialize, int(int capture_input_sample_rate_hz, + int capture_output_sample_rate_hz, + int render_sample_rate_hz, + ChannelLayout capture_input_layout, + ChannelLayout capture_output_layout, + ChannelLayout render_input_layout)); + MOCK_METHOD1(Initialize, int(const ProcessingConfig& processing_config)); + MOCK_METHOD1(ApplyConfig, void(const Config& config)); + MOCK_METHOD1(SetExtraOptions, void(const webrtc::Config& config)); + MOCK_CONST_METHOD0(proc_sample_rate_hz, int()); + MOCK_CONST_METHOD0(proc_split_sample_rate_hz, int()); + MOCK_CONST_METHOD0(num_input_channels, size_t()); + MOCK_CONST_METHOD0(num_proc_channels, size_t()); + MOCK_CONST_METHOD0(num_output_channels, size_t()); + MOCK_CONST_METHOD0(num_reverse_channels, size_t()); + MOCK_METHOD1(set_output_will_be_muted, void(bool muted)); + MOCK_METHOD1(ProcessStream, int(AudioFrame* frame)); + MOCK_METHOD7(ProcessStream, int(const float* const* src, + size_t samples_per_channel, + int input_sample_rate_hz, + ChannelLayout input_layout, + int output_sample_rate_hz, + ChannelLayout output_layout, + float* const* dest)); + MOCK_METHOD4(ProcessStream, int(const float* const* src, + const StreamConfig& input_config, + const StreamConfig& output_config, + float* const* dest)); + MOCK_METHOD1(ProcessReverseStream, int(AudioFrame* frame)); + MOCK_METHOD4(AnalyzeReverseStream, int(const float* const* data, + size_t samples_per_channel, + int sample_rate_hz, + ChannelLayout layout)); + MOCK_METHOD4(ProcessReverseStream, int(const float* const* src, + const StreamConfig& input_config, + const StreamConfig& output_config, + 
                                          float* const* dest));
+  MOCK_METHOD1(set_stream_delay_ms, int(int delay));
+  MOCK_CONST_METHOD0(stream_delay_ms, int());
+  MOCK_CONST_METHOD0(was_stream_delay_set, bool());
+  MOCK_METHOD1(set_stream_key_pressed, void(bool key_pressed));
+  MOCK_METHOD1(set_delay_offset_ms, void(int offset));
+  MOCK_CONST_METHOD0(delay_offset_ms, int());
+
+  virtual void AttachAecDump(std::unique_ptr<AecDump> aec_dump) {}
+  MOCK_METHOD0(DetachAecDump, void());
+
+  MOCK_METHOD0(UpdateHistogramsOnCallEnd, void());
+  MOCK_CONST_METHOD0(GetStatistics, AudioProcessingStatistics());
+  MOCK_CONST_METHOD1(GetStatistics, AudioProcessingStats(bool));
+  virtual MockEchoCancellation* echo_cancellation() const {
+    return echo_cancellation_.get();
+  }
+  virtual MockEchoControlMobile* echo_control_mobile() const {
+    return echo_control_mobile_.get();
+  }
+  virtual MockGainControl* gain_control() const {
+    return gain_control_.get();
+  }
+  virtual MockHighPassFilter* high_pass_filter() const {
+    return high_pass_filter_.get();
+  }
+  virtual MockLevelEstimator* level_estimator() const {
+    return level_estimator_.get();
+  }
+  virtual MockNoiseSuppression* noise_suppression() const {
+    return noise_suppression_.get();
+  }
+  virtual MockVoiceDetection* voice_detection() const {
+    return voice_detection_.get();
+  }
+
+  MOCK_CONST_METHOD0(GetConfig, AudioProcessing::Config());
+
+ private:
+  std::unique_ptr<MockEchoCancellation> echo_cancellation_;
+  std::unique_ptr<MockEchoControlMobile> echo_control_mobile_;
+  std::unique_ptr<MockGainControl> gain_control_;
+  std::unique_ptr<MockHighPassFilter> high_pass_filter_;
+  std::unique_ptr<MockLevelEstimator> level_estimator_;
+  std::unique_ptr<MockNoiseSuppression> noise_suppression_;
+  std::unique_ptr<MockVoiceDetection> voice_detection_;
+};
+
+} // namespace test
+} // namespace webrtc
+
+#endif // MODULES_AUDIO_PROCESSING_INCLUDE_MOCK_AUDIO_PROCESSING_H_
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc
new file mode 100644
index 0000000000..0e696d9fff
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc
@@ -0,0 +1,392 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/intelligibility/intelligibility_enhancer.h"
+
+#include <math.h>
+#include <stdlib.h>
+#include <algorithm>
+#include <limits>
+#include <numeric>
+
+#include "common_audio/include/audio_util.h"
+#include "common_audio/window_generator.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/numerics/safe_minmax.h"
+
+namespace webrtc {
+
+namespace {
+
+const size_t kErbResolution = 2;
+const int kWindowSizeMs = 16;
+const int kChunkSizeMs = 10;  // Size provided by APM.
+const float kClipFreqKhz = 0.2f;
+const float kKbdAlpha = 1.5f;
+const float kLambdaBot = -1.f;    // Extreme values in bisection
+const float kLambdaTop = -1e-5f;  // search for lambda.
+const float kVoiceProbabilityThreshold = 0.5f;
+// Number of chunks after voice activity which is still considered speech.
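+// With the 10 ms chunks provided by APM (kChunkSizeMs), ten chunks amount to
+// roughly 100 ms of hangover during which the signal is still treated as
+// speech.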
+const size_t kSpeechOffsetDelay = 10; +const float kDecayRate = 0.995f; // Power estimation decay rate. +const float kMaxRelativeGainChange = 0.005f; +const float kRho = 0.0004f; // Default production and interpretation SNR. +const float kPowerNormalizationFactor = 1.f / (1 << 30); +const float kMaxActiveSNR = 128.f; // 21dB +const float kMinInactiveSNR = 32.f; // 15dB +const size_t kGainUpdatePeriod = 10u; + +// Returns dot product of vectors |a| and |b| with size |length|. +float DotProduct(const float* a, const float* b, size_t length) { + float ret = 0.f; + for (size_t i = 0; i < length; ++i) { + ret += a[i] * b[i]; + } + return ret; +} + +// Computes the power across ERB bands from the power spectral density |pow|. +// Stores it in |result|. +void MapToErbBands(const float* pow, + const std::vector<std::vector<float>>& filter_bank, + float* result) { + for (size_t i = 0; i < filter_bank.size(); ++i) { + RTC_DCHECK_GT(filter_bank[i].size(), 0); + result[i] = kPowerNormalizationFactor * + DotProduct(filter_bank[i].data(), pow, filter_bank[i].size()); + } +} + +} // namespace + +IntelligibilityEnhancer::IntelligibilityEnhancer(int sample_rate_hz, + size_t num_render_channels, + size_t num_bands, + size_t num_noise_bins) + : freqs_(RealFourier::ComplexLength( + RealFourier::FftOrder(sample_rate_hz * kWindowSizeMs / 1000))), + num_noise_bins_(num_noise_bins), + chunk_length_(static_cast<size_t>(sample_rate_hz * kChunkSizeMs / 1000)), + bank_size_(GetBankSize(sample_rate_hz, kErbResolution)), + sample_rate_hz_(sample_rate_hz), + num_render_channels_(num_render_channels), + clear_power_estimator_(freqs_, kDecayRate), + noise_power_estimator_(num_noise_bins, kDecayRate), + filtered_clear_pow_(bank_size_, 0.f), + filtered_noise_pow_(num_noise_bins, 0.f), + center_freqs_(bank_size_), + capture_filter_bank_(CreateErbBank(num_noise_bins)), + render_filter_bank_(CreateErbBank(freqs_)), + gains_eq_(bank_size_), + gain_applier_(freqs_, kMaxRelativeGainChange), + audio_s16_(chunk_length_), + chunks_since_voice_(kSpeechOffsetDelay), + is_speech_(false), + snr_(kMaxActiveSNR), + is_active_(false), + num_chunks_(0u), + num_active_chunks_(0u), + noise_estimation_buffer_(num_noise_bins), + noise_estimation_queue_(kMaxNumNoiseEstimatesToBuffer, + std::vector<float>(num_noise_bins), + RenderQueueItemVerifier<float>(num_noise_bins)) { + RTC_DCHECK_LE(kRho, 1.f); + + const size_t erb_index = static_cast<size_t>( + ceilf(11.17f * logf((kClipFreqKhz + 0.312f) / (kClipFreqKhz + 14.6575f)) + + 43.f)); + start_freq_ = std::max(static_cast<size_t>(1), erb_index * kErbResolution); + + size_t window_size = static_cast<size_t>(1) << RealFourier::FftOrder(freqs_); + std::vector<float> kbd_window(window_size); + WindowGenerator::KaiserBesselDerived(kKbdAlpha, window_size, + kbd_window.data()); + render_mangler_.reset(new LappedTransform( + num_render_channels_, num_render_channels_, chunk_length_, + kbd_window.data(), window_size, window_size / 2, this)); + + const size_t initial_delay = render_mangler_->initial_delay(); + for (size_t i = 0u; i < num_bands - 1; ++i) { + high_bands_buffers_.push_back(std::unique_ptr<intelligibility::DelayBuffer>( + new intelligibility::DelayBuffer(initial_delay, num_render_channels_))); + } +} + +IntelligibilityEnhancer::~IntelligibilityEnhancer() { + // Don't rely on this log, since the destructor isn't called when the + // app/tab is killed. 
+  if (num_chunks_ > 0) {
+    RTC_LOG(LS_INFO) << "Intelligibility Enhancer was active for "
+                     << 100.f * static_cast<float>(num_active_chunks_) /
+                            num_chunks_
+                     << "% of the call.";
+  } else {
+    RTC_LOG(LS_INFO) << "Intelligibility Enhancer processed no chunk.";
+  }
+}
+
+void IntelligibilityEnhancer::SetCaptureNoiseEstimate(
+    std::vector<float> noise, float gain) {
+  RTC_DCHECK_EQ(noise.size(), num_noise_bins_);
+  for (auto& bin : noise) {
+    bin *= gain;
+  }
+  // The return value is deliberately ignored: overflowing the queue is
+  // acceptable, since it is not critical that every noise estimate arrives.
+  noise_estimation_queue_.Insert(&noise);
+}
+
+void IntelligibilityEnhancer::ProcessRenderAudio(AudioBuffer* audio) {
+  RTC_DCHECK_EQ(num_render_channels_, audio->num_channels());
+  while (noise_estimation_queue_.Remove(&noise_estimation_buffer_)) {
+    noise_power_estimator_.Step(noise_estimation_buffer_.data());
+  }
+  float* const* low_band = audio->split_channels_f(kBand0To8kHz);
+  is_speech_ = IsSpeech(low_band[0]);
+  render_mangler_->ProcessChunk(low_band, low_band);
+  DelayHighBands(audio);
+}
+
+void IntelligibilityEnhancer::ProcessAudioBlock(
+    const std::complex<float>* const* in_block,
+    size_t in_channels,
+    size_t frames,
+    size_t /* out_channels */,
+    std::complex<float>* const* out_block) {
+  RTC_DCHECK_EQ(freqs_, frames);
+  if (is_speech_) {
+    clear_power_estimator_.Step(in_block[0]);
+  }
+  SnrBasedEffectActivation();
+  ++num_chunks_;
+  if (is_active_) {
+    ++num_active_chunks_;
+    if (num_chunks_ % kGainUpdatePeriod == 0) {
+      MapToErbBands(clear_power_estimator_.power().data(), render_filter_bank_,
+                    filtered_clear_pow_.data());
+      MapToErbBands(noise_power_estimator_.power().data(), capture_filter_bank_,
+                    filtered_noise_pow_.data());
+      SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.data());
+      const float power_target = std::accumulate(
+          filtered_clear_pow_.data(),
+          filtered_clear_pow_.data() + bank_size_,
+          0.f);
+      const float power_top =
+          DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_);
+      SolveForGainsGivenLambda(kLambdaBot, start_freq_, gains_eq_.data());
+      const float power_bot =
+          DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_);
+      if (power_target >= power_bot && power_target <= power_top) {
+        SolveForLambda(power_target);
+        UpdateErbGains();
+      }  // Else experiencing power underflow, so do nothing.
+    }
+  }
+  for (size_t i = 0; i < in_channels; ++i) {
+    gain_applier_.Apply(in_block[i], out_block[i]);
+  }
+}
+
+void IntelligibilityEnhancer::SnrBasedEffectActivation() {
+  const float* clear_psd = clear_power_estimator_.power().data();
+  const float* noise_psd = noise_power_estimator_.power().data();
+  const float clear_power =
+      std::accumulate(clear_psd, clear_psd + freqs_, 0.f);
+  const float noise_power =
+      std::accumulate(noise_psd, noise_psd + freqs_, 0.f);
+  snr_ = kDecayRate * snr_ +
+         (1.f - kDecayRate) * clear_power /
+             (noise_power + std::numeric_limits<float>::epsilon());
+  if (is_active_) {
+    if (snr_ > kMaxActiveSNR) {
+      RTC_LOG(LS_INFO) << "Intelligibility Enhancer was deactivated at chunk "
+                       << num_chunks_;
+      is_active_ = false;
+      // Set the target gains to unity.
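+      // GainApplier keeps ramping its current gains towards the targets, so
+      // resetting the targets here fades the effect out smoothly instead of
+      // switching it off abruptly.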
+ float* gains = gain_applier_.target(); + for (size_t i = 0; i < freqs_; ++i) { + gains[i] = 1.f; + } + } + } else { + if (snr_ < kMinInactiveSNR) { + RTC_LOG(LS_INFO) << "Intelligibility Enhancer was activated at chunk " + << num_chunks_; + is_active_ = true; + } + } +} + +void IntelligibilityEnhancer::SolveForLambda(float power_target) { + const float kConvergeThresh = 0.001f; // TODO(ekmeyerson): Find best values + const int kMaxIters = 100; // for these, based on experiments. + + const float reciprocal_power_target = + 1.f / (power_target + std::numeric_limits<float>::epsilon()); + float lambda_bot = kLambdaBot; + float lambda_top = kLambdaTop; + float power_ratio = 2.f; // Ratio of achieved power to target power. + int iters = 0; + while (std::fabs(power_ratio - 1.f) > kConvergeThresh && iters <= kMaxIters) { + const float lambda = (lambda_bot + lambda_top) / 2.f; + SolveForGainsGivenLambda(lambda, start_freq_, gains_eq_.data()); + const float power = + DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_); + if (power < power_target) { + lambda_bot = lambda; + } else { + lambda_top = lambda; + } + power_ratio = std::fabs(power * reciprocal_power_target); + ++iters; + } +} + +void IntelligibilityEnhancer::UpdateErbGains() { + // (ERB gain) = filterbank' * (freq gain) + float* gains = gain_applier_.target(); + for (size_t i = 0; i < freqs_; ++i) { + gains[i] = 0.f; + for (size_t j = 0; j < bank_size_; ++j) { + gains[i] += render_filter_bank_[j][i] * gains_eq_[j]; + } + } +} + +size_t IntelligibilityEnhancer::GetBankSize(int sample_rate, + size_t erb_resolution) { + float freq_limit = sample_rate / 2000.f; + size_t erb_scale = static_cast<size_t>(ceilf( + 11.17f * logf((freq_limit + 0.312f) / (freq_limit + 14.6575f)) + 43.f)); + return erb_scale * erb_resolution; +} + +std::vector<std::vector<float>> IntelligibilityEnhancer::CreateErbBank( + size_t num_freqs) { + std::vector<std::vector<float>> filter_bank(bank_size_); + size_t lf = 1, rf = 4; + + for (size_t i = 0; i < bank_size_; ++i) { + float abs_temp = fabsf((i + 1.f) / static_cast<float>(kErbResolution)); + center_freqs_[i] = 676170.4f / (47.06538f - expf(0.08950404f * abs_temp)); + center_freqs_[i] -= 14678.49f; + } + float last_center_freq = center_freqs_[bank_size_ - 1]; + for (size_t i = 0; i < bank_size_; ++i) { + center_freqs_[i] *= 0.5f * sample_rate_hz_ / last_center_freq; + } + + for (size_t i = 0; i < bank_size_; ++i) { + filter_bank[i].resize(num_freqs); + } + + for (size_t i = 1; i <= bank_size_; ++i) { + size_t lll = static_cast<size_t>( + round(center_freqs_[rtc::SafeMax<size_t>(1, i - lf) - 1] * num_freqs / + (0.5f * sample_rate_hz_))); + size_t ll = static_cast<size_t>( + round(center_freqs_[rtc::SafeMax<size_t>(1, i) - 1] * num_freqs / + (0.5f * sample_rate_hz_))); + lll = rtc::SafeClamp<size_t>(lll, 1, num_freqs) - 1; + ll = rtc::SafeClamp<size_t>(ll, 1, num_freqs) - 1; + + size_t rrr = static_cast<size_t>( + round(center_freqs_[rtc::SafeMin<size_t>(bank_size_, i + rf) - 1] * + num_freqs / (0.5f * sample_rate_hz_))); + size_t rr = static_cast<size_t>( + round(center_freqs_[rtc::SafeMin<size_t>(bank_size_, i + 1) - 1] * + num_freqs / (0.5f * sample_rate_hz_))); + rrr = rtc::SafeClamp<size_t>(rrr, 1, num_freqs) - 1; + rr = rtc::SafeClamp<size_t>(rr, 1, num_freqs) - 1; + + float step = ll == lll ? 0.f : 1.f / (ll - lll); + float element = 0.f; + for (size_t j = lll; j <= ll; ++j) { + filter_bank[i - 1][j] = element; + element += step; + } + step = rr == rrr ? 
0.f : 1.f / (rrr - rr); + element = 1.f; + for (size_t j = rr; j <= rrr; ++j) { + filter_bank[i - 1][j] = element; + element -= step; + } + for (size_t j = ll; j <= rr; ++j) { + filter_bank[i - 1][j] = 1.f; + } + } + + for (size_t i = 0; i < num_freqs; ++i) { + float sum = 0.f; + for (size_t j = 0; j < bank_size_; ++j) { + sum += filter_bank[j][i]; + } + for (size_t j = 0; j < bank_size_; ++j) { + filter_bank[j][i] /= sum; + } + } + return filter_bank; +} + +void IntelligibilityEnhancer::SolveForGainsGivenLambda(float lambda, + size_t start_freq, + float* sols) { + const float kMinPower = 1e-5f; + + const float* pow_x0 = filtered_clear_pow_.data(); + const float* pow_n0 = filtered_noise_pow_.data(); + + for (size_t n = 0; n < start_freq; ++n) { + sols[n] = 1.f; + } + + // Analytic solution for optimal gains. See paper for derivation. + for (size_t n = start_freq; n < bank_size_; ++n) { + if (pow_x0[n] < kMinPower || pow_n0[n] < kMinPower) { + sols[n] = 1.f; + } else { + const float gamma0 = 0.5f * kRho * pow_x0[n] * pow_n0[n] + + lambda * pow_x0[n] * pow_n0[n] * pow_n0[n]; + const float beta0 = + lambda * pow_x0[n] * (2.f - kRho) * pow_x0[n] * pow_n0[n]; + const float alpha0 = + lambda * pow_x0[n] * (1.f - kRho) * pow_x0[n] * pow_x0[n]; + RTC_DCHECK_LT(alpha0, 0.f); + // The quadratic equation should always have real roots, but to guard + // against numerical errors we limit it to a minimum of zero. + sols[n] = std::max( + 0.f, (-beta0 - std::sqrt(std::max( + 0.f, beta0 * beta0 - 4.f * alpha0 * gamma0))) / + (2.f * alpha0)); + } + } +} + +bool IntelligibilityEnhancer::IsSpeech(const float* audio) { + FloatToS16(audio, chunk_length_, audio_s16_.data()); + vad_.ProcessChunk(audio_s16_.data(), chunk_length_, sample_rate_hz_); + if (vad_.last_voice_probability() > kVoiceProbabilityThreshold) { + chunks_since_voice_ = 0; + } else if (chunks_since_voice_ < kSpeechOffsetDelay) { + ++chunks_since_voice_; + } + return chunks_since_voice_ < kSpeechOffsetDelay; +} + +void IntelligibilityEnhancer::DelayHighBands(AudioBuffer* audio) { + RTC_DCHECK_EQ(audio->num_bands(), high_bands_buffers_.size() + 1); + for (size_t i = 0u; i < high_bands_buffers_.size(); ++i) { + Band band = static_cast<Band>(i + 1); + high_bands_buffers_[i]->Delay(audio->split_channels_f(band), chunk_length_); + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h b/third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h new file mode 100644 index 0000000000..3e0e269c58 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h @@ -0,0 +1,137 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_ +#define MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_ + +#include <complex> +#include <memory> +#include <vector> + +#include "common_audio/channel_buffer.h" +#include "common_audio/lapped_transform.h" +#include "modules/audio_processing/audio_buffer.h" +#include "modules/audio_processing/intelligibility/intelligibility_utils.h" +#include "modules/audio_processing/render_queue_item_verifier.h" +#include "modules/audio_processing/vad/voice_activity_detector.h" +#include "rtc_base/swap_queue.h" + +namespace webrtc { + +// Speech intelligibility enhancement module. Reads render and capture +// audio streams and modifies the render stream with a set of gains per +// frequency bin to enhance speech against the noise background. +// Details of the model and algorithm can be found in the original paper: +// http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=6882788 +class IntelligibilityEnhancer : public LappedTransform::Callback { + public: + IntelligibilityEnhancer(int sample_rate_hz, + size_t num_render_channels, + size_t num_bands, + size_t num_noise_bins); + + ~IntelligibilityEnhancer() override; + + // Sets the capture noise magnitude spectrum estimate. + void SetCaptureNoiseEstimate(std::vector<float> noise, float gain); + + // Reads chunk of speech in time domain and updates with modified signal. + void ProcessRenderAudio(AudioBuffer* audio); + bool active() const; + + protected: + // All in frequency domain, receives input |in_block|, applies + // intelligibility enhancement, and writes result to |out_block|. + void ProcessAudioBlock(const std::complex<float>* const* in_block, + size_t in_channels, + size_t frames, + size_t out_channels, + std::complex<float>* const* out_block) override; + + private: + FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestRenderUpdate); + FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestErbCreation); + FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestSolveForGains); + FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, + TestNoiseGainHasExpectedResult); + FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, + TestAllBandsHaveSameDelay); + + // Updates the SNR estimation and enables or disables this component using a + // hysteresis. + void SnrBasedEffectActivation(); + + // Bisection search for optimal |lambda|. + void SolveForLambda(float power_target); + + // Transforms freq gains to ERB gains. + void UpdateErbGains(); + + // Returns number of ERB filters. + static size_t GetBankSize(int sample_rate, size_t erb_resolution); + + // Initializes ERB filterbank. + std::vector<std::vector<float>> CreateErbBank(size_t num_freqs); + + // Analytically solves quadratic for optimal gains given |lambda|. + // Negative gains are set to 0. Stores the results in |sols|. + void SolveForGainsGivenLambda(float lambda, size_t start_freq, float* sols); + + // Returns true if the audio is speech. + bool IsSpeech(const float* audio); + + // Delays the high bands to compensate for the processing delay in the low + // band. + void DelayHighBands(AudioBuffer* audio); + + static const size_t kMaxNumNoiseEstimatesToBuffer = 5; + + const size_t freqs_; // Num frequencies in frequency domain. + const size_t num_noise_bins_; + const size_t chunk_length_; // Chunk size in samples. + const size_t bank_size_; // Num ERB filters. 
+ const int sample_rate_hz_; + const size_t num_render_channels_; + + intelligibility::PowerEstimator<std::complex<float>> clear_power_estimator_; + intelligibility::PowerEstimator<float> noise_power_estimator_; + std::vector<float> filtered_clear_pow_; + std::vector<float> filtered_noise_pow_; + std::vector<float> center_freqs_; + std::vector<std::vector<float>> capture_filter_bank_; + std::vector<std::vector<float>> render_filter_bank_; + size_t start_freq_; + + std::vector<float> gains_eq_; // Pre-filter modified gains. + intelligibility::GainApplier gain_applier_; + + std::unique_ptr<LappedTransform> render_mangler_; + + VoiceActivityDetector vad_; + std::vector<int16_t> audio_s16_; + size_t chunks_since_voice_; + bool is_speech_; + float snr_; + bool is_active_; + + unsigned long int num_chunks_; + unsigned long int num_active_chunks_; + + std::vector<float> noise_estimation_buffer_; + SwapQueue<std::vector<float>, RenderQueueItemVerifier<float>> + noise_estimation_queue_; + + std::vector<std::unique_ptr<intelligibility::DelayBuffer>> + high_bands_buffers_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer_unittest.cc new file mode 100644 index 0000000000..98a8dae469 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer_unittest.cc @@ -0,0 +1,536 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <math.h> +#include <stdlib.h> + +#include <algorithm> +#include <memory> +#include <vector> + +#include "api/array_view.h" +#include "common_audio/signal_processing/include/signal_processing_library.h" +#include "modules/audio_processing/audio_buffer.h" +#include "modules/audio_processing/intelligibility/intelligibility_enhancer.h" +#include "modules/audio_processing/noise_suppression_impl.h" +#include "modules/audio_processing/test/audio_buffer_tools.h" +#include "modules/audio_processing/test/bitexactness_tools.h" +#include "rtc_base/arraysize.h" +#include "test/gtest.h" + +namespace webrtc { + +namespace { + +// Target output for ERB create test. Generated with matlab. 
+const float kTestCenterFreqs[] = { + 14.5213f, 29.735f, 45.6781f, 62.3884f, 79.9058f, 98.2691f, 117.521f, + 137.708f, 158.879f, 181.084f, 204.378f, 228.816f, 254.459f, 281.371f, + 309.618f, 339.273f, 370.411f, 403.115f, 437.469f, 473.564f, 511.497f, + 551.371f, 593.293f, 637.386f, 683.77f, 732.581f, 783.96f, 838.06f, + 895.046f, 955.09f, 1018.38f, 1085.13f, 1155.54f, 1229.85f, 1308.32f, + 1391.22f, 1478.83f, 1571.5f, 1669.55f, 1773.37f, 1883.37f, 2000.f}; +const float kTestFilterBank[][33] = { + {0.2f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, + {0.2f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, + {0.2f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, + {0.2f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, + {0.2f, 0.25f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, + {0.f, 0.25f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, + {0.f, 0.25f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, + {0.f, 0.25f, 0.25f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, + {0.f, 0.f, 0.25f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, + {0.f, 0.f, 0.25f, 0.142857f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, + {0.f, 0.f, 0.25f, 0.285714f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, + {0.f, 0.f, 0.f, 0.285714f, 0.142857f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, + {0.f, 0.f, 0.f, 0.285714f, 0.285714f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, + {0.f, 0.f, 0.f, 0.f, 0.285714f, 0.142857f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, + {0.f, 0.f, 0.f, 0.f, 0.285714f, 0.285714f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, + {0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f, 0.142857f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, + {0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f, 0.285714f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, + {0.f, 
0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f, 0.142857f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, + {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f, 0.285714f, 0.157895f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, + {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f, 0.210526f, 0.117647f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, + {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f, 0.315789f, 0.176471f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, + {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.315789f, 0.352941f, 0.142857f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, + {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.352941f, 0.285714f, + 0.157895f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, + {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f, + 0.210526f, 0.111111f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, + {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.285714f, 0.315789f, 0.222222f, 0.111111f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, + {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.315789f, 0.333333f, 0.222222f, 0.111111f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, + {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.333333f, 0.333333f, 0.222222f, 0.111111f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, + {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.333333f, 0.333333f, 0.222222f, 0.111111f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, + {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.333333f, 0.333333f, 0.222222f, 0.111111f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, + {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.333333f, 0.333333f, 0.222222f, + 0.108108f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, + {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.333333f, 0.333333f, + 0.243243f, 0.153846f, 0.0833333f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, + {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.333333f, + 0.324324f, 0.230769f, 0.166667f, 0.0909091f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, + {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.324324f, 0.307692f, 0.25f, 0.181818f, 0.0833333f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, + {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.307692f, 0.333333f, + 0.363636f, 0.25f, 0.151515f, 0.0793651f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f}, + {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.166667f, 0.363636f, 0.333333f, 0.242424f, + 0.190476f, 0.133333f, 0.0689655f, 0.f, 0.f, 0.f, + 0.f, 
0.f, 0.f}, + {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.333333f, 0.30303f, 0.253968f, 0.2f, 0.137931f, + 0.0714286f, 0.f, 0.f, 0.f, 0.f, 0.f}, + {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.30303f, 0.31746f, 0.333333f, 0.275862f, 0.214286f, + 0.125f, 0.0655738f, 0.f, 0.f, 0.f}, + {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.15873f, 0.333333f, 0.344828f, 0.357143f, + 0.25f, 0.196721f, 0.137931f, 0.0816327f, 0.f}, + {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.172414f, 0.357143f, + 0.3125f, 0.245902f, 0.172414f, 0.102041f, 0.f}, + {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.3125f, 0.327869f, 0.344828f, 0.204082f, 0.f}, + {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.163934f, 0.344828f, 0.408163f, 0.5f}, + {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.204082f, 0.5f}}; +static_assert(arraysize(kTestCenterFreqs) == arraysize(kTestFilterBank), + "Test filterbank badly initialized."); + +// Target output for gain solving test. Generated with matlab. +const size_t kTestStartFreq = 12; // Lowest integral frequency for ERBs. +const float kTestZeroVar = 1.f; +const float kTestNonZeroVarLambdaTop[] = { + 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}; +static_assert(arraysize(kTestCenterFreqs) == + arraysize(kTestNonZeroVarLambdaTop), + "Power test data badly initialized."); +const float kMaxTestError = 0.005f; + +// Enhancer initialization parameters. +const int kSamples = 10000; +const int kSampleRate = 4000; +const int kNumChannels = 1; +const int kFragmentSize = kSampleRate / 100; +const size_t kNumNoiseBins = 129; +const size_t kNumBands = 1; + +// Number of frames to process in the bitexactness tests. +const size_t kNumFramesToProcess = 1000; + +int IntelligibilityEnhancerSampleRate(int sample_rate_hz) { + return (sample_rate_hz > AudioProcessing::kSampleRate16kHz + ? AudioProcessing::kSampleRate16kHz + : sample_rate_hz); +} + +// Process one frame of data and produce the output. 
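+// The capture side runs the noise suppressor, and the resulting noise
+// estimate is queued into the enhancer, where it is consumed on subsequent
+// render frames.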
+void ProcessOneFrame(int sample_rate_hz,
+                     AudioBuffer* render_audio_buffer,
+                     AudioBuffer* capture_audio_buffer,
+                     NoiseSuppressionImpl* noise_suppressor,
+                     IntelligibilityEnhancer* intelligibility_enhancer) {
+  if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) {
+    render_audio_buffer->SplitIntoFrequencyBands();
+    capture_audio_buffer->SplitIntoFrequencyBands();
+  }
+
+  intelligibility_enhancer->ProcessRenderAudio(render_audio_buffer);
+
+  noise_suppressor->AnalyzeCaptureAudio(capture_audio_buffer);
+  noise_suppressor->ProcessCaptureAudio(capture_audio_buffer);
+
+  intelligibility_enhancer->SetCaptureNoiseEstimate(
+      noise_suppressor->NoiseEstimate(), 0);
+
+  if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) {
+    render_audio_buffer->MergeFrequencyBands();
+  }
+}
+
+// Processes a specified amount of frames, verifies the results and reports
+// any errors.
+void RunBitexactnessTest(int sample_rate_hz,
+                         size_t num_channels,
+                         rtc::ArrayView<const float> output_reference) {
+  const StreamConfig render_config(sample_rate_hz, num_channels, false);
+  AudioBuffer render_buffer(
+      render_config.num_frames(), render_config.num_channels(),
+      render_config.num_frames(), render_config.num_channels(),
+      render_config.num_frames());
+  test::InputAudioFile render_file(
+      test::GetApmRenderTestVectorFileName(sample_rate_hz));
+  std::vector<float> render_input(render_buffer.num_frames() *
+                                  render_buffer.num_channels());
+
+  const StreamConfig capture_config(sample_rate_hz, num_channels, false);
+  AudioBuffer capture_buffer(
+      capture_config.num_frames(), capture_config.num_channels(),
+      capture_config.num_frames(), capture_config.num_channels(),
+      capture_config.num_frames());
+  test::InputAudioFile capture_file(
+      test::GetApmCaptureTestVectorFileName(sample_rate_hz));
+  std::vector<float> capture_input(render_buffer.num_frames() *
+                                   capture_buffer.num_channels());
+
+  rtc::CriticalSection crit_capture;
+  NoiseSuppressionImpl noise_suppressor(&crit_capture);
+  noise_suppressor.Initialize(capture_config.num_channels(), sample_rate_hz);
+  noise_suppressor.Enable(true);
+
+  IntelligibilityEnhancer intelligibility_enhancer(
+      IntelligibilityEnhancerSampleRate(sample_rate_hz),
+      render_config.num_channels(), kNumBands,
+      NoiseSuppressionImpl::num_noise_bins());
+
+  for (size_t frame_no = 0u; frame_no < kNumFramesToProcess; ++frame_no) {
+    ReadFloatSamplesFromStereoFile(render_buffer.num_frames(),
+                                   render_buffer.num_channels(), &render_file,
+                                   render_input);
+    ReadFloatSamplesFromStereoFile(capture_buffer.num_frames(),
+                                   capture_buffer.num_channels(), &capture_file,
+                                   capture_input);
+
+    test::CopyVectorToAudioBuffer(render_config, render_input, &render_buffer);
+    test::CopyVectorToAudioBuffer(capture_config, capture_input,
+                                  &capture_buffer);
+
+    ProcessOneFrame(sample_rate_hz, &render_buffer, &capture_buffer,
+                    &noise_suppressor, &intelligibility_enhancer);
+  }
+
+  // Extract and verify the test results.
+  std::vector<float> render_output;
+  test::ExtractVectorFromAudioBuffer(render_config, &render_buffer,
+                                     &render_output);
+
+  const float kElementErrorBound = 1.f / static_cast<float>(1 << 15);
+
+  // Compare the output with the reference. Only the first values of the
+  // output from the last processed frame are compared, to avoid having to
+  // specify all preceding frames as test vectors. As the algorithm being
+  // tested has memory, testing only the last frame implicitly also tests the
+  // preceding frames.
+ EXPECT_TRUE(test::VerifyDeinterleavedArray( + render_buffer.num_frames(), render_config.num_channels(), + output_reference, render_output, kElementErrorBound)); +} + +float float_rand() { + return std::rand() * 2.f / RAND_MAX - 1; +} + +} // namespace + +class IntelligibilityEnhancerTest : public ::testing::Test { + protected: + IntelligibilityEnhancerTest() + : clear_buffer_(kFragmentSize, + kNumChannels, + kFragmentSize, + kNumChannels, + kFragmentSize), + stream_config_(kSampleRate, kNumChannels), + clear_data_(kSamples), + noise_data_(kNumNoiseBins), + orig_data_(kSamples) { + std::srand(1); + enh_.reset(new IntelligibilityEnhancer(kSampleRate, kNumChannels, kNumBands, + kNumNoiseBins)); + } + + bool CheckUpdate() { + enh_.reset(new IntelligibilityEnhancer(kSampleRate, kNumChannels, kNumBands, + kNumNoiseBins)); + float* clear_cursor = clear_data_.data(); + for (int i = 0; i < kSamples; i += kFragmentSize) { + enh_->SetCaptureNoiseEstimate(noise_data_, 1); + clear_buffer_.CopyFrom(&clear_cursor, stream_config_); + enh_->ProcessRenderAudio(&clear_buffer_); + clear_buffer_.CopyTo(stream_config_, &clear_cursor); + clear_cursor += kFragmentSize; + } + for (int i = initial_delay_; i < kSamples; i++) { + if (std::fabs(clear_data_[i] - orig_data_[i - initial_delay_]) > + kMaxTestError) { + return true; + } + } + return false; + } + + std::unique_ptr<IntelligibilityEnhancer> enh_; + // Render clean speech buffer. + AudioBuffer clear_buffer_; + StreamConfig stream_config_; + std::vector<float> clear_data_; + std::vector<float> noise_data_; + std::vector<float> orig_data_; + size_t initial_delay_; +}; + +// For each class of generated data, tests that render stream is updated when +// it should be. +TEST_F(IntelligibilityEnhancerTest, TestRenderUpdate) { + initial_delay_ = enh_->render_mangler_->initial_delay(); + std::fill(noise_data_.begin(), noise_data_.end(), 0.f); + std::fill(orig_data_.begin(), orig_data_.end(), 0.f); + std::fill(clear_data_.begin(), clear_data_.end(), 0.f); + EXPECT_FALSE(CheckUpdate()); + std::generate(clear_data_.begin(), clear_data_.end(), float_rand); + orig_data_ = clear_data_; + EXPECT_FALSE(CheckUpdate()); + std::generate(clear_data_.begin(), clear_data_.end(), float_rand); + orig_data_ = clear_data_; + std::generate(noise_data_.begin(), noise_data_.end(), float_rand); + FloatToFloatS16(noise_data_.data(), noise_data_.size(), noise_data_.data()); + EXPECT_TRUE(CheckUpdate()); +} + +// Tests ERB bank creation, comparing against matlab output. +TEST_F(IntelligibilityEnhancerTest, TestErbCreation) { + ASSERT_EQ(arraysize(kTestCenterFreqs), enh_->bank_size_); + for (size_t i = 0; i < enh_->bank_size_; ++i) { + EXPECT_NEAR(kTestCenterFreqs[i], enh_->center_freqs_[i], kMaxTestError); + ASSERT_EQ(arraysize(kTestFilterBank[0]), enh_->freqs_); + for (size_t j = 0; j < enh_->freqs_; ++j) { + EXPECT_NEAR(kTestFilterBank[i][j], enh_->render_filter_bank_[i][j], + kMaxTestError); + } + } +} + +// Tests analytic solution for optimal gains, comparing +// against matlab output. 
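+// Zeroing both the filtered clear power and the filtered noise power drives
+// every band below kMinPower in SolveForGainsGivenLambda, which then falls
+// back to unity gain; kTestZeroVar encodes that expectation.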
+TEST_F(IntelligibilityEnhancerTest, TestSolveForGains) { + ASSERT_EQ(kTestStartFreq, enh_->start_freq_); + std::vector<float> sols(enh_->bank_size_); + float lambda = -0.001f; + for (size_t i = 0; i < enh_->bank_size_; i++) { + enh_->filtered_clear_pow_[i] = 0.f; + enh_->filtered_noise_pow_[i] = 0.f; + } + enh_->SolveForGainsGivenLambda(lambda, enh_->start_freq_, sols.data()); + for (size_t i = 0; i < enh_->bank_size_; i++) { + EXPECT_NEAR(kTestZeroVar, sols[i], kMaxTestError); + } + for (size_t i = 0; i < enh_->bank_size_; i++) { + enh_->filtered_clear_pow_[i] = static_cast<float>(i + 1); + enh_->filtered_noise_pow_[i] = static_cast<float>(enh_->bank_size_ - i); + } + enh_->SolveForGainsGivenLambda(lambda, enh_->start_freq_, sols.data()); + for (size_t i = 0; i < enh_->bank_size_; i++) { + EXPECT_NEAR(kTestNonZeroVarLambdaTop[i], sols[i], kMaxTestError); + } + lambda = -1.f; + enh_->SolveForGainsGivenLambda(lambda, enh_->start_freq_, sols.data()); + for (size_t i = 0; i < enh_->bank_size_; i++) { + EXPECT_NEAR(kTestNonZeroVarLambdaTop[i], sols[i], kMaxTestError); + } +} + +TEST_F(IntelligibilityEnhancerTest, TestNoiseGainHasExpectedResult) { + const float kGain = 2.f; + const float kTolerance = 0.007f; + std::vector<float> noise(kNumNoiseBins); + std::vector<float> noise_psd(kNumNoiseBins); + std::generate(noise.begin(), noise.end(), float_rand); + for (size_t i = 0; i < kNumNoiseBins; ++i) { + noise_psd[i] = kGain * kGain * noise[i] * noise[i]; + } + float* clear_cursor = clear_data_.data(); + for (size_t i = 0; i < kNumFramesToProcess; ++i) { + enh_->SetCaptureNoiseEstimate(noise, kGain); + clear_buffer_.CopyFrom(&clear_cursor, stream_config_); + enh_->ProcessRenderAudio(&clear_buffer_); + } + const std::vector<float>& estimated_psd = + enh_->noise_power_estimator_.power(); + for (size_t i = 0; i < kNumNoiseBins; ++i) { + EXPECT_LT(std::abs(estimated_psd[i] - noise_psd[i]) / noise_psd[i], + kTolerance); + } +} + +TEST_F(IntelligibilityEnhancerTest, TestAllBandsHaveSameDelay) { + const int kTestSampleRate = AudioProcessing::kSampleRate32kHz; + const int kTestSplitRate = AudioProcessing::kSampleRate16kHz; + const size_t kTestNumBands = + rtc::CheckedDivExact(kTestSampleRate, kTestSplitRate); + const size_t kTestFragmentSize = rtc::CheckedDivExact(kTestSampleRate, 100); + const size_t kTestSplitFragmentSize = + rtc::CheckedDivExact(kTestSplitRate, 100); + enh_.reset(new IntelligibilityEnhancer(kTestSplitRate, kNumChannels, + kTestNumBands, kNumNoiseBins)); + size_t initial_delay = enh_->render_mangler_->initial_delay(); + std::vector<float> rand_gen_buf(kTestFragmentSize); + AudioBuffer original_buffer(kTestFragmentSize, kNumChannels, + kTestFragmentSize, kNumChannels, + kTestFragmentSize); + AudioBuffer audio_buffer(kTestFragmentSize, kNumChannels, kTestFragmentSize, + kNumChannels, kTestFragmentSize); + for (size_t i = 0u; i < kTestNumBands; ++i) { + std::generate(rand_gen_buf.begin(), rand_gen_buf.end(), float_rand); + original_buffer.split_data_f()->SetDataForTesting(rand_gen_buf.data(), + rand_gen_buf.size()); + audio_buffer.split_data_f()->SetDataForTesting(rand_gen_buf.data(), + rand_gen_buf.size()); + } + enh_->ProcessRenderAudio(&audio_buffer); + for (size_t i = 0u; i < kTestNumBands; ++i) { + const float* original_ptr = original_buffer.split_bands_const_f(0)[i]; + const float* audio_ptr = audio_buffer.split_bands_const_f(0)[i]; + for (size_t j = initial_delay; j < kTestSplitFragmentSize; ++j) { + EXPECT_LT(std::fabs(original_ptr[j - initial_delay] - audio_ptr[j]), + 
kMaxTestError); + } + } +} + +TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Mono8kHz) { + const float kOutputReference[] = {-0.001892f, -0.003296f, -0.001953f}; + + RunBitexactnessTest(AudioProcessing::kSampleRate8kHz, 1, kOutputReference); +} + +TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Mono16kHz) { + const float kOutputReference[] = {-0.000977f, -0.003296f, -0.002441f}; + + RunBitexactnessTest(AudioProcessing::kSampleRate16kHz, 1, kOutputReference); +} + +TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Mono32kHz) { + const float kOutputReference[] = {0.003021f, -0.011780f, -0.008209f}; + + RunBitexactnessTest(AudioProcessing::kSampleRate32kHz, 1, kOutputReference); +} + +TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Mono48kHz) { + const float kOutputReference[] = {-0.027696f, -0.026253f, -0.018001f}; + + RunBitexactnessTest(AudioProcessing::kSampleRate48kHz, 1, kOutputReference); +} + +TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Stereo8kHz) { + const float kOutputReference[] = {0.021454f, 0.035919f, 0.026428f, + -0.000641f, 0.000366f, 0.000641f}; + + RunBitexactnessTest(AudioProcessing::kSampleRate8kHz, 2, kOutputReference); +} + +TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Stereo16kHz) { + const float kOutputReference[] = {0.021362f, 0.035736f, 0.023895f, + -0.001404f, -0.001465f, 0.000549f}; + + RunBitexactnessTest(AudioProcessing::kSampleRate16kHz, 2, kOutputReference); +} + +TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Stereo32kHz) { + const float kOutputReference[] = {0.030641f, 0.027406f, 0.028321f, + -0.001343f, -0.004578f, 0.000977f}; + + RunBitexactnessTest(AudioProcessing::kSampleRate32kHz, 2, kOutputReference); +} + +TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Stereo48kHz) { + const float kOutputReference[] = {-0.009276f, -0.001601f, -0.008255f, + -0.012975f, -0.015940f, -0.017820f}; + + RunBitexactnessTest(AudioProcessing::kSampleRate48kHz, 2, kOutputReference); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/intelligibility_utils.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/intelligibility_utils.cc new file mode 100644 index 0000000000..b6917f4407 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/intelligibility_utils.cc @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/intelligibility/intelligibility_utils.h" + +#include <math.h> +#include <stdlib.h> +#include <string.h> +#include <algorithm> +#include <limits> + +#include "rtc_base/numerics/safe_minmax.h" + +namespace webrtc { + +namespace intelligibility { + +namespace { + +const float kMinFactor = 0.01f; +const float kMaxFactor = 100.f; + +// Return |current| changed towards |target|, with the relative change being at +// most |limit|. 
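+// For example, with current = 1.f, target = 2.f and limit = 0.005f, the raw
+// gain ratio of 2.f is clamped to 1.005f and the call returns 1.005f: at
+// most a 0.5% change per step.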
+float UpdateFactor(float target, float current, float limit) {
+  const float gain = target / (current + std::numeric_limits<float>::epsilon());
+  const float clamped_gain = rtc::SafeClamp(gain, 1 - limit, 1 + limit);
+  return rtc::SafeClamp(current * clamped_gain, kMinFactor, kMaxFactor);
+}
+
+}  // namespace
+
+template <typename T>
+PowerEstimator<T>::PowerEstimator(size_t num_freqs, float decay)
+    : power_(num_freqs, 0.f), decay_(decay) {}
+
+template <typename T>
+void PowerEstimator<T>::Step(const T* data) {
+  for (size_t i = 0; i < power_.size(); ++i) {
+    power_[i] = decay_ * power_[i] +
+                (1.f - decay_) * std::abs(data[i]) * std::abs(data[i]);
+  }
+}
+
+template class PowerEstimator<float>;
+template class PowerEstimator<std::complex<float>>;
+
+GainApplier::GainApplier(size_t freqs, float relative_change_limit)
+    : num_freqs_(freqs),
+      relative_change_limit_(relative_change_limit),
+      target_(freqs, 1.f),
+      current_(freqs, 1.f) {}
+
+GainApplier::~GainApplier() {}
+
+void GainApplier::Apply(const std::complex<float>* in_block,
+                        std::complex<float>* out_block) {
+  for (size_t i = 0; i < num_freqs_; ++i) {
+    current_[i] = UpdateFactor(target_[i], current_[i], relative_change_limit_);
+    out_block[i] = sqrtf(fabsf(current_[i])) * in_block[i];
+  }
+}
+
+DelayBuffer::DelayBuffer(size_t delay, size_t num_channels)
+    : buffer_(num_channels, std::vector<float>(delay, 0.f)), read_index_(0u) {}
+
+DelayBuffer::~DelayBuffer() {}
+
+void DelayBuffer::Delay(float* const* data, size_t length) {
+  size_t sample_index = read_index_;
+  for (size_t i = 0u; i < buffer_.size(); ++i) {
+    sample_index = read_index_;
+    for (size_t j = 0u; j < length; ++j) {
+      float swap = data[i][j];
+      data[i][j] = buffer_[i][sample_index];
+      buffer_[i][sample_index] = swap;
+      // Wrap at the per-channel delay length, not at the channel count.
+      if (++sample_index == buffer_[i].size()) {
+        sample_index = 0u;
+      }
+    }
+  }
+  read_index_ = sample_index;
+}
+
+}  // namespace intelligibility
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h b/third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h
new file mode 100644
index 0000000000..4dc17d50b5
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_UTILS_H_
+#define MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_UTILS_H_
+
+#include <complex>
+#include <vector>
+
+namespace webrtc {
+
+namespace intelligibility {
+
+// Internal helper for computing the power of a stream of arrays.
+// The result is an array of power per position: the i-th power is the power of
+// the stream of data on the i-th positions in the input arrays.
+template <typename T>
+class PowerEstimator {
+ public:
+  // Construct an instance for the given input array length (|freqs|).
+  // |decay| is the forgetting factor of the power estimate.
+  PowerEstimator(size_t freqs, float decay);
+
+  // Add a new data point to the series.
+  void Step(const T* data);
+
+  // The current power array.
+ const std::vector<float>& power() { return power_; }; + + private: + // The current power array. + std::vector<float> power_; + + const float decay_; +}; + +// Helper class for smoothing gain changes. On each application step, the +// currently used gains are changed towards a set of settable target gains, +// constrained by a limit on the relative changes. +class GainApplier { + public: + GainApplier(size_t freqs, float relative_change_limit); + + ~GainApplier(); + + // Copy |in_block| to |out_block|, multiplied by the current set of gains, + // and step the current set of gains towards the target set. + void Apply(const std::complex<float>* in_block, + std::complex<float>* out_block); + + // Return the current target gain set. Modify this array to set the targets. + float* target() { return target_.data(); } + + private: + const size_t num_freqs_; + const float relative_change_limit_; + std::vector<float> target_; + std::vector<float> current_; +}; + +// Helper class to delay a signal by an integer number of samples. +class DelayBuffer { + public: + DelayBuffer(size_t delay, size_t num_channels); + + ~DelayBuffer(); + + void Delay(float* const* data, size_t length); + + private: + std::vector<std::vector<float>> buffer_; + size_t read_index_; +}; + +} // namespace intelligibility + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_UTILS_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/intelligibility_utils_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/intelligibility_utils_unittest.cc new file mode 100644 index 0000000000..fea394c338 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/intelligibility_utils_unittest.cc @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <cmath> +#include <complex> +#include <vector> + +#include "modules/audio_processing/intelligibility/intelligibility_utils.h" +#include "rtc_base/arraysize.h" +#include "test/gtest.h" + +namespace webrtc { + +namespace intelligibility { + +std::vector<std::vector<std::complex<float>>> GenerateTestData(size_t freqs, + size_t samples) { + std::vector<std::vector<std::complex<float>>> data(samples); + for (size_t i = 0; i < samples; ++i) { + for (size_t j = 0; j < freqs; ++j) { + const float val = 0.99f / ((i + 1) * (j + 1)); + data[i].push_back(std::complex<float>(val, val)); + } + } + return data; +} + +// Tests PowerEstimator, for all power step types. +TEST(IntelligibilityUtilsTest, TestPowerEstimator) { + const size_t kFreqs = 10; + const size_t kSamples = 100; + const float kDecay = 0.5f; + const std::vector<std::vector<std::complex<float>>> test_data( + GenerateTestData(kFreqs, kSamples)); + PowerEstimator<std::complex<float>> power_estimator(kFreqs, kDecay); + EXPECT_EQ(0, power_estimator.power()[0]); + + // Makes sure Step is doing something. 
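+  // Each Step computes decay * old + (1 - decay) * |data[i]|^2, so with
+  // kDecay = 0.5f the estimate is an equal blend of history and the newest
+  // instantaneous power.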
+ power_estimator.Step(test_data[0].data()); + for (size_t i = 1; i < kSamples; ++i) { + power_estimator.Step(test_data[i].data()); + for (size_t j = 0; j < kFreqs; ++j) { + EXPECT_GE(power_estimator.power()[j], 0.f); + EXPECT_LE(power_estimator.power()[j], 1.f); + } + } +} + +// Tests gain applier. +TEST(IntelligibilityUtilsTest, TestGainApplier) { + const size_t kFreqs = 10; + const size_t kSamples = 100; + const float kChangeLimit = 0.1f; + GainApplier gain_applier(kFreqs, kChangeLimit); + const std::vector<std::vector<std::complex<float>>> in_data( + GenerateTestData(kFreqs, kSamples)); + std::vector<std::vector<std::complex<float>>> out_data( + GenerateTestData(kFreqs, kSamples)); + for (size_t i = 0; i < kSamples; ++i) { + gain_applier.Apply(in_data[i].data(), out_data[i].data()); + for (size_t j = 0; j < kFreqs; ++j) { + EXPECT_GT(out_data[i][j].real(), 0.f); + EXPECT_LT(out_data[i][j].real(), 1.f); + EXPECT_GT(out_data[i][j].imag(), 0.f); + EXPECT_LT(out_data[i][j].imag(), 1.f); + } + } +} + +} // namespace intelligibility + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/test/intelligibility_proc.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/test/intelligibility_proc.cc new file mode 100644 index 0000000000..b90449caa3 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/intelligibility/test/intelligibility_proc.cc @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "common_audio/channel_buffer.h" +#include "common_audio/include/audio_util.h" +#include "common_audio/wav_file.h" +#include "modules/audio_processing/audio_buffer.h" +#include "modules/audio_processing/intelligibility/intelligibility_enhancer.h" +#include "modules/audio_processing/noise_suppression_impl.h" +#include "rtc_base/criticalsection.h" +#include "rtc_base/flags.h" + +using std::complex; + +namespace webrtc { +namespace { + +DEFINE_string(clear_file, "speech.wav", "Input file with clear speech."); +DEFINE_string(noise_file, "noise.wav", "Input file with noise data."); +DEFINE_string(out_file, "proc_enhanced.wav", "Enhanced output file."); +DEFINE_bool(help, false, "Print this message."); + +int int_main(int argc, char* argv[]) { + if (rtc::FlagList::SetFlagsFromCommandLine(&argc, argv, true)) { + return 1; + } + if (FLAG_help) { + rtc::FlagList::Print(nullptr, false); + return 0; + } + if (argc != 1) { + printf("\n\nInput files must be little-endian 16-bit signed raw PCM.\n"); + return 0; + } + + WavReader in_file(FLAG_clear_file); + WavReader noise_file(FLAG_noise_file); + WavWriter out_file(FLAG_out_file, in_file.sample_rate(), + in_file.num_channels()); + rtc::CriticalSection crit; + NoiseSuppressionImpl ns(&crit); + IntelligibilityEnhancer enh(in_file.sample_rate(), in_file.num_channels(), 1u, + NoiseSuppressionImpl::num_noise_bins()); + ns.Initialize(noise_file.num_channels(), noise_file.sample_rate()); + ns.Enable(true); + const size_t in_samples = noise_file.sample_rate() / 100; + const size_t noise_samples = noise_file.sample_rate() / 100; + std::vector<float> in(in_samples * in_file.num_channels()); + std::vector<float> noise(noise_samples * noise_file.num_channels()); + ChannelBuffer<float> in_buf(in_samples, in_file.num_channels()); + ChannelBuffer<float> noise_buf(noise_samples, noise_file.num_channels()); + AudioBuffer capture_audio(noise_samples, noise_file.num_channels(), + noise_samples, noise_file.num_channels(), + noise_samples); + AudioBuffer render_audio(in_samples, in_file.num_channels(), in_samples, + in_file.num_channels(), in_samples); + StreamConfig noise_config(noise_file.sample_rate(), + noise_file.num_channels()); + StreamConfig in_config(in_file.sample_rate(), in_file.num_channels()); + while (in_file.ReadSamples(in.size(), in.data()) == in.size() && + noise_file.ReadSamples(noise.size(), noise.data()) == noise.size()) { + FloatS16ToFloat(noise.data(), noise.size(), noise.data()); + FloatS16ToFloat(in.data(), in.size(), in.data()); + Deinterleave(in.data(), in_buf.num_frames(), in_buf.num_channels(), + in_buf.channels()); + Deinterleave(noise.data(), noise_buf.num_frames(), noise_buf.num_channels(), + noise_buf.channels()); + capture_audio.CopyFrom(noise_buf.channels(), noise_config); + render_audio.CopyFrom(in_buf.channels(), in_config); + ns.AnalyzeCaptureAudio(&capture_audio); + ns.ProcessCaptureAudio(&capture_audio); + enh.SetCaptureNoiseEstimate(ns.NoiseEstimate(), 1); + enh.ProcessRenderAudio(&render_audio); + render_audio.CopyTo(in_config, in_buf.channels()); + Interleave(in_buf.channels(), in_buf.num_frames(), in_buf.num_channels(), + in.data()); + FloatToFloatS16(in.data(), in.size(), in.data()); + out_file.WriteSamples(in.data(), in.size()); + } + + return 0; +} + +} // namespace +} // namespace webrtc + +int main(int argc, char* argv[]) { + return webrtc::int_main(argc, argv); +} diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/biquad_filter.cc 
b/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/biquad_filter.cc
new file mode 100644
index 0000000000..5a4ddc891e
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/biquad_filter.cc
@@ -0,0 +1,35 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/level_controller/biquad_filter.h"
+
+namespace webrtc {
+
+// This method applies a biquad filter to an input signal x to produce an
+// output signal y. The biquad coefficients are the ones supplied via
+// Initialize().
+void BiQuadFilter::Process(rtc::ArrayView<const float> x,
+                           rtc::ArrayView<float> y) {
+  for (size_t k = 0; k < x.size(); ++k) {
+    // Use a temporary variable for x[k] to allow an in-place function call
+    // (i.e. x and y may refer to the same array).
+    const float tmp = x[k];
+    y[k] = coefficients_.b[0] * tmp + coefficients_.b[1] * biquad_state_.b[0] +
+           coefficients_.b[2] * biquad_state_.b[1] -
+           coefficients_.a[0] * biquad_state_.a[0] -
+           coefficients_.a[1] * biquad_state_.a[1];
+    biquad_state_.b[1] = biquad_state_.b[0];
+    biquad_state_.b[0] = tmp;
+    biquad_state_.a[1] = biquad_state_.a[0];
+    biquad_state_.a[0] = y[k];
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/biquad_filter.h b/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/biquad_filter.h
new file mode 100644
index 0000000000..dad104d43f
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/biquad_filter.h
@@ -0,0 +1,58 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_BIQUAD_FILTER_H_
+#define MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_BIQUAD_FILTER_H_
+
+#include <algorithm>
+
+#include "api/array_view.h"
+#include "rtc_base/arraysize.h"
+#include "rtc_base/constructormagic.h"
+
+namespace webrtc {
+
+class BiQuadFilter {
+ public:
+  struct BiQuadCoefficients {
+    float b[3];
+    float a[2];
+  };
+
+  BiQuadFilter() = default;
+
+  void Initialize(const BiQuadCoefficients& coefficients) {
+    coefficients_ = coefficients;
+  }
+
+  // Produces a filtered output y of the input x. Both x and y need to
+  // have the same length.
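+  // (Editorial note: a minimal usage sketch; the coefficient values are
+  // borrowed from the 16 kHz low-pass design in down_sampler.cc and are
+  // only illustrative here.)
+  //   BiQuadFilter filter;
+  //   filter.Initialize({{0.1455f, 0.2911f, 0.1455f}, {-0.6698f, 0.2520f}});
+  //   std::array<float, 160> frame{};  // One 10 ms frame at 16 kHz.
+  //   filter.Process(frame, frame);    // In-place filtering is supported.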
+  void Process(rtc::ArrayView<const float> x, rtc::ArrayView<float> y);
+
+ private:
+  struct BiQuadState {
+    BiQuadState() {
+      std::fill(b, b + arraysize(b), 0.f);
+      std::fill(a, a + arraysize(a), 0.f);
+    }
+
+    float b[2];
+    float a[2];
+  };
+
+  BiQuadState biquad_state_;
+  BiQuadCoefficients coefficients_;
+
+  RTC_DISALLOW_COPY_AND_ASSIGN(BiQuadFilter);
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_BIQUAD_FILTER_H_
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/down_sampler.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/down_sampler.cc
new file mode 100644
index 0000000000..a1702f432c
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/down_sampler.cc
@@ -0,0 +1,100 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/level_controller/down_sampler.h"
+
+#include <string.h>
+#include <algorithm>
+#include <vector>
+
+#include "modules/audio_processing/include/audio_processing.h"
+#include "modules/audio_processing/level_controller/biquad_filter.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+namespace {
+
+// Bandlimiter coefficients computed under the assumption that only the
+// first 40 bins of the spectrum of the downsampled signal are used.
+// [B,A] = butter(2,(41/64*4000)/8000)
+const BiQuadFilter::BiQuadCoefficients kLowPassFilterCoefficients_16kHz = {
+    {0.1455f, 0.2911f, 0.1455f},
+    {-0.6698f, 0.2520f}};
+
+// [B,A] = butter(2,(41/64*4000)/16000)
+const BiQuadFilter::BiQuadCoefficients kLowPassFilterCoefficients_32kHz = {
+    {0.0462f, 0.0924f, 0.0462f},
+    {-1.3066f, 0.4915f}};
+
+// [B,A] = butter(2,(41/64*4000)/24000)
+const BiQuadFilter::BiQuadCoefficients kLowPassFilterCoefficients_48kHz = {
+    {0.0226f, 0.0452f, 0.0226f},
+    {-1.5320f, 0.6224f}};
+
+}  // namespace
+
+DownSampler::DownSampler(ApmDataDumper* data_dumper)
+    : data_dumper_(data_dumper) {
+  Initialize(48000);
+}
+
+void DownSampler::Initialize(int sample_rate_hz) {
+  RTC_DCHECK(sample_rate_hz == AudioProcessing::kSampleRate8kHz ||
+             sample_rate_hz == AudioProcessing::kSampleRate16kHz ||
+             sample_rate_hz == AudioProcessing::kSampleRate32kHz ||
+             sample_rate_hz == AudioProcessing::kSampleRate48kHz);
+
+  sample_rate_hz_ = sample_rate_hz;
+  down_sampling_factor_ = rtc::CheckedDivExact(sample_rate_hz_, 8000);
+
+  // Note that the down sampling filter is not used if the sample rate is
+  // 8 kHz.
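+  // (Editorial note: down_sampling_factor_ is the integer decimation ratio,
+  // e.g. 48000 / 8000 = 6 at 48 kHz, so DownSample() below keeps every sixth
+  // sample of the low-pass-filtered signal.)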
+ if (sample_rate_hz_ == AudioProcessing::kSampleRate16kHz) { + low_pass_filter_.Initialize(kLowPassFilterCoefficients_16kHz); + } else if (sample_rate_hz_ == AudioProcessing::kSampleRate32kHz) { + low_pass_filter_.Initialize(kLowPassFilterCoefficients_32kHz); + } else if (sample_rate_hz_ == AudioProcessing::kSampleRate48kHz) { + low_pass_filter_.Initialize(kLowPassFilterCoefficients_48kHz); + } +} + +void DownSampler::DownSample(rtc::ArrayView<const float> in, + rtc::ArrayView<float> out) { + data_dumper_->DumpWav("lc_down_sampler_input", in, sample_rate_hz_, 1); + RTC_DCHECK_EQ(sample_rate_hz_ * AudioProcessing::kChunkSizeMs / 1000, + in.size()); + RTC_DCHECK_EQ( + AudioProcessing::kSampleRate8kHz * AudioProcessing::kChunkSizeMs / 1000, + out.size()); + const size_t kMaxNumFrames = + AudioProcessing::kSampleRate48kHz * AudioProcessing::kChunkSizeMs / 1000; + float x[kMaxNumFrames]; + + // Band-limit the signal to 4 kHz. + if (sample_rate_hz_ != AudioProcessing::kSampleRate8kHz) { + low_pass_filter_.Process(in, rtc::ArrayView<float>(x, in.size())); + + // Downsample the signal. + size_t k = 0; + for (size_t j = 0; j < out.size(); ++j) { + RTC_DCHECK_GT(kMaxNumFrames, k); + out[j] = x[k]; + k += down_sampling_factor_; + } + } else { + std::copy(in.data(), in.data() + in.size(), out.data()); + } + + data_dumper_->DumpWav("lc_down_sampler_output", out, + AudioProcessing::kSampleRate8kHz, 1); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/down_sampler.h b/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/down_sampler.h new file mode 100644 index 0000000000..d6502425a1 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/down_sampler.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_DOWN_SAMPLER_H_ +#define MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_DOWN_SAMPLER_H_ + +#include "api/array_view.h" +#include "modules/audio_processing/level_controller/biquad_filter.h" +#include "rtc_base/constructormagic.h" + +namespace webrtc { + +class ApmDataDumper; + +class DownSampler { + public: + explicit DownSampler(ApmDataDumper* data_dumper); + void Initialize(int sample_rate_hz); + + void DownSample(rtc::ArrayView<const float> in, rtc::ArrayView<float> out); + + private: + ApmDataDumper* data_dumper_; + int sample_rate_hz_; + int down_sampling_factor_; + BiQuadFilter low_pass_filter_; + + RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(DownSampler); +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_DOWN_SAMPLER_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/gain_applier.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/gain_applier.cc new file mode 100644 index 0000000000..018f809e01 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/gain_applier.cc @@ -0,0 +1,160 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/level_controller/gain_applier.h" + +#include <algorithm> + +#include "api/array_view.h" +#include "rtc_base/checks.h" + +#include "modules/audio_processing/audio_buffer.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" + +namespace webrtc { +namespace { + +const float kMaxSampleValue = 32767.f; +const float kMinSampleValue = -32767.f; + +int CountSaturations(rtc::ArrayView<const float> in) { + return std::count_if(in.begin(), in.end(), [](const float& v) { + return v >= kMaxSampleValue || v <= kMinSampleValue; + }); +} + +int CountSaturations(const AudioBuffer& audio) { + int num_saturations = 0; + for (size_t k = 0; k < audio.num_channels(); ++k) { + num_saturations += CountSaturations(rtc::ArrayView<const float>( + audio.channels_const_f()[k], audio.num_frames())); + } + return num_saturations; +} + +void LimitToAllowedRange(rtc::ArrayView<float> x) { + for (auto& v : x) { + v = std::max(kMinSampleValue, v); + v = std::min(kMaxSampleValue, v); + } +} + +void LimitToAllowedRange(AudioBuffer* audio) { + for (size_t k = 0; k < audio->num_channels(); ++k) { + LimitToAllowedRange( + rtc::ArrayView<float>(audio->channels_f()[k], audio->num_frames())); + } +} + +float ApplyIncreasingGain(float new_gain, + float old_gain, + float step_size, + rtc::ArrayView<float> x) { + RTC_DCHECK_LT(0.f, step_size); + float gain = old_gain; + for (auto& v : x) { + gain = std::min(new_gain, gain + step_size); + v *= gain; + } + return gain; +} + +float ApplyDecreasingGain(float new_gain, + float old_gain, + float step_size, + rtc::ArrayView<float> x) { + RTC_DCHECK_GT(0.f, step_size); + float gain = old_gain; + for (auto& v : x) { + gain = std::max(new_gain, gain + step_size); + v *= gain; + } + return gain; +} + +float ApplyConstantGain(float gain, rtc::ArrayView<float> x) { + for (auto& v : x) { + v *= gain; + } + + return gain; +} + +float ApplyGain(float new_gain, + float old_gain, + float increase_step_size, + float decrease_step_size, + rtc::ArrayView<float> x) { + RTC_DCHECK_LT(0.f, increase_step_size); + RTC_DCHECK_GT(0.f, decrease_step_size); + if (new_gain == old_gain) { + return ApplyConstantGain(new_gain, x); + } else if (new_gain > old_gain) { + return ApplyIncreasingGain(new_gain, old_gain, increase_step_size, x); + } else { + return ApplyDecreasingGain(new_gain, old_gain, decrease_step_size, x); + } +} + +} // namespace + +GainApplier::GainApplier(ApmDataDumper* data_dumper) + : data_dumper_(data_dumper) {} + +void GainApplier::Initialize(int sample_rate_hz) { + RTC_DCHECK(sample_rate_hz == AudioProcessing::kSampleRate8kHz || + sample_rate_hz == AudioProcessing::kSampleRate16kHz || + sample_rate_hz == AudioProcessing::kSampleRate32kHz || + sample_rate_hz == AudioProcessing::kSampleRate48kHz); + const float kGainIncreaseStepSize48kHz = 0.0001f; + const float kGainDecreaseStepSize48kHz = -0.01f; + const float kGainSaturatedDecreaseStepSize48kHz = -0.05f; + + last_frame_was_saturated_ = false; + old_gain_ = 1.f; + gain_increase_step_size_ = + kGainIncreaseStepSize48kHz * + (static_cast<float>(AudioProcessing::kSampleRate48kHz) / sample_rate_hz); + gain_normal_decrease_step_size_ = + kGainDecreaseStepSize48kHz * + 
(static_cast<float>(AudioProcessing::kSampleRate48kHz) / sample_rate_hz); + gain_saturated_decrease_step_size_ = + kGainSaturatedDecreaseStepSize48kHz * + (static_cast<float>(AudioProcessing::kSampleRate48kHz) / sample_rate_hz); +} + +int GainApplier::Process(float new_gain, AudioBuffer* audio) { + RTC_CHECK_NE(0.f, gain_increase_step_size_); + RTC_CHECK_NE(0.f, gain_normal_decrease_step_size_); + RTC_CHECK_NE(0.f, gain_saturated_decrease_step_size_); + int num_saturations = 0; + if (new_gain != 1.f) { + float last_applied_gain = 1.f; + float gain_decrease_step_size = last_frame_was_saturated_ + ? gain_saturated_decrease_step_size_ + : gain_normal_decrease_step_size_; + for (size_t k = 0; k < audio->num_channels(); ++k) { + last_applied_gain = ApplyGain( + new_gain, old_gain_, gain_increase_step_size_, + gain_decrease_step_size, + rtc::ArrayView<float>(audio->channels_f()[k], audio->num_frames())); + } + + num_saturations = CountSaturations(*audio); + LimitToAllowedRange(audio); + old_gain_ = last_applied_gain; + } + + data_dumper_->DumpRaw("lc_last_applied_gain", 1, &old_gain_); + + return num_saturations; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/gain_applier.h b/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/gain_applier.h new file mode 100644 index 0000000000..5669f45bf7 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/gain_applier.h @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_GAIN_APPLIER_H_ +#define MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_GAIN_APPLIER_H_ + +#include "rtc_base/constructormagic.h" + +namespace webrtc { + +class ApmDataDumper; +class AudioBuffer; + +class GainApplier { + public: + explicit GainApplier(ApmDataDumper* data_dumper); + void Initialize(int sample_rate_hz); + + // Applies the specified gain to the audio frame and returns the resulting + // number of saturated sample values. + int Process(float new_gain, AudioBuffer* audio); + + private: + ApmDataDumper* const data_dumper_; + float old_gain_ = 1.f; + float gain_increase_step_size_ = 0.f; + float gain_normal_decrease_step_size_ = 0.f; + float gain_saturated_decrease_step_size_ = 0.f; + bool last_frame_was_saturated_; + RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(GainApplier); +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_GAIN_APPLIER_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/gain_selector.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/gain_selector.cc new file mode 100644 index 0000000000..3ab75b1ce6 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/gain_selector.cc @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/level_controller/gain_selector.h"
+
+#include <math.h>
+#include <algorithm>
+
+#include "modules/audio_processing/include/audio_processing.h"
+#include "modules/audio_processing/level_controller/level_controller_constants.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+GainSelector::GainSelector() {
+  Initialize(AudioProcessing::kSampleRate48kHz);
+}
+
+void GainSelector::Initialize(int sample_rate_hz) {
+  gain_ = 1.f;
+  frame_length_ = rtc::CheckedDivExact(sample_rate_hz, 100);
+  highly_nonstationary_signal_hold_counter_ = 0;
+}
+
+// Chooses the gain to be applied by the level controller such that
+// 1) The level of the stationary noise does not exceed
+//    a predefined threshold.
+// 2) The gain does not exceed the gain that has been found
+//    to saturate the signal.
+// 3) The peak level achieves the target peak level.
+// 4) The gain is not below 1.
+// 5) The gain is 1 if the signal has been classified as stationary
+//    for a long time.
+// 6) The gain is not above the maximum gain.
+float GainSelector::GetNewGain(float peak_level,
+                               float noise_energy,
+                               float saturating_gain,
+                               bool gain_jumpstart,
+                               SignalClassifier::SignalType signal_type) {
+  RTC_DCHECK_LT(0.f, peak_level);
+
+  if (signal_type == SignalClassifier::SignalType::kHighlyNonStationary ||
+      gain_jumpstart) {
+    highly_nonstationary_signal_hold_counter_ = 100;
+  } else {
+    highly_nonstationary_signal_hold_counter_ =
+        std::max(0, highly_nonstationary_signal_hold_counter_ - 1);
+  }
+
+  float desired_gain;
+  if (highly_nonstationary_signal_hold_counter_ > 0) {
+    // Compute a desired gain that ensures that the peak level is amplified to
+    // the target level.
+    desired_gain = kTargetLcPeakLevel / peak_level;
+
+    // Limit the desired gain so that it does not amplify the noise too much.
+    float max_noise_energy = kMaxLcNoisePower * frame_length_;
+    if (noise_energy * desired_gain * desired_gain > max_noise_energy) {
+      RTC_DCHECK_LE(0.f, noise_energy);
+      desired_gain = sqrtf(max_noise_energy / noise_energy);
+    }
+  } else {
+    // If the signal has been stationary for a long while, apply a gain of 1 to
+    // avoid amplifying pure noise.
+    desired_gain = 1.0f;
+  }
+
+  // Smoothly update the gain towards the desired gain.
+  gain_ += 0.2f * (desired_gain - gain_);
+
+  // Limit the gain to not exceed the maximum and the saturating gains, and to
+  // ensure that the lowest possible gain is 1.
+  gain_ = std::min(gain_, saturating_gain);
+  gain_ = std::min(gain_, kMaxLcGain);
+  gain_ = std::max(gain_, 1.f);
+
+  return gain_;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/gain_selector.h b/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/gain_selector.h
new file mode 100644
index 0000000000..7966c438d7
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/gain_selector.h
@@ -0,0 +1,40 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_GAIN_SELECTOR_H_ +#define MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_GAIN_SELECTOR_H_ + +#include "rtc_base/constructormagic.h" + +#include "modules/audio_processing/level_controller/signal_classifier.h" + +namespace webrtc { + +class GainSelector { + public: + GainSelector(); + void Initialize(int sample_rate_hz); + float GetNewGain(float peak_level, + float noise_energy, + float saturating_gain, + bool gain_jumpstart, + SignalClassifier::SignalType signal_type); + + private: + float gain_; + size_t frame_length_; + int highly_nonstationary_signal_hold_counter_; + + RTC_DISALLOW_COPY_AND_ASSIGN(GainSelector); +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_GAIN_SELECTOR_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/level_controller.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/level_controller.cc new file mode 100644 index 0000000000..521f82e842 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/level_controller.cc @@ -0,0 +1,293 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/level_controller/level_controller.h" + +#include <math.h> +#include <algorithm> +#include <numeric> + +#include "api/array_view.h" +#include "modules/audio_processing/audio_buffer.h" +#include "modules/audio_processing/level_controller/gain_applier.h" +#include "modules/audio_processing/level_controller/gain_selector.h" +#include "modules/audio_processing/level_controller/noise_level_estimator.h" +#include "modules/audio_processing/level_controller/peak_level_estimator.h" +#include "modules/audio_processing/level_controller/saturating_gain_estimator.h" +#include "modules/audio_processing/level_controller/signal_classifier.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/arraysize.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" +#include "system_wrappers/include/metrics.h" + +namespace webrtc { +namespace { + +void UpdateAndRemoveDcLevel(float forgetting_factor, + float* dc_level, + rtc::ArrayView<float> x) { + RTC_DCHECK(!x.empty()); + float mean = + std::accumulate(x.begin(), x.end(), 0.0f) / static_cast<float>(x.size()); + *dc_level += forgetting_factor * (mean - *dc_level); + + for (float& v : x) { + v -= *dc_level; + } +} + +float FrameEnergy(const AudioBuffer& audio) { + float energy = 0.f; + for (size_t k = 0; k < audio.num_channels(); ++k) { + float channel_energy = + std::accumulate(audio.channels_const_f()[k], + audio.channels_const_f()[k] + audio.num_frames(), 0.f, + [](float a, float b) -> float { return a + b * b; }); + energy = std::max(channel_energy, energy); + } + return energy; +} + +float PeakLevel(const AudioBuffer& audio) { + float peak_level = 0.f; + for (size_t k = 0; k < audio.num_channels(); ++k) { + auto* channel_peak_level = std::max_element( + audio.channels_const_f()[k], + audio.channels_const_f()[k] + audio.num_frames(), + [](float a, float b) { return std::abs(a) < std::abs(b); }); + peak_level = std::max(*channel_peak_level, 
peak_level); + } + return peak_level; +} + +const int kMetricsFrameInterval = 1000; + +} // namespace + +int LevelController::instance_count_ = 0; + +void LevelController::Metrics::Initialize(int sample_rate_hz) { + RTC_DCHECK(sample_rate_hz == AudioProcessing::kSampleRate8kHz || + sample_rate_hz == AudioProcessing::kSampleRate16kHz || + sample_rate_hz == AudioProcessing::kSampleRate32kHz || + sample_rate_hz == AudioProcessing::kSampleRate48kHz); + + Reset(); + frame_length_ = rtc::CheckedDivExact(sample_rate_hz, 100); +} + +void LevelController::Metrics::Reset() { + metrics_frame_counter_ = 0; + gain_sum_ = 0.f; + peak_level_sum_ = 0.f; + noise_energy_sum_ = 0.f; + max_gain_ = 0.f; + max_peak_level_ = 0.f; + max_noise_energy_ = 0.f; +} + +void LevelController::Metrics::Update(float long_term_peak_level, + float noise_energy, + float gain, + float frame_peak_level) { + const float kdBFSOffset = 90.3090f; + gain_sum_ += gain; + peak_level_sum_ += long_term_peak_level; + noise_energy_sum_ += noise_energy; + max_gain_ = std::max(max_gain_, gain); + max_peak_level_ = std::max(max_peak_level_, long_term_peak_level); + max_noise_energy_ = std::max(max_noise_energy_, noise_energy); + + ++metrics_frame_counter_; + if (metrics_frame_counter_ == kMetricsFrameInterval) { + RTC_DCHECK_LT(0, frame_length_); + RTC_DCHECK_LT(0, kMetricsFrameInterval); + + const int max_noise_power_dbfs = static_cast<int>( + 10 * log10(max_noise_energy_ / frame_length_ + 1e-10f) - kdBFSOffset); + RTC_HISTOGRAM_COUNTS("WebRTC.Audio.LevelControl.MaxNoisePower", + max_noise_power_dbfs, -90, 0, 50); + + const int average_noise_power_dbfs = static_cast<int>( + 10 * log10(noise_energy_sum_ / (frame_length_ * kMetricsFrameInterval) + + 1e-10f) - + kdBFSOffset); + RTC_HISTOGRAM_COUNTS("WebRTC.Audio.LevelControl.AverageNoisePower", + average_noise_power_dbfs, -90, 0, 50); + + const int max_peak_level_dbfs = static_cast<int>( + 10 * log10(max_peak_level_ * max_peak_level_ + 1e-10f) - kdBFSOffset); + RTC_HISTOGRAM_COUNTS("WebRTC.Audio.LevelControl.MaxPeakLevel", + max_peak_level_dbfs, -90, 0, 50); + + const int average_peak_level_dbfs = static_cast<int>( + 10 * log10(peak_level_sum_ * peak_level_sum_ / + (kMetricsFrameInterval * kMetricsFrameInterval) + + 1e-10f) - + kdBFSOffset); + RTC_HISTOGRAM_COUNTS("WebRTC.Audio.LevelControl.AveragePeakLevel", + average_peak_level_dbfs, -90, 0, 50); + + RTC_DCHECK_LE(1.f, max_gain_); + RTC_DCHECK_LE(1.f, gain_sum_ / kMetricsFrameInterval); + + const int max_gain_db = static_cast<int>(10 * log10(max_gain_ * max_gain_)); + RTC_HISTOGRAM_COUNTS("WebRTC.Audio.LevelControl.MaxGain", max_gain_db, 0, + 33, 30); + + const int average_gain_db = static_cast<int>( + 10 * log10(gain_sum_ * gain_sum_ / + (kMetricsFrameInterval * kMetricsFrameInterval))); + RTC_HISTOGRAM_COUNTS("WebRTC.Audio.LevelControl.AverageGain", + average_gain_db, 0, 33, 30); + + const int long_term_peak_level_dbfs = static_cast<int>( + 10 * log10(long_term_peak_level * long_term_peak_level + 1e-10f) - + kdBFSOffset); + + const int frame_peak_level_dbfs = static_cast<int>( + 10 * log10(frame_peak_level * frame_peak_level + 1e-10f) - kdBFSOffset); + + RTC_LOG(LS_INFO) << "Level Controller metrics: {" + << "Max noise power: " << max_noise_power_dbfs << " dBFS, " + << "Average noise power: " << average_noise_power_dbfs + << " dBFS, " + << "Max long term peak level: " << max_peak_level_dbfs + << " dBFS, " + << "Average long term peak level: " + << average_peak_level_dbfs << " dBFS, " + << "Max gain: " << max_gain_db << " dB, " + << 
"Average gain: " << average_gain_db << " dB, " + << "Long term peak level: " << long_term_peak_level_dbfs + << " dBFS, " + << "Last frame peak level: " << frame_peak_level_dbfs + << " dBFS" + << "}"; + + Reset(); + } +} + +LevelController::LevelController() + : data_dumper_(new ApmDataDumper(instance_count_)), + gain_applier_(data_dumper_.get()), + signal_classifier_(data_dumper_.get()), + peak_level_estimator_(kTargetLcPeakLeveldBFS) { + Initialize(AudioProcessing::kSampleRate48kHz); + ++instance_count_; +} + +LevelController::~LevelController() {} + +void LevelController::Initialize(int sample_rate_hz) { + RTC_DCHECK(sample_rate_hz == AudioProcessing::kSampleRate8kHz || + sample_rate_hz == AudioProcessing::kSampleRate16kHz || + sample_rate_hz == AudioProcessing::kSampleRate32kHz || + sample_rate_hz == AudioProcessing::kSampleRate48kHz); + data_dumper_->InitiateNewSetOfRecordings(); + gain_selector_.Initialize(sample_rate_hz); + gain_applier_.Initialize(sample_rate_hz); + signal_classifier_.Initialize(sample_rate_hz); + noise_level_estimator_.Initialize(sample_rate_hz); + peak_level_estimator_.Initialize(config_.initial_peak_level_dbfs); + saturating_gain_estimator_.Initialize(); + metrics_.Initialize(sample_rate_hz); + + last_gain_ = 1.0f; + sample_rate_hz_ = sample_rate_hz; + dc_forgetting_factor_ = 0.01f * sample_rate_hz / 48000.f; + std::fill(dc_level_, dc_level_ + arraysize(dc_level_), 0.f); +} + +void LevelController::Process(AudioBuffer* audio) { + RTC_DCHECK_LT(0, audio->num_channels()); + RTC_DCHECK_GE(2, audio->num_channels()); + RTC_DCHECK_NE(0.f, dc_forgetting_factor_); + RTC_DCHECK(sample_rate_hz_); + data_dumper_->DumpWav("lc_input", audio->num_frames(), + audio->channels_const_f()[0], *sample_rate_hz_, 1); + + // Remove DC level. + for (size_t k = 0; k < audio->num_channels(); ++k) { + UpdateAndRemoveDcLevel( + dc_forgetting_factor_, &dc_level_[k], + rtc::ArrayView<float>(audio->channels_f()[k], audio->num_frames())); + } + + SignalClassifier::SignalType signal_type; + signal_classifier_.Analyze(*audio, &signal_type); + int tmp = static_cast<int>(signal_type); + data_dumper_->DumpRaw("lc_signal_type", 1, &tmp); + + // Estimate the noise energy. + float noise_energy = + noise_level_estimator_.Analyze(signal_type, FrameEnergy(*audio)); + + // Estimate the overall signal peak level. + const float frame_peak_level = PeakLevel(*audio); + const float long_term_peak_level = + peak_level_estimator_.Analyze(signal_type, frame_peak_level); + + float saturating_gain = saturating_gain_estimator_.GetGain(); + + // Compute the new gain to apply. + last_gain_ = + gain_selector_.GetNewGain(long_term_peak_level, noise_energy, + saturating_gain, gain_jumpstart_, signal_type); + + // Unflag the jumpstart of the gain as it should only happen once. + gain_jumpstart_ = false; + + // Apply the gain to the signal. + int num_saturations = gain_applier_.Process(last_gain_, audio); + + // Estimate the gain that saturates the overall signal. + saturating_gain_estimator_.Update(last_gain_, num_saturations); + + // Update the metrics. 
+  metrics_.Update(long_term_peak_level, noise_energy, last_gain_,
+                  frame_peak_level);
+
+  data_dumper_->DumpRaw("lc_selected_gain", 1, &last_gain_);
+  data_dumper_->DumpRaw("lc_noise_energy", 1, &noise_energy);
+  data_dumper_->DumpRaw("lc_peak_level", 1, &long_term_peak_level);
+  data_dumper_->DumpRaw("lc_saturating_gain", 1, &saturating_gain);
+
+  data_dumper_->DumpWav("lc_output", audio->num_frames(),
+                        audio->channels_f()[0], *sample_rate_hz_, 1);
+}
+
+void LevelController::ApplyConfig(
+    const AudioProcessing::Config::LevelController& config) {
+  RTC_DCHECK(Validate(config));
+  config_ = config;
+  peak_level_estimator_.Initialize(config_.initial_peak_level_dbfs);
+  gain_jumpstart_ = true;
+}
+
+std::string LevelController::ToString(
+    const AudioProcessing::Config::LevelController& config) {
+  std::stringstream ss;
+  ss << "{"
+     << "enabled: " << (config.enabled ? "true" : "false") << ", "
+     << "initial_peak_level_dbfs: " << config.initial_peak_level_dbfs << "}";
+  return ss.str();
+}
+
+bool LevelController::Validate(
+    const AudioProcessing::Config::LevelController& config) {
+  return (config.initial_peak_level_dbfs <
+              std::numeric_limits<float>::epsilon() &&
+          config.initial_peak_level_dbfs >
+              -(100.f + std::numeric_limits<float>::epsilon()));
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/level_controller.h b/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/level_controller.h
new file mode 100644
index 0000000000..224b886abd
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/level_controller.h
@@ -0,0 +1,95 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_LEVEL_CONTROLLER_H_
+#define MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_LEVEL_CONTROLLER_H_
+
+#include <memory>
+#include <vector>
+
+#include "api/optional.h"
+#include "modules/audio_processing/include/audio_processing.h"
+#include "modules/audio_processing/level_controller/gain_applier.h"
+#include "modules/audio_processing/level_controller/gain_selector.h"
+#include "modules/audio_processing/level_controller/noise_level_estimator.h"
+#include "modules/audio_processing/level_controller/peak_level_estimator.h"
+#include "modules/audio_processing/level_controller/saturating_gain_estimator.h"
+#include "modules/audio_processing/level_controller/signal_classifier.h"
+#include "rtc_base/constructormagic.h"
+
+namespace webrtc {
+
+class ApmDataDumper;
+class AudioBuffer;
+
+class LevelController {
+ public:
+  LevelController();
+  ~LevelController();
+
+  void Initialize(int sample_rate_hz);
+  void Process(AudioBuffer* audio);
+  float GetLastGain() { return last_gain_; }
+
+  // TODO(peah): This method is a temporary solution as the aim is to
+  // instead apply the config inside the constructor. Therefore this is likely
+  // to change.
+  void ApplyConfig(const AudioProcessing::Config::LevelController& config);
+  // Validates a config.
+  static bool Validate(const AudioProcessing::Config::LevelController& config);
+  // Dumps a config to a string.
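+  // (Editorial note: e.g. a config with enabled == true and
+  // initial_peak_level_dbfs == -6.0206f is rendered as
+  // "{enabled: true, initial_peak_level_dbfs: -6.0206}"; see the ToString
+  // test in level_controller_unittest.cc.)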
+ static std::string ToString( + const AudioProcessing::Config::LevelController& config); + + private: + class Metrics { + public: + Metrics() { Initialize(AudioProcessing::kSampleRate48kHz); } + void Initialize(int sample_rate_hz); + void Update(float long_term_peak_level, + float noise_level, + float gain, + float frame_peak_level); + + private: + void Reset(); + + size_t metrics_frame_counter_; + float gain_sum_; + float peak_level_sum_; + float noise_energy_sum_; + float max_gain_; + float max_peak_level_; + float max_noise_energy_; + float frame_length_; + }; + + std::unique_ptr<ApmDataDumper> data_dumper_; + GainSelector gain_selector_; + GainApplier gain_applier_; + SignalClassifier signal_classifier_; + NoiseLevelEstimator noise_level_estimator_; + PeakLevelEstimator peak_level_estimator_; + SaturatingGainEstimator saturating_gain_estimator_; + Metrics metrics_; + rtc::Optional<int> sample_rate_hz_; + static int instance_count_; + float dc_level_[2]; + float dc_forgetting_factor_; + float last_gain_; + bool gain_jumpstart_ = false; + AudioProcessing::Config::LevelController config_; + + RTC_DISALLOW_COPY_AND_ASSIGN(LevelController); +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_LEVEL_CONTROLLER_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/level_controller_complexity_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/level_controller_complexity_unittest.cc new file mode 100644 index 0000000000..7d61cff21d --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/level_controller_complexity_unittest.cc @@ -0,0 +1,240 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include <numeric> +#include <vector> + +#include "api/array_view.h" +#include "modules/audio_processing/audio_buffer.h" +#include "modules/audio_processing/include/audio_processing.h" +#include "modules/audio_processing/level_controller/level_controller.h" +#include "modules/audio_processing/test/audio_buffer_tools.h" +#include "modules/audio_processing/test/bitexactness_tools.h" +#include "modules/audio_processing/test/performance_timer.h" +#include "modules/audio_processing/test/simulator_buffers.h" +#include "rtc_base/random.h" +#include "system_wrappers/include/clock.h" +#include "test/gtest.h" +#include "test/testsupport/perf_test.h" + +namespace webrtc { +namespace { + +const size_t kNumFramesToProcess = 300; +const size_t kNumFramesToProcessAtWarmup = 300; +const size_t kToTalNumFrames = + kNumFramesToProcess + kNumFramesToProcessAtWarmup; + +void RunStandaloneSubmodule(int sample_rate_hz, size_t num_channels) { + test::SimulatorBuffers buffers(sample_rate_hz, sample_rate_hz, sample_rate_hz, + sample_rate_hz, num_channels, num_channels, + num_channels, num_channels); + test::PerformanceTimer timer(kNumFramesToProcess); + + LevelController level_controller; + level_controller.Initialize(sample_rate_hz); + + for (size_t frame_no = 0; frame_no < kToTalNumFrames; ++frame_no) { + buffers.UpdateInputBuffers(); + + if (frame_no >= kNumFramesToProcessAtWarmup) { + timer.StartTimer(); + } + level_controller.Process(buffers.capture_input_buffer.get()); + if (frame_no >= kNumFramesToProcessAtWarmup) { + timer.StopTimer(); + } + } + webrtc::test::PrintResultMeanAndError( + "level_controller_call_durations", + "_" + std::to_string(sample_rate_hz) + "Hz_" + + std::to_string(num_channels) + "_channels", + "StandaloneLevelControl", timer.GetDurationAverage(), + timer.GetDurationStandardDeviation(), "us", false); +} + +void RunTogetherWithApm(const std::string& test_description, + int render_input_sample_rate_hz, + int render_output_sample_rate_hz, + int capture_input_sample_rate_hz, + int capture_output_sample_rate_hz, + size_t num_channels, + bool use_mobile_aec, + bool include_default_apm_processing) { + test::SimulatorBuffers buffers( + render_input_sample_rate_hz, capture_input_sample_rate_hz, + render_output_sample_rate_hz, capture_output_sample_rate_hz, num_channels, + num_channels, num_channels, num_channels); + test::PerformanceTimer render_timer(kNumFramesToProcess); + test::PerformanceTimer capture_timer(kNumFramesToProcess); + test::PerformanceTimer total_timer(kNumFramesToProcess); + + webrtc::Config config; + AudioProcessing::Config apm_config; + if (include_default_apm_processing) { + config.Set<DelayAgnostic>(new DelayAgnostic(true)); + config.Set<ExtendedFilter>(new ExtendedFilter(true)); + } + apm_config.level_controller.enabled = true; + apm_config.residual_echo_detector.enabled = include_default_apm_processing; + + std::unique_ptr<AudioProcessing> apm; + apm.reset(AudioProcessing::Create(config)); + ASSERT_TRUE(apm.get()); + apm->ApplyConfig(apm_config); + + ASSERT_EQ(AudioProcessing::kNoError, + apm->gain_control()->Enable(include_default_apm_processing)); + if (use_mobile_aec) { + ASSERT_EQ(AudioProcessing::kNoError, + apm->echo_cancellation()->Enable(false)); + ASSERT_EQ(AudioProcessing::kNoError, apm->echo_control_mobile()->Enable( + include_default_apm_processing)); + } else { + ASSERT_EQ(AudioProcessing::kNoError, + apm->echo_cancellation()->Enable(include_default_apm_processing)); + ASSERT_EQ(AudioProcessing::kNoError, + apm->echo_control_mobile()->Enable(false)); 
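+    // (Editorial note: in both branches the unused echo controller is
+    // explicitly disabled, so at most one of the full and mobile echo
+    // controllers is ever active in this test setup.)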
+ } + apm_config.high_pass_filter.enabled = include_default_apm_processing; + ASSERT_EQ(AudioProcessing::kNoError, + apm->noise_suppression()->Enable(include_default_apm_processing)); + ASSERT_EQ(AudioProcessing::kNoError, + apm->voice_detection()->Enable(include_default_apm_processing)); + ASSERT_EQ(AudioProcessing::kNoError, + apm->level_estimator()->Enable(include_default_apm_processing)); + + StreamConfig render_input_config(render_input_sample_rate_hz, num_channels, + false); + StreamConfig render_output_config(render_output_sample_rate_hz, num_channels, + false); + StreamConfig capture_input_config(capture_input_sample_rate_hz, num_channels, + false); + StreamConfig capture_output_config(capture_output_sample_rate_hz, + num_channels, false); + + for (size_t frame_no = 0; frame_no < kToTalNumFrames; ++frame_no) { + buffers.UpdateInputBuffers(); + + if (frame_no >= kNumFramesToProcessAtWarmup) { + total_timer.StartTimer(); + render_timer.StartTimer(); + } + ASSERT_EQ(AudioProcessing::kNoError, + apm->ProcessReverseStream( + &buffers.render_input[0], render_input_config, + render_output_config, &buffers.render_output[0])); + + if (frame_no >= kNumFramesToProcessAtWarmup) { + render_timer.StopTimer(); + + capture_timer.StartTimer(); + } + + ASSERT_EQ(AudioProcessing::kNoError, apm->set_stream_delay_ms(0)); + ASSERT_EQ( + AudioProcessing::kNoError, + apm->ProcessStream(&buffers.capture_input[0], capture_input_config, + capture_output_config, &buffers.capture_output[0])); + + if (frame_no >= kNumFramesToProcessAtWarmup) { + capture_timer.StopTimer(); + total_timer.StopTimer(); + } + } + + webrtc::test::PrintResultMeanAndError( + "level_controller_call_durations", + "_" + std::to_string(render_input_sample_rate_hz) + "_" + + std::to_string(render_output_sample_rate_hz) + "_" + + std::to_string(capture_input_sample_rate_hz) + "_" + + std::to_string(capture_output_sample_rate_hz) + "Hz_" + + std::to_string(num_channels) + "_channels" + "_render", + test_description, render_timer.GetDurationAverage(), + render_timer.GetDurationStandardDeviation(), "us", false); + webrtc::test::PrintResultMeanAndError( + "level_controller_call_durations", + "_" + std::to_string(render_input_sample_rate_hz) + "_" + + std::to_string(render_output_sample_rate_hz) + "_" + + std::to_string(capture_input_sample_rate_hz) + "_" + + std::to_string(capture_output_sample_rate_hz) + "Hz_" + + std::to_string(num_channels) + "_channels" + "_capture", + test_description, capture_timer.GetDurationAverage(), + capture_timer.GetDurationStandardDeviation(), "us", false); + webrtc::test::PrintResultMeanAndError( + "level_controller_call_durations", + "_" + std::to_string(render_input_sample_rate_hz) + "_" + + std::to_string(render_output_sample_rate_hz) + "_" + + std::to_string(capture_input_sample_rate_hz) + "_" + + std::to_string(capture_output_sample_rate_hz) + "Hz_" + + std::to_string(num_channels) + "_channels" + "_total", + test_description, total_timer.GetDurationAverage(), + total_timer.GetDurationStandardDeviation(), "us", false); +} + +} // namespace + +// TODO(peah): Reactivate once issue 7712 has been resolved. 
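+// (Editorial note: the helpers above start their PerformanceTimers only after
+// the first kNumFramesToProcessAtWarmup frames, so that one-off
+// initialization work does not skew the reported call-duration averages.)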
+TEST(LevelControllerPerformanceTest, DISABLED_StandaloneProcessing) { + int sample_rates_to_test[] = { + AudioProcessing::kSampleRate8kHz, AudioProcessing::kSampleRate16kHz, + AudioProcessing::kSampleRate32kHz, AudioProcessing::kSampleRate48kHz}; + for (auto sample_rate : sample_rates_to_test) { + for (size_t num_channels = 1; num_channels <= 2; ++num_channels) { + RunStandaloneSubmodule(sample_rate, num_channels); + } + } +} + +void TestSomeSampleRatesWithApm(const std::string& test_name, + bool use_mobile_agc, + bool include_default_apm_processing) { + // Test some stereo combinations first. + size_t num_channels = 2; + RunTogetherWithApm(test_name, 48000, 48000, AudioProcessing::kSampleRate16kHz, + AudioProcessing::kSampleRate32kHz, num_channels, + use_mobile_agc, include_default_apm_processing); + RunTogetherWithApm(test_name, 48000, 48000, AudioProcessing::kSampleRate48kHz, + AudioProcessing::kSampleRate8kHz, num_channels, + use_mobile_agc, include_default_apm_processing); + RunTogetherWithApm(test_name, 48000, 48000, 44100, 44100, num_channels, + use_mobile_agc, include_default_apm_processing); + + // Then test mono combinations. + num_channels = 1; + RunTogetherWithApm(test_name, 48000, 48000, AudioProcessing::kSampleRate48kHz, + AudioProcessing::kSampleRate48kHz, num_channels, + use_mobile_agc, include_default_apm_processing); +} + +// TODO(peah): Reactivate once issue 7712 has been resolved. +#if !defined(WEBRTC_ANDROID) +TEST(LevelControllerPerformanceTest, DISABLED_ProcessingViaApm) { +#else +TEST(LevelControllerPerformanceTest, DISABLED_ProcessingViaApm) { +#endif + // Run without default APM processing and desktop AGC. + TestSomeSampleRatesWithApm("SimpleLevelControlViaApm", false, false); +} + +// TODO(peah): Reactivate once issue 7712 has been resolved. +#if !defined(WEBRTC_ANDROID) +TEST(LevelControllerPerformanceTest, DISABLED_InteractionWithDefaultApm) { +#else +TEST(LevelControllerPerformanceTest, DISABLED_InteractionWithDefaultApm) { +#endif + bool include_default_apm_processing = true; + TestSomeSampleRatesWithApm("LevelControlAndDefaultDesktopApm", false, + include_default_apm_processing); + TestSomeSampleRatesWithApm("LevelControlAndDefaultMobileApm", true, + include_default_apm_processing); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/level_controller_constants.h b/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/level_controller_constants.h new file mode 100644 index 0000000000..6cf2cd4c7e --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/level_controller_constants.h @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_LEVEL_CONTROLLER_CONSTANTS_H_
+#define MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_LEVEL_CONTROLLER_CONSTANTS_H_
+
+namespace webrtc {
+
+const float kMaxLcGain = 10;
+const float kMaxLcNoisePower = 100.f * 100.f;
+const float kTargetLcPeakLevel = 16384.f;
+const float kTargetLcPeakLeveldBFS = -6.0206f;
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_LEVEL_CONTROLLER_CONSTANTS_H_
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/level_controller_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/level_controller_unittest.cc
new file mode 100644
index 0000000000..cb36ae08f3
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/level_controller_unittest.cc
@@ -0,0 +1,156 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <vector>
+
+#include "api/array_view.h"
+#include "api/optional.h"
+#include "modules/audio_processing/audio_buffer.h"
+#include "modules/audio_processing/include/audio_processing.h"
+#include "modules/audio_processing/level_controller/level_controller.h"
+#include "modules/audio_processing/test/audio_buffer_tools.h"
+#include "modules/audio_processing/test/bitexactness_tools.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace {
+
+const int kNumFramesToProcess = 1000;
+
+// Processes a specified number of frames, verifies the results, and reports
+// any errors.
+void RunBitexactnessTest(int sample_rate_hz,
+                         size_t num_channels,
+                         rtc::Optional<float> initial_peak_level_dbfs,
+                         rtc::ArrayView<const float> output_reference) {
+  LevelController level_controller;
+  level_controller.Initialize(sample_rate_hz);
+  if (initial_peak_level_dbfs) {
+    AudioProcessing::Config::LevelController config;
+    config.initial_peak_level_dbfs = *initial_peak_level_dbfs;
+    level_controller.ApplyConfig(config);
+  }
+
+  int samples_per_channel = rtc::CheckedDivExact(sample_rate_hz, 100);
+  const StreamConfig capture_config(sample_rate_hz, num_channels, false);
+  AudioBuffer capture_buffer(
+      capture_config.num_frames(), capture_config.num_channels(),
+      capture_config.num_frames(), capture_config.num_channels(),
+      capture_config.num_frames());
+  test::InputAudioFile capture_file(
+      test::GetApmCaptureTestVectorFileName(sample_rate_hz));
+  std::vector<float> capture_input(samples_per_channel * num_channels);
+  for (size_t frame_no = 0; frame_no < kNumFramesToProcess; ++frame_no) {
+    ReadFloatSamplesFromStereoFile(samples_per_channel, num_channels,
+                                   &capture_file, capture_input);
+
+    test::CopyVectorToAudioBuffer(capture_config, capture_input,
+                                  &capture_buffer);
+
+    level_controller.Process(&capture_buffer);
+  }
+
+  // Extract test results.
+  std::vector<float> capture_output;
+  test::ExtractVectorFromAudioBuffer(capture_config, &capture_buffer,
+                                     &capture_output);
+
+  // Compare the output with the reference. Only the first values of the
+  // output from the last processed frame are compared, to avoid having to
+  // specify all preceding frames as test vectors.
+  // As the algorithm being tested has a memory, testing only the last frame
+  // implicitly also tests the preceding frames.
+  const float kVectorElementErrorBound = 1.0f / 32768.0f;
+  EXPECT_TRUE(test::VerifyDeinterleavedArray(
+      capture_config.num_frames(), capture_config.num_channels(),
+      output_reference, capture_output, kVectorElementErrorBound));
+}
+
+}  // namespace
+
+TEST(LevelControllerConfig, ToString) {
+  AudioProcessing::Config config;
+  config.level_controller.enabled = true;
+  config.level_controller.initial_peak_level_dbfs = -6.0206f;
+  EXPECT_EQ("{enabled: true, initial_peak_level_dbfs: -6.0206}",
+            LevelController::ToString(config.level_controller));
+
+  config.level_controller.enabled = false;
+  config.level_controller.initial_peak_level_dbfs = -50.f;
+  EXPECT_EQ("{enabled: false, initial_peak_level_dbfs: -50}",
+            LevelController::ToString(config.level_controller));
+}
+
+TEST(LevelControlBitExactnessTest, Mono8kHz) {
+  const float kOutputReference[] = {-0.013939f, -0.012154f, -0.009054f};
+  RunBitexactnessTest(AudioProcessing::kSampleRate8kHz, 1, rtc::nullopt,
+                      kOutputReference);
+}
+
+TEST(LevelControlBitExactnessTest, Mono16kHz) {
+  const float kOutputReference[] = {-0.013706f, -0.013215f, -0.013018f};
+  RunBitexactnessTest(AudioProcessing::kSampleRate16kHz, 1, rtc::nullopt,
+                      kOutputReference);
+}
+
+TEST(LevelControlBitExactnessTest, Mono32kHz) {
+  const float kOutputReference[] = {-0.014495f, -0.016425f, -0.016085f};
+  RunBitexactnessTest(AudioProcessing::kSampleRate32kHz, 1, rtc::nullopt,
+                      kOutputReference);
+}
+
+// TODO(peah): Investigate why this particular test case differs between
+// Android and the rest of the platforms.
+TEST(LevelControlBitExactnessTest, Mono48kHz) {
+#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \
+      defined(WEBRTC_ANDROID))
+  const float kOutputReference[] = {-0.014277f, -0.015180f, -0.017437f};
+#else
+  const float kOutputReference[] = {-0.014306f, -0.015209f, -0.017466f};
+#endif
+  RunBitexactnessTest(AudioProcessing::kSampleRate48kHz, 1, rtc::nullopt,
+                      kOutputReference);
+}
+
+TEST(LevelControlBitExactnessTest, Stereo8kHz) {
+  const float kOutputReference[] = {-0.014063f, -0.008450f, -0.012159f,
+                                    -0.051967f, -0.023202f, -0.047858f};
+  RunBitexactnessTest(AudioProcessing::kSampleRate8kHz, 2, rtc::nullopt,
+                      kOutputReference);
+}
+
+TEST(LevelControlBitExactnessTest, Stereo16kHz) {
+  const float kOutputReference[] = {-0.012714f, -0.005896f, -0.012220f,
+                                    -0.053306f, -0.024549f, -0.051527f};
+  RunBitexactnessTest(AudioProcessing::kSampleRate16kHz, 2, rtc::nullopt,
+                      kOutputReference);
+}
+
+TEST(LevelControlBitExactnessTest, Stereo32kHz) {
+  const float kOutputReference[] = {-0.011764f, -0.007044f, -0.013472f,
+                                    -0.053537f, -0.026322f, -0.056253f};
+  RunBitexactnessTest(AudioProcessing::kSampleRate32kHz, 2, rtc::nullopt,
+                      kOutputReference);
+}
+
+TEST(LevelControlBitExactnessTest, Stereo48kHz) {
+  const float kOutputReference[] = {-0.010643f, -0.006334f, -0.011377f,
+                                    -0.049088f, -0.023600f, -0.050465f};
+  RunBitexactnessTest(AudioProcessing::kSampleRate48kHz, 2, rtc::nullopt,
+                      kOutputReference);
+}
+
+TEST(LevelControlBitExactnessTest, MonoInitial48kHz) {
+  const float kOutputReference[] = {-0.013884f, -0.014761f, -0.016951f};
+  RunBitexactnessTest(AudioProcessing::kSampleRate48kHz, 1, -50,
+                      kOutputReference);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/noise_level_estimator.cc
b/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/noise_level_estimator.cc new file mode 100644 index 0000000000..abf4ea2cb1 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/noise_level_estimator.cc @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/level_controller/noise_level_estimator.h" + +#include <algorithm> + +#include "modules/audio_processing/audio_buffer.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" + +namespace webrtc { + +NoiseLevelEstimator::NoiseLevelEstimator() { + Initialize(AudioProcessing::kSampleRate48kHz); +} + +NoiseLevelEstimator::~NoiseLevelEstimator() {} + +void NoiseLevelEstimator::Initialize(int sample_rate_hz) { + noise_energy_ = 1.f; + first_update_ = true; + min_noise_energy_ = sample_rate_hz * 2.f * 2.f / 100.f; + noise_energy_hold_counter_ = 0; +} + +float NoiseLevelEstimator::Analyze(SignalClassifier::SignalType signal_type, + float frame_energy) { + if (frame_energy <= 0.f) { + return noise_energy_; + } + + if (first_update_) { + // Initialize the noise energy to the frame energy. + first_update_ = false; + return noise_energy_ = std::max(frame_energy, min_noise_energy_); + } + + // Update the noise estimate in a minimum statistics-type manner. + if (signal_type == SignalClassifier::SignalType::kStationary) { + if (frame_energy > noise_energy_) { + // Leak the estimate upwards towards the frame energy if no recent + // downward update. + noise_energy_hold_counter_ = std::max(noise_energy_hold_counter_ - 1, 0); + + if (noise_energy_hold_counter_ == 0) { + noise_energy_ = std::min(noise_energy_ * 1.01f, frame_energy); + } + } else { + // Update smoothly downwards with a limited maximum update magnitude. + noise_energy_ = + std::max(noise_energy_ * 0.9f, + noise_energy_ + 0.05f * (frame_energy - noise_energy_)); + noise_energy_hold_counter_ = 1000; + } + } else { + // For a non-stationary signal, leak the estimate downwards in order to + // avoid estimate locking due to incorrect signal classification. + noise_energy_ = noise_energy_ * 0.99f; + } + + // Ensure a minimum of the estimate. + return noise_energy_ = std::max(noise_energy_, min_noise_energy_); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/noise_level_estimator.h b/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/noise_level_estimator.h new file mode 100644 index 0000000000..94ef6737e7 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/noise_level_estimator.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_NOISE_LEVEL_ESTIMATOR_H_
+#define MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_NOISE_LEVEL_ESTIMATOR_H_
+
+#include "modules/audio_processing/level_controller/signal_classifier.h"
+#include "rtc_base/constructormagic.h"
+
+namespace webrtc {
+
+class NoiseLevelEstimator {
+ public:
+  NoiseLevelEstimator();
+  ~NoiseLevelEstimator();
+  void Initialize(int sample_rate_hz);
+  float Analyze(SignalClassifier::SignalType signal_type, float frame_energy);
+
+ private:
+  float min_noise_energy_ = 0.f;
+  bool first_update_;
+  float noise_energy_;
+  int noise_energy_hold_counter_;
+
+  RTC_DISALLOW_COPY_AND_ASSIGN(NoiseLevelEstimator);
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_NOISE_LEVEL_ESTIMATOR_H_
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/noise_spectrum_estimator.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/noise_spectrum_estimator.cc
new file mode 100644
index 0000000000..6e921c24d1
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/noise_spectrum_estimator.cc
@@ -0,0 +1,68 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/level_controller/noise_spectrum_estimator.h"
+
+#include <string.h>
+#include <algorithm>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "rtc_base/arraysize.h"
+
+namespace webrtc {
+namespace {
+constexpr float kMinNoisePower = 100.f;
+}  // namespace
+
+NoiseSpectrumEstimator::NoiseSpectrumEstimator(ApmDataDumper* data_dumper)
+    : data_dumper_(data_dumper) {
+  Initialize();
+}
+
+void NoiseSpectrumEstimator::Initialize() {
+  std::fill(noise_spectrum_, noise_spectrum_ + arraysize(noise_spectrum_),
+            kMinNoisePower);
+}
+
+void NoiseSpectrumEstimator::Update(rtc::ArrayView<const float> spectrum,
+                                    bool first_update) {
+  RTC_DCHECK_EQ(65, spectrum.size());
+
+  if (first_update) {
+    // Initialize the noise spectral estimate with the signal spectrum.
+    std::copy(spectrum.data(), spectrum.data() + spectrum.size(),
+              noise_spectrum_);
+  } else {
+    // Smoothly update the noise spectral estimate towards the signal spectrum
+    // such that the magnitude of each update is limited.
+    for (size_t k = 0; k < spectrum.size(); ++k) {
+      if (noise_spectrum_[k] < spectrum[k]) {
+        noise_spectrum_[k] = std::min(
+            1.01f * noise_spectrum_[k],
+            noise_spectrum_[k] + 0.05f * (spectrum[k] - noise_spectrum_[k]));
+      } else {
+        noise_spectrum_[k] = std::max(
+            0.99f * noise_spectrum_[k],
+            noise_spectrum_[k] + 0.05f * (spectrum[k] - noise_spectrum_[k]));
+      }
+    }
+  }
+
+  // Ensure that the noise spectral estimate does not become too low.
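+  // (Editorial note: kMinNoisePower acts as a floor here; during near-silent
+  // input the 0.99f decay above would otherwise drive the estimate towards
+  // zero.)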
+ for (auto& v : noise_spectrum_) { + v = std::max(v, kMinNoisePower); + } + + data_dumper_->DumpRaw("lc_noise_spectrum", 65, noise_spectrum_); + data_dumper_->DumpRaw("lc_signal_spectrum", spectrum); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/noise_spectrum_estimator.h b/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/noise_spectrum_estimator.h new file mode 100644 index 0000000000..f10933ec96 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/noise_spectrum_estimator.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_NOISE_SPECTRUM_ESTIMATOR_H_ +#define MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_NOISE_SPECTRUM_ESTIMATOR_H_ + +#include "api/array_view.h" +#include "rtc_base/constructormagic.h" + +namespace webrtc { + +class ApmDataDumper; + +class NoiseSpectrumEstimator { + public: + explicit NoiseSpectrumEstimator(ApmDataDumper* data_dumper); + void Initialize(); + void Update(rtc::ArrayView<const float> spectrum, bool first_update); + + rtc::ArrayView<const float> GetNoiseSpectrum() const { + return rtc::ArrayView<const float>(noise_spectrum_); + } + + private: + ApmDataDumper* data_dumper_; + float noise_spectrum_[65]; + + RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(NoiseSpectrumEstimator); +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_NOISE_SPECTRUM_ESTIMATOR_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/peak_level_estimator.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/peak_level_estimator.cc new file mode 100644 index 0000000000..e78d74f514 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/peak_level_estimator.cc @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/level_controller/peak_level_estimator.h" + +#include <algorithm> + +#include "modules/audio_processing/audio_buffer.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" + +namespace webrtc { +namespace { + +constexpr float kMinLevel = 30.f; + +} // namespace + +PeakLevelEstimator::PeakLevelEstimator(float initial_peak_level_dbfs) { + Initialize(initial_peak_level_dbfs); +} + +PeakLevelEstimator::~PeakLevelEstimator() {} + +void PeakLevelEstimator::Initialize(float initial_peak_level_dbfs) { + RTC_DCHECK_LE(-100.f, initial_peak_level_dbfs); + RTC_DCHECK_GE(0.f, initial_peak_level_dbfs); + + peak_level_ = std::pow(10.f, initial_peak_level_dbfs / 20.f) * 32768.f; + peak_level_ = std::max(peak_level_, kMinLevel); + + hold_counter_ = 0; + initialization_phase_ = true; +} + +float PeakLevelEstimator::Analyze(SignalClassifier::SignalType signal_type, + float frame_peak_level) { + if (frame_peak_level == 0) { + RTC_DCHECK_LE(kMinLevel, peak_level_); + return peak_level_; + } + + if (peak_level_ < frame_peak_level) { + // Smoothly update the estimate upwards when the frame peak level is + // higher than the estimate. + peak_level_ += 0.1f * (frame_peak_level - peak_level_); + hold_counter_ = 100; + initialization_phase_ = false; + } else { + hold_counter_ = std::max(0, hold_counter_ - 1); + + // When the signal is highly non-stationary, update the estimate slowly + // downwards if the estimate is higher than the frame peak level. During + // the initialization phase, the downward update is applied regardless of + // the signal type. + if ((signal_type == SignalClassifier::SignalType::kHighlyNonStationary && + hold_counter_ == 0) || + initialization_phase_) { + peak_level_ = + std::max(peak_level_ + 0.01f * (frame_peak_level - peak_level_), + peak_level_ * 0.995f); + } + } + + peak_level_ = std::max(peak_level_, kMinLevel); + + return peak_level_; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/peak_level_estimator.h b/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/peak_level_estimator.h new file mode 100644 index 0000000000..0aa55d2d55 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/peak_level_estimator.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree.
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_PEAK_LEVEL_ESTIMATOR_H_ +#define MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_PEAK_LEVEL_ESTIMATOR_H_ + +#include "modules/audio_processing/level_controller/level_controller_constants.h" +#include "modules/audio_processing/level_controller/signal_classifier.h" +#include "rtc_base/constructormagic.h" + +namespace webrtc { + +class PeakLevelEstimator { + public: + explicit PeakLevelEstimator(float initial_peak_level_dbfs); + ~PeakLevelEstimator(); + void Initialize(float initial_peak_level_dbfs); + float Analyze(SignalClassifier::SignalType signal_type, + float frame_peak_level); + private: + float peak_level_; + int hold_counter_; + bool initialization_phase_; + + RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(PeakLevelEstimator); +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_PEAK_LEVEL_ESTIMATOR_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/saturating_gain_estimator.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/saturating_gain_estimator.cc new file mode 100644 index 0000000000..60110c684b --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/saturating_gain_estimator.cc @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/level_controller/saturating_gain_estimator.h" + +#include <math.h> +#include <algorithm> + +#include "modules/audio_processing/level_controller/level_controller_constants.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" + +namespace webrtc { + +SaturatingGainEstimator::SaturatingGainEstimator() { + Initialize(); +} + +SaturatingGainEstimator::~SaturatingGainEstimator() {} + +void SaturatingGainEstimator::Initialize() { + saturating_gain_ = kMaxLcGain; + saturating_gain_hold_counter_ = 0; +} + +void SaturatingGainEstimator::Update(float gain, int num_saturations) { + bool too_many_saturations = (num_saturations > 2); + + if (too_many_saturations) { + saturating_gain_ = 0.95f * gain; + saturating_gain_hold_counter_ = 1000; + } else { + saturating_gain_hold_counter_ = + std::max(0, saturating_gain_hold_counter_ - 1); + if (saturating_gain_hold_counter_ == 0) { + saturating_gain_ *= 1.001f; + saturating_gain_ = std::min(kMaxLcGain, saturating_gain_); + } + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/saturating_gain_estimator.h b/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/saturating_gain_estimator.h new file mode 100644 index 0000000000..8980f4ef97 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/saturating_gain_estimator.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_SATURATING_GAIN_ESTIMATOR_H_ +#define MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_SATURATING_GAIN_ESTIMATOR_H_ + +#include "rtc_base/constructormagic.h" + +namespace webrtc { + +class ApmDataDumper; + +class SaturatingGainEstimator { + public: + SaturatingGainEstimator(); + ~SaturatingGainEstimator(); + void Initialize(); + void Update(float gain, int num_saturations); + float GetGain() const { return saturating_gain_; } + + private: + float saturating_gain_; + int saturating_gain_hold_counter_; + + RTC_DISALLOW_COPY_AND_ASSIGN(SaturatingGainEstimator); +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_SATURATING_GAIN_ESTIMATOR_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/signal_classifier.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/signal_classifier.cc new file mode 100644 index 0000000000..d2d5917387 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/signal_classifier.cc @@ -0,0 +1,171 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/level_controller/signal_classifier.h" + +#include <algorithm> +#include <numeric> +#include <vector> + +#include "api/array_view.h" +#include "modules/audio_processing/audio_buffer.h" +#include "modules/audio_processing/level_controller/down_sampler.h" +#include "modules/audio_processing/level_controller/noise_spectrum_estimator.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/constructormagic.h" + +namespace webrtc { +namespace { + +void RemoveDcLevel(rtc::ArrayView<float> x) { + RTC_DCHECK_LT(0, x.size()); + float mean = std::accumulate(x.data(), x.data() + x.size(), 0.f); + mean /= x.size(); + + for (float& v : x) { + v -= mean; + } +} + +void PowerSpectrum(const OouraFft* ooura_fft, + rtc::ArrayView<const float> x, + rtc::ArrayView<float> spectrum) { + RTC_DCHECK_EQ(65, spectrum.size()); + RTC_DCHECK_EQ(128, x.size()); + float X[128]; + std::copy(x.data(), x.data() + x.size(), X); + ooura_fft->Fft(X); + + float* X_p = X; + RTC_DCHECK_EQ(X_p, &X[0]); + spectrum[0] = (*X_p) * (*X_p); + ++X_p; + RTC_DCHECK_EQ(X_p, &X[1]); + spectrum[64] = (*X_p) * (*X_p); + for (int k = 1; k < 64; ++k) { + ++X_p; + RTC_DCHECK_EQ(X_p, &X[2 * k]); + spectrum[k] = (*X_p) * (*X_p); + ++X_p; + RTC_DCHECK_EQ(X_p, &X[2 * k + 1]); + spectrum[k] += (*X_p) * (*X_p); + } +} + +webrtc::SignalClassifier::SignalType ClassifySignal( + rtc::ArrayView<const float> signal_spectrum, + rtc::ArrayView<const float> noise_spectrum, + ApmDataDumper* data_dumper) { + int num_stationary_bands = 0; + int num_highly_nonstationary_bands = 0; + + // Detect stationary and highly nonstationary bands. 
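+ // A band is counted as stationary when its signal power is within a + // factor of 3 of the noise estimate, and as highly nonstationary when + // its signal power exceeds 9 times the noise estimate; only bands + // 1 to 39 are inspected.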
+ for (size_t k = 1; k < 40; k++) { + if (signal_spectrum[k] < 3 * noise_spectrum[k] && + signal_spectrum[k] * 3 > noise_spectrum[k]) { + ++num_stationary_bands; + } else if (signal_spectrum[k] > 9 * noise_spectrum[k]) { + ++num_highly_nonstationary_bands; + } + } + + data_dumper->DumpRaw("lc_num_stationary_bands", 1, &num_stationary_bands); + data_dumper->DumpRaw("lc_num_highly_nonstationary_bands", 1, + &num_highly_nonstationary_bands); + + // Use the detected number of bands to classify the overall signal + // stationarity. + if (num_stationary_bands > 15) { + return SignalClassifier::SignalType::kStationary; + } else if (num_highly_nonstationary_bands > 15) { + return SignalClassifier::SignalType::kHighlyNonStationary; + } else { + return SignalClassifier::SignalType::kNonStationary; + } +} + +} // namespace + +SignalClassifier::FrameExtender::FrameExtender(size_t frame_size, + size_t extended_frame_size) + : x_old_(extended_frame_size - frame_size, 0.f) {} + +SignalClassifier::FrameExtender::~FrameExtender() = default; + +void SignalClassifier::FrameExtender::ExtendFrame( + rtc::ArrayView<const float> x, + rtc::ArrayView<float> x_extended) { + RTC_DCHECK_EQ(x_old_.size() + x.size(), x_extended.size()); + std::copy(x_old_.data(), x_old_.data() + x_old_.size(), x_extended.data()); + std::copy(x.data(), x.data() + x.size(), x_extended.data() + x_old_.size()); + std::copy(x_extended.data() + x_extended.size() - x_old_.size(), + x_extended.data() + x_extended.size(), x_old_.data()); +} + +SignalClassifier::SignalClassifier(ApmDataDumper* data_dumper) + : data_dumper_(data_dumper), + down_sampler_(data_dumper_), + noise_spectrum_estimator_(data_dumper_) { + Initialize(AudioProcessing::kSampleRate48kHz); +} +SignalClassifier::~SignalClassifier() {} + +void SignalClassifier::Initialize(int sample_rate_hz) { + down_sampler_.Initialize(sample_rate_hz); + noise_spectrum_estimator_.Initialize(); + frame_extender_.reset(new FrameExtender(80, 128)); + sample_rate_hz_ = sample_rate_hz; + initialization_frames_left_ = 2; + consistent_classification_counter_ = 3; + last_signal_type_ = SignalClassifier::SignalType::kNonStationary; +} + +void SignalClassifier::Analyze(const AudioBuffer& audio, + SignalType* signal_type) { + RTC_DCHECK_EQ(audio.num_frames(), sample_rate_hz_ / 100); + + // Compute the signal power spectrum. + float downsampled_frame[80]; + down_sampler_.DownSample(rtc::ArrayView<const float>( + audio.channels_const_f()[0], audio.num_frames()), + downsampled_frame); + float extended_frame[128]; + frame_extender_->ExtendFrame(downsampled_frame, extended_frame); + RemoveDcLevel(extended_frame); + float signal_spectrum[65]; + PowerSpectrum(&ooura_fft_, extended_frame, signal_spectrum); + + // Classify the signal based on the estimate of the noise spectrum and the + // signal spectrum estimate. + *signal_type = ClassifySignal(signal_spectrum, + noise_spectrum_estimator_.GetNoiseSpectrum(), + data_dumper_); + + // Update the noise spectrum based on the signal spectrum. + noise_spectrum_estimator_.Update(signal_spectrum, + initialization_frames_left_ > 0); + + // Update the number of frames until a reliable signal spectrum is achieved. 
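+ // (While initialization frames remain, NoiseSpectrumEstimator::Update + // copies the signal spectrum outright instead of smoothing towards it.)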
+ initialization_frames_left_ = std::max(0, initialization_frames_left_ - 1); + + if (last_signal_type_ == *signal_type) { + consistent_classification_counter_ = + std::max(0, consistent_classification_counter_ - 1); + } else { + last_signal_type_ = *signal_type; + consistent_classification_counter_ = 3; + } + + if (consistent_classification_counter_ > 0) { + *signal_type = SignalClassifier::SignalType::kNonStationary; + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/signal_classifier.h b/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/signal_classifier.h new file mode 100644 index 0000000000..2be13fef7a --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/level_controller/signal_classifier.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_SIGNAL_CLASSIFIER_H_ +#define MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_SIGNAL_CLASSIFIER_H_ + +#include <memory> +#include <vector> + +#include "api/array_view.h" +#include "modules/audio_processing/level_controller/down_sampler.h" +#include "modules/audio_processing/level_controller/noise_spectrum_estimator.h" +#include "modules/audio_processing/utility/ooura_fft.h" +#include "rtc_base/constructormagic.h" + +namespace webrtc { + +class ApmDataDumper; +class AudioBuffer; + +class SignalClassifier { + public: + enum class SignalType { kHighlyNonStationary, kNonStationary, kStationary }; + + explicit SignalClassifier(ApmDataDumper* data_dumper); + ~SignalClassifier(); + + void Initialize(int sample_rate_hz); + void Analyze(const AudioBuffer& audio, SignalType* signal_type); + + private: + class FrameExtender { + public: + FrameExtender(size_t frame_size, size_t extended_frame_size); + ~FrameExtender(); + + void ExtendFrame(rtc::ArrayView<const float> x, + rtc::ArrayView<float> x_extended); + + private: + std::vector<float> x_old_; + + RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(FrameExtender); + }; + + ApmDataDumper* const data_dumper_; + DownSampler down_sampler_; + std::unique_ptr<FrameExtender> frame_extender_; + NoiseSpectrumEstimator noise_spectrum_estimator_; + int sample_rate_hz_; + int initialization_frames_left_; + int consistent_classification_counter_; + SignalType last_signal_type_; + const OouraFft ooura_fft_; + RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(SignalClassifier); +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_SIGNAL_CLASSIFIER_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/level_estimator_impl.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/level_estimator_impl.cc new file mode 100644 index 0000000000..c937f84525 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/level_estimator_impl.cc @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/level_estimator_impl.h" + +#include "api/array_view.h" +#include "modules/audio_processing/audio_buffer.h" +#include "modules/audio_processing/rms_level.h" + +namespace webrtc { + +LevelEstimatorImpl::LevelEstimatorImpl(rtc::CriticalSection* crit) + : crit_(crit), rms_(new RmsLevel()) { + RTC_DCHECK(crit); +} + +LevelEstimatorImpl::~LevelEstimatorImpl() {} + +void LevelEstimatorImpl::Initialize() { + rtc::CritScope cs(crit_); + rms_->Reset(); +} + +void LevelEstimatorImpl::ProcessStream(AudioBuffer* audio) { + RTC_DCHECK(audio); + rtc::CritScope cs(crit_); + if (!enabled_) { + return; + } + + for (size_t i = 0; i < audio->num_channels(); i++) { + rms_->Analyze(rtc::ArrayView<const int16_t>(audio->channels_const()[i], + audio->num_frames())); + } +} + +int LevelEstimatorImpl::Enable(bool enable) { + rtc::CritScope cs(crit_); + if (enable && !enabled_) { + rms_->Reset(); + } + enabled_ = enable; + return AudioProcessing::kNoError; +} + +bool LevelEstimatorImpl::is_enabled() const { + rtc::CritScope cs(crit_); + return enabled_; +} + +int LevelEstimatorImpl::RMS() { + rtc::CritScope cs(crit_); + if (!enabled_) { + return AudioProcessing::kNotEnabledError; + } + + return rms_->Average(); +} +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/level_estimator_impl.h b/third_party/libwebrtc/webrtc/modules/audio_processing/level_estimator_impl.h new file mode 100644 index 0000000000..901ae4cf58 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/level_estimator_impl.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_LEVEL_ESTIMATOR_IMPL_H_ +#define MODULES_AUDIO_PROCESSING_LEVEL_ESTIMATOR_IMPL_H_ + +#include <memory> + +#include "modules/audio_processing/include/audio_processing.h" +#include "rtc_base/constructormagic.h" +#include "rtc_base/criticalsection.h" + +namespace webrtc { + +class AudioBuffer; +class RmsLevel; + +class LevelEstimatorImpl : public LevelEstimator { + public: + explicit LevelEstimatorImpl(rtc::CriticalSection* crit); + ~LevelEstimatorImpl() override; + + // TODO(peah): Fold into ctor, once public API is removed. + void Initialize(); + void ProcessStream(AudioBuffer* audio); + + // LevelEstimator implementation. 
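+ // RMS() forwards to RmsLevel::Average(); per the RmsLevel contract this + // is the RMS level in dBFS expressed as a positive value (0 corresponds + // to full scale), or kNotEnabledError when the estimator is disabled.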
+ int Enable(bool enable) override; + bool is_enabled() const override; + int RMS() override; + + private: + rtc::CriticalSection* const crit_ = nullptr; + bool enabled_ RTC_GUARDED_BY(crit_) = false; + std::unique_ptr<RmsLevel> rms_ RTC_GUARDED_BY(crit_); + RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(LevelEstimatorImpl); +}; +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_LEVEL_ESTIMATOR_IMPL_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/level_estimator_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/level_estimator_unittest.cc new file mode 100644 index 0000000000..94b84bbdc3 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/level_estimator_unittest.cc @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include <vector> + +#include "api/array_view.h" +#include "modules/audio_processing/audio_buffer.h" +#include "modules/audio_processing/level_estimator_impl.h" +#include "modules/audio_processing/test/audio_buffer_tools.h" +#include "modules/audio_processing/test/bitexactness_tools.h" +#include "test/gtest.h" + +namespace webrtc { +namespace { + +const size_t kNumFramesToProcess = 1000; + +// Processes a specified number of frames, verifies the results and reports +// any errors. +void RunBitexactnessTest(int sample_rate_hz, + size_t num_channels, + int rms_reference) { + rtc::CriticalSection crit_capture; + LevelEstimatorImpl level_estimator(&crit_capture); + level_estimator.Initialize(); + level_estimator.Enable(true); + + int samples_per_channel = rtc::CheckedDivExact(sample_rate_hz, 100); + StreamConfig capture_config(sample_rate_hz, num_channels, false); + AudioBuffer capture_buffer( + capture_config.num_frames(), capture_config.num_channels(), + capture_config.num_frames(), capture_config.num_channels(), + capture_config.num_frames()); + + test::InputAudioFile capture_file( + test::GetApmCaptureTestVectorFileName(sample_rate_hz)); + std::vector<float> capture_input(samples_per_channel * num_channels); + for (size_t frame_no = 0; frame_no < kNumFramesToProcess; ++frame_no) { + ReadFloatSamplesFromStereoFile(samples_per_channel, num_channels, + &capture_file, capture_input); + + test::CopyVectorToAudioBuffer(capture_config, capture_input, + &capture_buffer); + + level_estimator.ProcessStream(&capture_buffer); + } + + // Extract test results. + int rms = level_estimator.RMS(); + + // Compare the output to the reference.
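+ // (Each test configuration below hard-codes its expected value as + // kRmsReference.)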
+ EXPECT_EQ(rms_reference, rms); +} + +} // namespace + +TEST(LevelEstimatorBitExactnessTest, Mono8kHz) { + const int kRmsReference = 31; + + RunBitexactnessTest(8000, 1, kRmsReference); +} + +TEST(LevelEstimatorBitExactnessTest, Mono16kHz) { + const int kRmsReference = 31; + + RunBitexactnessTest(16000, 1, kRmsReference); +} + +TEST(LevelEstimatorBitExactnessTest, Mono32kHz) { + const int kRmsReference = 31; + + RunBitexactnessTest(32000, 1, kRmsReference); +} + +TEST(LevelEstimatorBitExactnessTest, Mono48kHz) { + const int kRmsReference = 31; + + RunBitexactnessTest(48000, 1, kRmsReference); +} + +TEST(LevelEstimatorBitExactnessTest, Stereo16kHz) { + const int kRmsReference = 30; + + RunBitexactnessTest(16000, 2, kRmsReference); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/logging/apm_data_dumper.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/logging/apm_data_dumper.cc new file mode 100644 index 0000000000..fb25b74fff --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/logging/apm_data_dumper.cc @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/logging/apm_data_dumper.h" + +#include <sstream> + +#include "rtc_base/stringutils.h" + +// Check to verify that the define is properly set. +#if !defined(WEBRTC_APM_DEBUG_DUMP) || \ + (WEBRTC_APM_DEBUG_DUMP != 0 && WEBRTC_APM_DEBUG_DUMP != 1) +#error "Set WEBRTC_APM_DEBUG_DUMP to either 0 or 1" +#endif + +namespace webrtc { + +namespace { + +#if WEBRTC_APM_DEBUG_DUMP == 1 +std::string FormFileName(const char* name, + int instance_index, + int reinit_index, + const std::string& suffix) { +#ifdef WEBRTC_WIN + char sep = '\\'; +#else + char sep = '/'; +#endif + + std::stringstream ss; + std::string base = rtc::LogMessage::aec_debug_filename(); + ss << base; + + if (base.length() && base.back() != sep) { + ss << sep; + } + + ss << name << "_" << instance_index << "-" << reinit_index << suffix; + return ss.str(); +} +#endif + +} // namespace + +#if WEBRTC_APM_DEBUG_DUMP == 1 +ApmDataDumper::ApmDataDumper(int instance_index) + : instance_index_(instance_index) + , debug_written_(0) {} +#else +ApmDataDumper::ApmDataDumper(int instance_index) {} +#endif + +ApmDataDumper::~ApmDataDumper() {} + +#if WEBRTC_APM_DEBUG_DUMP == 1 +FILE* ApmDataDumper::GetRawFile(const char* name) { + std::string filename = + FormFileName(name, instance_index_, recording_set_index_, ".dat"); + auto& f = raw_files_[filename]; + if (!f) { + f.reset(fopen(filename.c_str(), "wb")); + } + return f.get(); +} + +WavWriter* ApmDataDumper::GetWavFile(const char* name, + int sample_rate_hz, + int num_channels) { + std::string filename = + FormFileName(name, instance_index_, recording_set_index_, ".wav"); + auto& f = wav_files_[filename]; + if (!f) { + f.reset(new WavWriter(filename.c_str(), sample_rate_hz, num_channels)); + } + return f.get(); +} + +#endif + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/logging/apm_data_dumper.h b/third_party/libwebrtc/webrtc/modules/audio_processing/logging/apm_data_dumper.h new file mode 100644 index 
0000000000..83ff8b19ad --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/logging/apm_data_dumper.h @@ -0,0 +1,277 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_LOGGING_APM_DATA_DUMPER_H_ +#define MODULES_AUDIO_PROCESSING_LOGGING_APM_DATA_DUMPER_H_ + +#include <stdio.h> + +#include <memory> +#include <string> +#include <unordered_map> + +#include "api/array_view.h" +#include "common_audio/wav_file.h" +#include "rtc_base/constructormagic.h" +#include "rtc_base/logging.h" + +// Check to verify that the define is properly set. +#if !defined(WEBRTC_APM_DEBUG_DUMP) || \ + (WEBRTC_APM_DEBUG_DUMP != 0 && WEBRTC_APM_DEBUG_DUMP != 1) +#error "Set WEBRTC_APM_DEBUG_DUMP to either 0 or 1" +#endif + +namespace webrtc { + +#if WEBRTC_APM_DEBUG_DUMP == 1 +// Functor used as a custom deleter in the map of file pointers to raw +// files. +struct RawFileCloseFunctor { + void operator()(FILE* f) const { if (f) fclose(f); } +}; +#endif + +// Class that handles dumping of variables into files. +class ApmDataDumper { + public: + // Constructor that takes an instance index that may + // be used to distinguish data dumped from different + // instances of the code. + explicit ApmDataDumper(int instance_index); + + ~ApmDataDumper(); + + // Reinitializes the data dumping such that new versions + // of all files being dumped to are created. + void InitiateNewSetOfRecordings() { +#if WEBRTC_APM_DEBUG_DUMP == 1 + ++recording_set_index_; + debug_written_ = 0; +#endif + } + + // Methods for dumping data of various types in various formats.
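+ // A minimal usage sketch (illustrative names; assumes a build with + // WEBRTC_APM_DEBUG_DUMP set to 1 and dumping enabled via + // rtc::LogMessage::set_aec_debug(true)): + // + // ApmDataDumper dumper(0); + // float frame[160] = {0.f}; + // dumper.DumpRaw("example_frame", 160, frame); // -> example_frame_0-0.dat + // dumper.DumpWav("example_frame", 160, frame, 16000, 1); // -> example_frame_0-0.wav + //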
+ void DumpRaw(const char* name, double v) { +#if WEBRTC_APM_DEBUG_DUMP == 1 + if (rtc::LogMessage::aec_debug()) { + FILE* file = GetRawFile(name); + if (file) { + fwrite(&v, sizeof(v), 1, file); + } + } +#endif + } + + void DumpRaw(const char* name, size_t v_length, const double* v) { +#if WEBRTC_APM_DEBUG_DUMP == 1 + if (rtc::LogMessage::aec_debug()) { + FILE* file = GetRawFile(name); + if (file) { + fwrite(v, sizeof(v[0]), v_length, file); + } + } +#endif + } + + void DumpRaw(const char* name, rtc::ArrayView<const double> v) { +#if WEBRTC_APM_DEBUG_DUMP == 1 + DumpRaw(name, v.size(), v.data()); +#endif + } + + void DumpRaw(const char* name, float v) { +#if WEBRTC_APM_DEBUG_DUMP == 1 + if (rtc::LogMessage::aec_debug()) { + FILE* file = GetRawFile(name); + if (file) { + fwrite(&v, sizeof(v), 1, file); + } + } +#endif + } + + void DumpRaw(const char* name, size_t v_length, const float* v) { +#if WEBRTC_APM_DEBUG_DUMP == 1 + if (rtc::LogMessage::aec_debug()) { + FILE* file = GetRawFile(name); + if (file) { + fwrite(v, sizeof(v[0]), v_length, file); + } + } +#endif + } + + void DumpRaw(const char* name, rtc::ArrayView<const float> v) { +#if WEBRTC_APM_DEBUG_DUMP == 1 + DumpRaw(name, v.size(), v.data()); +#endif + } + + void DumpRaw(const char* name, bool v) { +#if WEBRTC_APM_DEBUG_DUMP == 1 + DumpRaw(name, static_cast<int16_t>(v)); +#endif + } + + void DumpRaw(const char* name, size_t v_length, const bool* v) { +#if WEBRTC_APM_DEBUG_DUMP == 1 + if (rtc::LogMessage::aec_debug()) { + FILE* file = GetRawFile(name); + if (file) { + for (size_t k = 0; k < v_length; ++k) { + int16_t value = static_cast<int16_t>(v[k]); + fwrite(&value, sizeof(value), 1, file); + } + } + } +#endif + } + + void DumpRaw(const char* name, rtc::ArrayView<const bool> v) { +#if WEBRTC_APM_DEBUG_DUMP == 1 + DumpRaw(name, v.size(), v.data()); +#endif + } + + void DumpRaw(const char* name, int16_t v) { +#if WEBRTC_APM_DEBUG_DUMP == 1 + if (rtc::LogMessage::aec_debug()) { + FILE* file = GetRawFile(name); + if (file) { + fwrite(&v, sizeof(v), 1, file); + } + } +#endif + } + + void DumpRaw(const char* name, size_t v_length, const int16_t* v) { +#if WEBRTC_APM_DEBUG_DUMP == 1 + if (rtc::LogMessage::aec_debug()) { + FILE* file = GetRawFile(name); + if (file) { + fwrite(v, sizeof(v[0]), v_length, file); + } + } +#endif + } + + void DumpRaw(const char* name, rtc::ArrayView<const int16_t> v) { +#if WEBRTC_APM_DEBUG_DUMP == 1 + DumpRaw(name, v.size(), v.data()); +#endif + } + + void DumpRaw(const char* name, int32_t v) { +#if WEBRTC_APM_DEBUG_DUMP == 1 + if (rtc::LogMessage::aec_debug()) { + FILE* file = GetRawFile(name); + if (file) { + fwrite(&v, sizeof(v), 1, file); + } + } +#endif + } + + void DumpRaw(const char* name, size_t v_length, const int32_t* v) { +#if WEBRTC_APM_DEBUG_DUMP == 1 + if (rtc::LogMessage::aec_debug()) { + FILE* file = GetRawFile(name); + if (file) { + fwrite(v, sizeof(v[0]), v_length, file); + } + } +#endif + } + + void DumpRaw(const char* name, size_t v) { +#if WEBRTC_APM_DEBUG_DUMP == 1 + if (rtc::LogMessage::aec_debug()) { + FILE* file = GetRawFile(name); + if (file) { + fwrite(&v, sizeof(v), 1, file); + } + } +#endif + } + + void DumpRaw(const char* name, size_t v_length, const size_t* v) { +#if WEBRTC_APM_DEBUG_DUMP == 1 + if (rtc::LogMessage::aec_debug()) { + FILE* file = GetRawFile(name); + if (file) { + fwrite(v, sizeof(v[0]), v_length, file); + } + } +#endif + } + + void DumpRaw(const char* name, rtc::ArrayView<const int32_t> v) { +#if WEBRTC_APM_DEBUG_DUMP == 1 + DumpRaw(name, v.size(), v.data());
+#endif + } + + void DumpWav(const char* name, + size_t v_length, + const float* v, + int sample_rate_hz, + int num_channels) { +#if WEBRTC_APM_DEBUG_DUMP == 1 + if (rtc::LogMessage::aec_debug()) { + WavWriter* file = GetWavFile(name, sample_rate_hz, num_channels); + file->WriteSamples(v, v_length); + // Cheat and use aec_near as a stand-in for "size of the largest file" + // in the dump. We're looking to limit the total time, and that's a + // reasonable stand-in. + if (strcmp(name, "aec_near") == 0) { + updateDebugWritten(v_length * sizeof(float)); + } + } +#endif + } + + void DumpWav(const char* name, + rtc::ArrayView<const float> v, + int sample_rate_hz, + int num_channels) { +#if WEBRTC_APM_DEBUG_DUMP == 1 + DumpWav(name, v.size(), v.data(), sample_rate_hz, num_channels); +#endif + } + + private: +#if WEBRTC_APM_DEBUG_DUMP == 1 + const int instance_index_; + int recording_set_index_ = 0; + std::unordered_map<std::string, std::unique_ptr<FILE, RawFileCloseFunctor>> + raw_files_; + std::unordered_map<std::string, std::unique_ptr<WavWriter>> wav_files_; + + FILE* GetRawFile(const char* name); + WavWriter* GetWavFile(const char* name, int sample_rate_hz, int num_channels); + + uint32_t debug_written_; + + void updateDebugWritten(uint32_t amount) { + debug_written_ += amount; + // Limit largest files to a specific (rough) size, to avoid filling up disk. + if (debug_written_ >= rtc::LogMessage::aec_debug_size()) { + rtc::LogMessage::set_aec_debug(false); + } + } + +#endif + RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(ApmDataDumper); +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_LOGGING_APM_DATA_DUMPER_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/low_cut_filter.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/low_cut_filter.cc new file mode 100644 index 0000000000..5245c68d02 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/low_cut_filter.cc @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/low_cut_filter.h" + +#include "common_audio/signal_processing/include/signal_processing_library.h" +#include "modules/audio_processing/audio_buffer.h" + +namespace webrtc { +namespace { +const int16_t kFilterCoefficients8kHz[5] = {3798, -7596, 3798, 7807, -3733}; +const int16_t kFilterCoefficients[5] = {4012, -8024, 4012, 8002, -3913}; +} // namespace + +class LowCutFilter::BiquadFilter { + public: + explicit BiquadFilter(int sample_rate_hz) + : ba_(sample_rate_hz == AudioProcessing::kSampleRate8kHz + ? 
kFilterCoefficients8kHz + : kFilterCoefficients) { + std::memset(x_, 0, sizeof(x_)); + std::memset(y_, 0, sizeof(y_)); + } + + void Process(int16_t* data, size_t length) { + const int16_t* const ba = ba_; + int16_t* x = x_; + int16_t* y = y_; + int32_t tmp_int32 = 0; + + for (size_t i = 0; i < length; i++) { + // y[i] = b[0] * x[i] + b[1] * x[i-1] + b[2] * x[i-2] + // + -a[1] * y[i-1] + -a[2] * y[i-2]; + + tmp_int32 = y[1] * ba[3]; // -a[1] * y[i-1] (low part) + tmp_int32 += y[3] * ba[4]; // -a[2] * y[i-2] (low part) + tmp_int32 = (tmp_int32 >> 15); + tmp_int32 += y[0] * ba[3]; // -a[1] * y[i-1] (high part) + tmp_int32 += y[2] * ba[4]; // -a[2] * y[i-2] (high part) + tmp_int32 *= 2; + + tmp_int32 += data[i] * ba[0]; // b[0] * x[i] + tmp_int32 += x[0] * ba[1]; // b[1] * x[i-1] + tmp_int32 += x[1] * ba[2]; // b[2] * x[i-2] + + // Update state (input part). + x[1] = x[0]; + x[0] = data[i]; + + // Update state (filtered part). + y[2] = y[0]; + y[3] = y[1]; + y[0] = static_cast<int16_t>(tmp_int32 >> 13); + + y[1] = static_cast<int16_t>((tmp_int32 & 0x00001FFF) * 4); + + // Rounding in Q12, i.e. add 2^11. + tmp_int32 += 2048; + + // Saturate (to 2^27) so that the HP filtered signal does not overflow. + tmp_int32 = WEBRTC_SPL_SAT(static_cast<int32_t>(134217727), tmp_int32, + static_cast<int32_t>(-134217728)); + + // Convert back to Q0 and use rounding. + data[i] = static_cast<int16_t>(tmp_int32 >> 12); + } + } + + private: + const int16_t* const ba_ = nullptr; + int16_t x_[2]; + int16_t y_[4]; +}; + +LowCutFilter::LowCutFilter(size_t channels, int sample_rate_hz) { + filters_.resize(channels); + for (size_t i = 0; i < channels; i++) { + filters_[i].reset(new BiquadFilter(sample_rate_hz)); + } +} + +LowCutFilter::~LowCutFilter() {} + +void LowCutFilter::Process(AudioBuffer* audio) { + RTC_DCHECK(audio); + RTC_DCHECK_GE(160, audio->num_frames_per_band()); + RTC_DCHECK_EQ(filters_.size(), audio->num_channels()); + for (size_t i = 0; i < filters_.size(); i++) { + filters_[i]->Process(audio->split_bands(i)[kBand0To8kHz], + audio->num_frames_per_band()); + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/low_cut_filter.h b/third_party/libwebrtc/webrtc/modules/audio_processing/low_cut_filter.h new file mode 100644 index 0000000000..fd4c6f19cb --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/low_cut_filter.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree.
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_LOW_CUT_FILTER_H_ +#define MODULES_AUDIO_PROCESSING_LOW_CUT_FILTER_H_ + +#include <memory> +#include <vector> + +#include "rtc_base/constructormagic.h" + +namespace webrtc { + +class AudioBuffer; + +class LowCutFilter { + public: + LowCutFilter(size_t channels, int sample_rate_hz); + ~LowCutFilter(); + void Process(AudioBuffer* audio); + + private: + class BiquadFilter; + std::vector<std::unique_ptr<BiquadFilter>> filters_; + RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(LowCutFilter); +}; +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_LOW_CUT_FILTER_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/low_cut_filter_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/low_cut_filter_unittest.cc new file mode 100644 index 0000000000..d98d665d37 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/low_cut_filter_unittest.cc @@ -0,0 +1,682 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include <vector> + +#include "api/array_view.h" +#include "modules/audio_processing/audio_buffer.h" +#include "modules/audio_processing/low_cut_filter.h" +#include "modules/audio_processing/test/audio_buffer_tools.h" +#include "modules/audio_processing/test/bitexactness_tools.h" +#include "test/gtest.h" + +namespace webrtc { +namespace { + +// Process one frame of data and produce the output. +std::vector<float> ProcessOneFrame(const std::vector<float>& frame_input, + const StreamConfig& stream_config, + LowCutFilter* low_cut_filter) { + AudioBuffer audio_buffer( + stream_config.num_frames(), stream_config.num_channels(), + stream_config.num_frames(), stream_config.num_channels(), + stream_config.num_frames()); + + test::CopyVectorToAudioBuffer(stream_config, frame_input, &audio_buffer); + low_cut_filter->Process(&audio_buffer); + std::vector<float> frame_output; + test::ExtractVectorFromAudioBuffer(stream_config, &audio_buffer, + &frame_output); + return frame_output; +} + +// Processes a specified number of frames, verifies the results and reports +// any errors. +void RunBitexactnessTest(int sample_rate, + int num_channels, + const std::vector<float>& input, + const std::vector<float>& reference) { + const StreamConfig stream_config(sample_rate, num_channels, false); + LowCutFilter low_cut_filter(num_channels, sample_rate); + + std::vector<float> output; + const size_t num_frames_to_process = + input.size() / + (stream_config.num_frames() * stream_config.num_channels()); + for (size_t frame_no = 0; frame_no < num_frames_to_process; ++frame_no) { + std::vector<float> frame_input( + input.begin() + + stream_config.num_frames() * stream_config.num_channels() * + frame_no, + input.begin() + + stream_config.num_frames() * stream_config.num_channels() * + (frame_no + 1)); + + output = ProcessOneFrame(frame_input, stream_config, &low_cut_filter); + } + + // Form a vector to compare the reference to. Only the last frame processed + // is compared, to avoid having to specify all preceding frames as + // inputs. As the algorithm being tested has a memory, testing only + // the last frame implicitly also tests the preceding frames.
+ const size_t reference_frame_length = + reference.size() / stream_config.num_channels(); + std::vector<float> output_to_verify; + for (size_t channel_no = 0; channel_no < stream_config.num_channels(); + ++channel_no) { + output_to_verify.insert( + output_to_verify.end(), + output.begin() + channel_no * stream_config.num_frames(), + output.begin() + channel_no * stream_config.num_frames() + + reference_frame_length); + } + + const float kElementErrorBound = 1.0f / 32768.0f; + EXPECT_TRUE(test::VerifyDeinterleavedArray( + reference_frame_length, num_channels, reference, output_to_verify, + kElementErrorBound)); +} + +// Method for forming a vector out of an array. +// TODO(peah): Remove once braced initialization is allowed. +std::vector<float> CreateVector(const rtc::ArrayView<const float>& array_view) { + std::vector<float> v; + for (auto value : array_view) { + v.push_back(value); + } + return v; +} +} // namespace + +TEST(LowCutFilterBitExactnessTest, Mono8kHzInitial) { + const float kReferenceInput[] = { + 0.153442f, -0.436920f, -0.057602f, -0.141767f, 0.108608f, 0.116834f, + 0.114979f, -0.103151f, -0.169925f, -0.167180f, 0.242024f, -0.525426f, + -0.058781f, 0.076667f, -0.185095f, 0.135319f, -0.020223f, -0.266058f, + 0.045755f, -0.076044f, -0.116221f, -0.201698f, 0.017423f, -0.523475f, + -0.112949f, -0.154125f, -0.258572f, 0.185075f, -0.208205f, 0.153298f, + 0.276703f, -0.044481f, 0.078771f, 0.181337f, -0.022962f, 0.153365f, + -0.358004f, 0.314864f, -0.280593f, -0.518572f, 0.392579f, -0.017786f, + 0.127293f, -0.103003f, -0.289389f, -0.871355f, 0.177583f, -0.081290f, + -0.055957f, 0.115011f, -0.402460f, -0.206836f, 0.325328f, 0.169526f, + -0.363311f, -0.624742f, -0.161979f, 0.060679f, 0.267214f, 0.026576f, + -0.318235f, 0.086812f, -0.332419f, -0.272485f, -0.185369f, -0.348598f, + -0.076833f, -0.255184f, -0.081007f, -0.131121f, -0.116196f, -0.142780f, + 0.349705f, 0.173054f, 0.016750f, -0.415957f, -0.461001f, -0.557111f, + 0.738711f, 0.275720f}; + + const float kReference[] = {0.142277f, -0.418518f, -0.028229f, -0.102112f, + 0.141270f, 0.137791f, 0.124577f, -0.088715f, + -0.142273f, -0.125885f, 0.266640f, -0.468079f}; + + RunBitexactnessTest( + 8000, 1, CreateVector(rtc::ArrayView<const float>(kReferenceInput)), + CreateVector(rtc::ArrayView<const float>(kReference))); +} + +TEST(LowCutFilterBitExactnessTest, Mono8kHzConverged) { + const float kReferenceInput[] = { + 0.153442f, -0.436920f, -0.057602f, -0.141767f, 0.108608f, 0.116834f, + 0.114979f, -0.103151f, -0.169925f, -0.167180f, 0.242024f, -0.525426f, + -0.058781f, 0.076667f, -0.185095f, 0.135319f, -0.020223f, -0.266058f, + 0.045755f, -0.076044f, -0.116221f, -0.201698f, 0.017423f, -0.523475f, + -0.112949f, -0.154125f, -0.258572f, 0.185075f, -0.208205f, 0.153298f, + 0.276703f, -0.044481f, 0.078771f, 0.181337f, -0.022962f, 0.153365f, + -0.358004f, 0.314864f, -0.280593f, -0.518572f, 0.392579f, -0.017786f, + 0.127293f, -0.103003f, -0.289389f, -0.871355f, 0.177583f, -0.081290f, + -0.055957f, 0.115011f, -0.402460f, -0.206836f, 0.325328f, 0.169526f, + -0.363311f, -0.624742f, -0.161979f, 0.060679f, 0.267214f, 0.026576f, + -0.318235f, 0.086812f, -0.332419f, -0.272485f, -0.185369f, -0.348598f, + -0.076833f, -0.255184f, -0.081007f, -0.131121f, -0.116196f, -0.142780f, + 0.349705f, 0.173054f, 0.016750f, -0.415957f, -0.461001f, -0.557111f, + 0.738711f, 0.275720f, 0.072868f, -0.276249f, -0.325055f, 0.155285f, + 0.443784f, -0.480153f, -0.127428f, -0.023901f, -0.564837f, 0.238538f, + -0.117578f, 0.542205f, -0.110840f, 0.116025f, -0.323939f, 
-0.177182f, + -0.331395f, 0.111316f, 0.369140f, -0.168329f, 0.123736f, -0.143013f, + 0.028953f, 0.339200f, 0.034107f, -0.294000f, -0.243034f, -0.048168f, + -0.054348f, -0.245504f, 0.051228f, 0.359128f, -0.071220f, -0.058006f, + -0.624248f, -0.219615f, -0.395067f, -0.109518f, 0.149032f, 0.431928f, + 0.509968f, -0.033143f, -0.090793f, 0.231809f, 0.138986f, 0.216989f, + 0.220683f, -0.419745f, 0.153222f, -0.025956f, -0.215572f, -0.196671f, + 0.363361f, -0.229604f, -0.350704f, 0.060875f, 0.570160f, 0.007246f, + 0.087419f, -0.266043f, 0.474729f, 0.035441f, 0.150312f, -0.269962f, + 0.242166f, 0.110343f, -0.327788f, 0.011268f, -0.127769f, 0.030978f, + -0.071045f, -0.053847f, -0.292886f, -0.091670f, 0.217351f, 0.494707f, + -0.329069f, 0.674122f, 0.432724f, 0.047781f, -0.085408f, -0.198105f, + 0.236135f, -0.196957f, -0.130968f, 0.250552f, 0.123613f, 0.254275f, + 0.143118f, -0.113676f, -0.145703f, 0.225812f, -0.190318f, 0.336481f, + 0.224206f, 0.081584f, 0.000915f, 0.103672f, 1.000000f, -0.031882f, + -0.441377f, 0.543033f, 0.172924f, -0.183717f, 0.742153f, 0.156224f, + 0.083422f, -0.220560f, -0.301964f, -0.501439f, -0.119920f, -0.298610f, + 0.183673f, -0.090064f, 0.501603f, 0.428330f, 0.046506f, -0.080178f, + 0.326700f, -0.325096f, 0.191029f, -0.189729f, -0.113513f, -0.190492f, + 0.163221f, -0.220631f, -0.301576f, 0.156799f, -0.120065f, 0.102529f, + -0.099779f, 0.076429f, -0.727157f, 0.132097f, 0.525583f, 0.294694f, + 0.258287f, -0.067977f, 0.051323f, 0.069258f, 0.027332f, -0.235482f, + -0.099882f, -0.049558f, -0.136291f, 0.237288f, 0.719757f, -0.375235f, + 0.036391f, -0.408991f, 0.369330f, 0.399785f, -0.471419f, 0.551138f, + -0.307569f, 0.064315f, 0.311605f, 0.041736f, 0.650943f, 0.780496f}; + + const float kReference[] = {-0.173553f, -0.265778f, 0.158757f, -0.259399f, + -0.176361f, 0.192877f, 0.056825f, 0.171453f, + 0.050752f, -0.194580f, -0.208679f, 0.153722f}; + + RunBitexactnessTest( + 8000, 1, CreateVector(rtc::ArrayView<const float>(kReferenceInput)), + CreateVector(rtc::ArrayView<const float>(kReference))); +} + +TEST(LowCutFilterBitExactnessTest, Stereo8kHzInitial) { + const float kReferenceInput[] = { + 0.790847f, 0.165037f, 0.165494f, 0.709852f, -0.930269f, 0.770840f, + -0.184538f, -0.927236f, 0.492296f, -0.690342f, -0.712183f, 0.211918f, + -0.491038f, -0.351692f, -0.196418f, -0.187253f, -0.227618f, 0.219604f, + -0.666219f, -0.623816f, -0.810742f, -0.353627f, 0.539194f, -0.531764f, + 0.480731f, 0.385637f, 0.648156f, 0.655955f, -0.413264f, -0.381262f, + 0.046060f, -0.349402f, 0.663685f, 0.620590f, 0.113997f, -0.474072f, + 0.361132f, -0.532694f, -0.087149f, -0.230866f, 0.077203f, 0.983407f, + 0.510441f, 0.960910f, -0.530435f, 0.057118f, -0.897128f, 0.513751f, + 0.203960f, 0.714337f, 0.976554f, 0.858969f, -0.180970f, -0.999317f, + 0.081757f, -0.584539f, -0.561433f, -0.348387f, -0.808101f, 0.495067f, + 0.497018f, 0.086599f, -0.323735f, 0.664667f, 0.105144f, 0.915086f, + 0.785667f, -0.286993f, 0.092804f, -0.306636f, 0.245606f, 0.593249f, + 0.491750f, -0.748928f, 0.644788f, -0.949699f, -0.171142f, 0.462815f, + 0.562748f, -0.265428f, 0.489736f, 0.784534f, -0.514793f, -0.740806f, + -0.549864f, -0.299972f, -0.425831f, 0.854976f, -0.897372f, 0.185334f, + -0.674202f, 0.676812f, -0.664878f, 0.004401f, 0.998659f, -0.289186f, + -0.905845f, -0.572679f, -0.204322f, -0.332664f, -0.540795f, 0.872240f, + 0.366378f, 0.924228f, -0.124054f, 0.880673f, -0.988331f, 0.220614f, + 0.602152f, -0.534037f, 0.864937f, 0.526526f, 0.652899f, 0.146927f, + 0.585163f, -0.341918f, -0.553076f, -0.375227f, 0.169047f, 0.659828f, 
+ -0.419075f, -0.194891f, 0.724115f, 0.229479f, 0.982376f, -0.592602f, + 0.654418f, 0.351723f, -0.502101f, -0.048429f, -0.201850f, 0.198876f, + 0.601046f, -0.789862f, 0.642884f, 0.682173f, -0.290988f, -0.139861f, + 0.144478f, 0.401649f, 0.484940f, 0.515768f, -0.221742f, -0.141395f, + 0.912689f, 0.145943f, 0.699444f, -0.447309f, 0.244647f, 0.176723f, + 0.926937f, -0.828195f, 0.000998f, 0.043179f, -0.819668f, 0.809333f, + 0.768778f, -0.122021f, 0.563445f, -0.703070f}; + + const float kReference[] = { + 0.733329f, 0.084109f, 0.072695f, 0.566210f, -1.000000f, 0.652120f, + -0.297424f, -0.964020f, 0.438551f, -0.698364f, -0.654449f, 0.266243f, + 0.454115f, 0.684774f, -0.586823f, -0.747345f, -0.503021f, -0.222961f, + -0.314972f, 0.907224f, -0.796265f, 0.284280f, -0.533417f, 0.773980f}; + + RunBitexactnessTest( + 8000, 2, CreateVector(rtc::ArrayView<const float>(kReferenceInput)), + CreateVector(rtc::ArrayView<const float>(kReference))); +} + +TEST(LowCutFilterBitExactnessTest, Stereo8kHzConverged) { + const float kReferenceInput[] = { + -0.502095f, -0.227154f, -0.137133f, 0.661773f, 0.649294f, -0.094003f, + -0.238880f, 0.851737f, 0.481687f, 0.475266f, 0.893832f, 0.020199f, + 0.583758f, -0.095653f, 0.698397f, -0.219138f, 0.476753f, 0.952877f, + 0.046598f, -0.140169f, -0.585684f, -0.353197f, -0.778260f, -0.249580f, + -0.340192f, -0.315790f, 0.634238f, 0.063371f, 0.042244f, 0.548619f, + -0.759474f, 0.250900f, -0.306703f, -0.330761f, 0.149233f, 0.727875f, + -0.602874f, 0.344902f, 0.803663f, -0.601686f, -0.403432f, -0.006959f, + 0.779808f, 0.002829f, -0.446010f, 0.067916f, 0.148499f, -0.174391f, + -0.970473f, 0.405530f, 0.013494f, -0.237468f, -0.870137f, -0.282840f, + -0.531498f, -0.592992f, 0.627559f, -0.213131f, -0.892850f, -0.249897f, + 0.549988f, -0.669405f, 0.824438f, -0.361588f, -0.340441f, -0.591529f, + 0.534429f, -0.860054f, 0.900068f, -0.683580f, -0.427108f, 0.374258f, + -0.717700f, 0.024173f, 0.442654f, 0.857690f, 0.464208f, 0.499696f, + -0.185361f, -0.521017f, 0.041701f, -0.561845f, 0.684776f, 0.325866f, + 0.632471f, 0.587755f, -0.061790f, -0.380950f, 0.375158f, 0.973704f, + 0.539868f, 0.659162f, 0.412170f, 0.190673f, 0.505748f, -0.006556f, + 0.730265f, -0.863945f, 0.937092f, -0.802487f, 0.093954f, -0.194060f, + -0.785920f, 0.448332f, 0.227365f, 0.565936f, 0.133241f, 0.622638f, + 0.153552f, 0.888058f, 0.742904f, 0.015204f, 0.577646f, -0.053939f, + 0.657603f, -0.355037f, 0.952293f, -0.443578f, -0.854338f, 0.502447f, + 0.662377f, 0.844676f, -0.345951f, 0.608139f, 0.076501f, -0.073410f, + 0.641501f, 0.903813f, -0.847454f, 0.417342f, -0.530147f, -0.202209f, + -0.463751f, 0.665027f, 0.990748f, 0.299502f, 0.407906f, 0.864606f, + 0.375305f, 0.136708f, -0.238305f, 0.269159f, -0.273543f, -0.184761f, + -0.262601f, -0.063202f, 0.006828f, 0.821072f, -0.587138f, -0.322793f, + 0.148251f, -0.026135f, -0.475562f, 0.159187f, 0.756655f, -0.878100f, + -0.118247f, -0.831484f, 0.126475f, 0.078621f, 0.536116f, -0.533819f, + 0.174723f, -0.082052f, 0.721963f, 0.321672f, -0.292242f, -0.305627f, + -0.492564f, 0.905056f, -0.403598f, -0.683188f, -0.277406f, 0.483258f, + 0.411800f, 0.401784f, -0.987548f, -0.251309f, 0.802991f, -0.363310f, + 0.194166f, -0.404410f, -0.749971f, -0.223289f, 0.635375f, 0.962351f, + 0.723980f, -0.832358f, -0.324576f, -0.527742f, -0.364389f, 0.968897f, + 0.096502f, 0.498503f, 0.683703f, -0.666221f, 0.806195f, -0.789752f, + 0.490186f, 0.458744f, 0.434939f, -0.733136f, -0.108422f, 0.017574f, + 0.060981f, 0.719434f, 0.355450f, 0.611677f, 0.062486f, 0.911792f, + -0.866646f, 0.083036f, -0.436679f, 
-0.038199f, 0.369728f, -0.583483f, + 0.216322f, -0.347648f, 0.761694f, -0.733211f, -0.795184f, 0.918234f, + -0.694196f, -0.694924f, -0.688895f, -0.820861f, -0.091151f, 0.337791f, + 0.662603f, 0.580470f, 0.425422f, -0.054805f, 0.417176f, 0.916119f, + 0.011551f, -0.389894f, 0.579622f, -0.527226f, -0.531394f, -0.070601f, + 0.238774f, 0.230659f, -0.754752f, -0.752413f, -0.431082f, 0.471466f, + -0.177384f, 0.657964f, 0.870228f, -0.201867f, -0.895577f, 0.142372f, + 0.495340f, -0.359513f, -0.014131f, -0.556694f, 0.878547f, -0.035389f, + 0.079992f, -0.557886f, -0.808110f, -0.879669f, 0.639018f, 0.542957f, + -0.608609f, 0.790236f, 0.368600f, 0.313693f, 0.980762f, -0.932616f, + -0.151493f, -0.020033f, 0.167009f, -0.833461f, 0.320309f, -0.895390f, + 0.113661f, 0.424050f, -0.024179f, 0.235201f, -0.572445f, 0.291317f, + -0.238715f, -0.792574f, -0.244977f, -0.474278f, -0.517429f, 0.245848f, + 0.045856f, -0.173525f, -0.564416f, 0.717107f, 0.722017f, -0.432122f, + 0.230786f, 0.558979f, 0.909695f, 0.839206f, -0.230369f, -0.674714f, + 0.593503f, -0.772366f, -0.682351f, -0.288344f, 0.695517f, 0.165562f, + 0.172355f, 0.851676f, 0.150157f, -0.980045f, 0.618755f, 0.217617f, + -0.040173f, -0.463120f, -0.483807f, -0.037981f, -0.545317f, -0.902795f, + -0.661516f, -0.483107f, -0.604180f, 0.211386f, 0.647407f, 0.621230f, + 0.604474f, 0.416227f, 0.718756f, 0.562169f, -0.592406f, 0.986686f, + -0.812751f, 0.301237f, -0.569647f, -0.512254f, -0.320624f, -0.604275f, + 0.013667f, 0.901516f, -0.210786f, 0.168930f, 0.213074f, 0.429286f, + -0.196927f, 0.717382f, 0.840970f, 0.501678f, -0.428817f, 0.593632f, + -0.714468f, 0.009100f, 0.221376f, 0.407593f, -0.233320f, 0.457367f, + 0.774569f, -0.888303f, -0.723567f, 0.726130f, -0.156507f, -0.177372f, + 0.918283f, 0.500491f, 0.961994f, -0.532968f, -0.807546f, -0.230836f, + 0.000545f, 0.140512f, 0.953263f, -0.014290f, -0.198234f, 0.989981f, + -0.478004f, 0.330649f, 0.928513f, 0.342302f, -0.401650f, 0.062253f, + -0.997074f, 0.767578f, -0.191232f, -0.397589f, 0.901163f, -0.078704f, + -0.424705f, -0.830747f, 0.164363f, -0.693863f, -0.853811f, 0.161130f, + -0.425970f, -0.276160f, 0.449649f, 0.716623f, -0.304169f, 0.923491f, + 0.907138f, -0.587925f, 0.536490f, 0.231064f, 0.837845f, 0.205075f, + 0.404276f, 0.487350f, -0.229795f, -0.496992f, -0.926481f, -0.055754f, + 0.290145f, -0.442060f, 0.035722f, -0.508667f, -0.404984f, 0.300948f, + 0.782787f, 0.722213f, -0.580170f, -0.201812f, 0.775766f, -0.486944f, + 0.933603f, 0.238315f, -0.669308f, 0.652398f, 0.311386f, 0.092905f, + -0.497341f, -0.919687f, -0.533249f, -0.277774f, 0.266910f, 0.972196f, + -0.585687f, 0.514168f, 0.772656f, -0.055540f, -0.682173f, 0.621842f, + -0.046984f, -0.767425f, 0.751441f, 0.270373f, -0.805437f, 0.816879f, + -0.929968f, -0.920501f, 0.977136f, 0.372363f, -0.246622f, 0.008649f, + 0.526991f, -0.902250f, 0.451855f, 0.402656f, -0.082218f, 0.164590f, + -0.321820f, -0.658749f, -0.201613f, 0.839554f, -0.547909f, -0.277987f, + -0.350876f, -0.832836f, 0.025331f, 0.665730f, 0.809227f, 0.447192f, + -0.234008f, -0.403966f, 0.383423f, 0.760914f, 0.849097f, -0.837494f, + -0.034654f, -0.743470f, -0.494178f, 0.767923f, -0.607446f, -0.757293f}; + + const float kReference[] = { + -0.544495f, 0.264199f, 0.647938f, 0.565569f, 0.496231f, 0.271340f, + 0.519944f, 0.318094f, -0.792999f, 0.733421f, -1.000000f, 0.103977f, + 0.981719f, 0.314859f, 0.476882f, 0.514267f, -0.196381f, -0.425781f, + -0.783081f, 0.101108f, 0.419782f, -0.291718f, 0.183355f, -0.332489f}; + + RunBitexactnessTest( + 8000, 2, CreateVector(rtc::ArrayView<const 
float>(kReferenceInput)), + CreateVector(rtc::ArrayView<const float>(kReference))); +} + +TEST(LowCutFilterBitExactnessTest, Mono16kHzInitial) { + const float kReferenceInput[] = { + 0.150254f, 0.512488f, -0.631245f, 0.240938f, 0.089080f, -0.365440f, + -0.121169f, 0.095748f, 1.000000f, 0.773932f, -0.377232f, 0.848124f, + 0.202718f, -0.017621f, 0.199738f, -0.057279f, -0.034693f, 0.416303f, + 0.393761f, 0.396041f, 0.187653f, -0.337438f, 0.200436f, 0.455577f, + 0.136624f, 0.289150f, 0.203131f, -0.084798f, 0.082124f, -0.220010f, + 0.248266f, -0.320554f, -0.298701f, -0.226218f, -0.822794f, 0.401962f, + 0.090876f, -0.210968f, 0.382936f, -0.478291f, -0.028572f, -0.067474f, + 0.089204f, 0.087430f, -0.241695f, -0.008398f, -0.046076f, 0.175416f, + 0.305518f, 0.309992f, -0.241352f, 0.021618f, -0.339291f, -0.311173f, + -0.001914f, 0.428301f, -0.215087f, 0.103784f, -0.063041f, 0.312250f, + -0.304344f, 0.009098f, 0.154406f, 0.307571f, 0.431537f, 0.024014f, + -0.416832f, -0.207440f, -0.296664f, 0.656846f, -0.172033f, 0.209054f, + -0.053772f, 0.248326f, -0.213741f, -0.391871f, -0.397490f, 0.136428f, + -0.049568f, -0.054788f, 0.396633f, 0.081485f, 0.055279f, 0.443690f, + -0.224812f, 0.194675f, 0.233369f, -0.068107f, 0.060270f, -0.325801f, + -0.320801f, 0.029308f, 0.201837f, 0.722528f, -0.186366f, 0.052351f, + -0.023053f, -0.540192f, -0.122671f, -0.501532f, 0.234847f, -0.248165f, + 0.027971f, -0.152171f, 0.084820f, -0.167764f, 0.136923f, 0.206619f, + 0.478395f, -0.054249f, -0.597574f, -0.234627f, 0.378548f, -0.299619f, + 0.268543f, 0.034666f, 0.401492f, -0.547983f, -0.055248f, -0.337538f, + 0.812657f, 0.230611f, 0.385360f, -0.295713f, -0.130957f, -0.076143f, + 0.306960f, -0.077653f, 0.196049f, -0.573390f, -0.098885f, -0.230155f, + -0.440716f, 0.141956f, 0.078802f, 0.009356f, -0.372703f, 0.315083f, + 0.097859f, -0.083575f, 0.006397f, -0.073216f, -0.489105f, -0.079827f, + -0.232329f, -0.273644f, -0.323162f, -0.149105f, -0.559646f, 0.269458f, + 0.145333f, -0.005597f, -0.009717f, -0.223051f, 0.284676f, -0.037228f, + -0.199679f, 0.377651f, -0.062813f, -0.164607f}; + + const float kReference[] = {0.147160f, 0.495163f, -0.648346f, 0.234931f, + 0.075289f, -0.373779f, -0.117676f, 0.100345f, + 0.981719f, 0.714896f, -0.447357f, 0.770867f}; + + RunBitexactnessTest( + 16000, 1, CreateVector(rtc::ArrayView<const float>(kReferenceInput)), + CreateVector(rtc::ArrayView<const float>(kReference))); +} + +TEST(LowCutFilterBitExactnessTest, Mono16kHzConverged) { + const float kReferenceInput[] = { + 0.150254f, 0.512488f, -0.631245f, 0.240938f, 0.089080f, -0.365440f, + -0.121169f, 0.095748f, 1.000000f, 0.773932f, -0.377232f, 0.848124f, + 0.202718f, -0.017621f, 0.199738f, -0.057279f, -0.034693f, 0.416303f, + 0.393761f, 0.396041f, 0.187653f, -0.337438f, 0.200436f, 0.455577f, + 0.136624f, 0.289150f, 0.203131f, -0.084798f, 0.082124f, -0.220010f, + 0.248266f, -0.320554f, -0.298701f, -0.226218f, -0.822794f, 0.401962f, + 0.090876f, -0.210968f, 0.382936f, -0.478291f, -0.028572f, -0.067474f, + 0.089204f, 0.087430f, -0.241695f, -0.008398f, -0.046076f, 0.175416f, + 0.305518f, 0.309992f, -0.241352f, 0.021618f, -0.339291f, -0.311173f, + -0.001914f, 0.428301f, -0.215087f, 0.103784f, -0.063041f, 0.312250f, + -0.304344f, 0.009098f, 0.154406f, 0.307571f, 0.431537f, 0.024014f, + -0.416832f, -0.207440f, -0.296664f, 0.656846f, -0.172033f, 0.209054f, + -0.053772f, 0.248326f, -0.213741f, -0.391871f, -0.397490f, 0.136428f, + -0.049568f, -0.054788f, 0.396633f, 0.081485f, 0.055279f, 0.443690f, + -0.224812f, 0.194675f, 0.233369f, -0.068107f, 0.060270f, 
-0.325801f, + -0.320801f, 0.029308f, 0.201837f, 0.722528f, -0.186366f, 0.052351f, + -0.023053f, -0.540192f, -0.122671f, -0.501532f, 0.234847f, -0.248165f, + 0.027971f, -0.152171f, 0.084820f, -0.167764f, 0.136923f, 0.206619f, + 0.478395f, -0.054249f, -0.597574f, -0.234627f, 0.378548f, -0.299619f, + 0.268543f, 0.034666f, 0.401492f, -0.547983f, -0.055248f, -0.337538f, + 0.812657f, 0.230611f, 0.385360f, -0.295713f, -0.130957f, -0.076143f, + 0.306960f, -0.077653f, 0.196049f, -0.573390f, -0.098885f, -0.230155f, + -0.440716f, 0.141956f, 0.078802f, 0.009356f, -0.372703f, 0.315083f, + 0.097859f, -0.083575f, 0.006397f, -0.073216f, -0.489105f, -0.079827f, + -0.232329f, -0.273644f, -0.323162f, -0.149105f, -0.559646f, 0.269458f, + 0.145333f, -0.005597f, -0.009717f, -0.223051f, 0.284676f, -0.037228f, + -0.199679f, 0.377651f, -0.062813f, -0.164607f, -0.082091f, -0.236957f, + -0.313025f, 0.705903f, 0.462637f, 0.085942f, -0.351308f, -0.241859f, + -0.049333f, 0.221165f, -0.372235f, -0.651092f, -0.404957f, 0.093201f, + 0.109366f, 0.126224f, -0.036409f, 0.051333f, -0.133063f, 0.240896f, + -0.380532f, 0.127160f, -0.237176f, -0.093586f, 0.154478f, 0.290379f, + -0.312329f, 0.352297f, 0.184480f, -0.018965f, -0.054555f, -0.060811f, + -0.084705f, 0.006440f, 0.014333f, 0.230847f, 0.426721f, 0.130481f, + -0.058605f, 0.174712f, 0.051204f, -0.287773f, 0.265265f, 0.085810f, + 0.037775f, 0.143988f, 0.073051f, -0.263103f, -0.045366f, -0.040816f, + -0.148673f, 0.470072f, -0.244727f, -0.135204f, -0.198973f, -0.328139f, + -0.053722f, -0.076590f, 0.427586f, -0.069591f, -0.297399f, 0.448094f, + 0.345037f, -0.064170f, -0.420903f, -0.124253f, -0.043578f, 0.077149f, + -0.072983f, 0.123916f, 0.109517f, -0.349508f, -0.264912f, -0.207106f, + -0.141912f, -0.089586f, 0.003485f, -0.846518f, -0.127715f, 0.347208f, + -0.298095f, 0.260935f, 0.097899f, -0.008106f, 0.050987f, -0.437362f, + -0.023625f, 0.448230f, 0.027484f, 0.011562f, -0.205167f, -0.008611f, + 0.064930f, 0.119156f, -0.104183f, -0.066078f, 0.565530f, -0.631108f, + 0.623029f, 0.094334f, 0.279472f, -0.465059f, -0.164888f, -0.077706f, + 0.118130f, -0.466746f, 0.131800f, -0.338936f, 0.018497f, 0.182304f, + 0.091398f, 0.302547f, 0.281153f, -0.181899f, 0.071836f, -0.263911f, + -0.369380f, 0.258447f, 0.000014f, -0.015347f, 0.254619f, 0.166159f, + 0.097865f, 0.349389f, 0.259834f, 0.067003f, -0.192925f, -0.182080f, + 0.333139f, -0.450434f, -0.006836f, -0.544615f, 0.285183f, 0.240811f, + 0.000325f, -0.019796f, -0.694804f, 0.162411f, -0.612686f, -0.648134f, + 0.022338f, -0.265058f, 0.114993f, 0.189185f, 0.239697f, -0.193148f, + 0.125581f, 0.028122f, 0.230849f, 0.149832f, 0.250919f, -0.036871f, + -0.041136f, 0.281627f, -0.593466f, -0.141009f, -0.355074f, -0.106915f, + 0.181276f, 0.230753f, -0.283631f, -0.131643f, 0.038292f, -0.081563f, + 0.084345f, 0.111763f, -0.259882f, -0.049416f, -0.595824f, 0.320077f, + -0.175802f, -0.336422f, -0.070966f, -0.399242f, -0.005829f, -0.156680f, + 0.608591f, 0.318150f, -0.697767f, 0.123331f, -0.390716f, -0.071276f, + 0.045943f, 0.208958f, -0.076304f, 0.440505f, -0.134400f, 0.091525f, + 0.185763f, 0.023806f, 0.246186f, 0.090323f, -0.219133f, -0.504520f, + 0.519393f, -0.168939f, 0.028884f, 0.157380f, 0.031745f, -0.252830f, + -0.130705f, -0.034901f, 0.413302f, -0.240559f, 0.219279f, 0.086246f, + -0.065353f, -0.295376f, -0.079405f, -0.024226f, -0.410629f, 0.053706f, + -0.229794f, -0.026336f, 0.093956f, -0.252810f, -0.080555f, 0.097827f, + -0.513040f, 0.289508f, 0.677527f, 0.268109f, -0.088244f, 0.119781f, + -0.289511f, 0.524778f, 0.262884f, 0.220028f, 
-0.244767f, 0.089411f, + -0.156018f, -0.087030f, -0.159292f, -0.286646f, -0.253953f, -0.058657f, + -0.474756f, 0.169797f, -0.032919f, 0.195384f, 0.075355f, 0.138131f, + -0.414465f, -0.285118f, -0.124915f, 0.030645f, 0.315431f, -0.081032f, + 0.352546f, 0.132860f, 0.328112f, 0.035476f, -0.183550f, -0.413984f, + 0.043452f, 0.228748f, -0.081765f, -0.151125f, -0.086251f, -0.306448f, + -0.137774f, -0.050508f, 0.012811f, -0.017824f, 0.170841f, 0.030549f, + 0.506935f, 0.087197f, 0.504274f, -0.202080f, 0.147146f, -0.072728f, + 0.167713f, 0.165977f, -0.610894f, -0.370849f, -0.402698f, 0.112297f, + 0.410855f, -0.091330f, 0.227008f, 0.152454f, -0.293884f, 0.111074f, + -0.210121f, 0.423728f, -0.009101f, 0.457188f, -0.118785f, 0.164720f, + -0.017547f, -0.565046f, -0.274461f, 0.171169f, -0.015338f, -0.312635f, + -0.175044f, 0.069729f, -0.277504f, 0.272454f, -0.179049f, 0.505495f, + -0.301774f, 0.055664f, -0.425058f, -0.202222f, -0.165787f, 0.112155f, + 0.263284f, 0.083972f, -0.104256f, 0.227892f, 0.223253f, 0.033592f, + 0.159638f, 0.115358f, -0.275811f, 0.212265f, -0.183658f, -0.168768f}; + + const float kReference[] = {-0.248962f, -0.088257f, 0.083041f, -0.037323f, + 0.127659f, 0.149388f, -0.220978f, -0.004242f, + -0.538544f, 0.384289f, -0.117615f, -0.268524f}; + + RunBitexactnessTest( + 16000, 1, CreateVector(rtc::ArrayView<const float>(kReferenceInput)), + CreateVector(rtc::ArrayView<const float>(kReference))); +} + +TEST(LowCutFilterBitExactnessTest, Stereo16kHzInitial) { + const float kReferenceInput[] = { + 0.087390f, -0.370759f, -0.235918f, 0.583079f, 0.678359f, 0.360473f, + -0.166156f, 0.285780f, -0.571837f, 0.234542f, 0.350382f, 0.202047f, + -0.307381f, -0.271197f, -0.657038f, 0.590723f, -0.014666f, -0.290754f, + 0.550122f, -0.526390f, 0.689667f, 0.633054f, 0.692457f, -0.259626f, + -0.233541f, 0.722669f, -0.072182f, 0.141096f, 0.390614f, 0.921835f, + 0.092626f, 0.273153f, 0.141785f, 0.854224f, 0.727531f, -0.660321f, + -0.642602f, -0.512991f, 0.503559f, -0.601731f, 0.965881f, 0.419277f, + -0.649128f, 0.716595f, 0.818823f, 0.923326f, 0.141199f, 0.125758f, + -0.646678f, 0.027358f, 0.096944f, -0.669445f, -0.012214f, 0.070235f, + -0.602386f, 0.246338f, -0.947369f, -0.362418f, 0.065999f, -0.346453f, + 0.204381f, -0.276135f, -0.730159f, 0.827627f, 0.281118f, 0.317548f, + 0.350661f, 0.489115f, 0.684355f, 0.033314f, -0.696263f, -0.238671f, + 0.642039f, -0.657271f, -0.340049f, 0.932944f, 0.612585f, -0.555624f, + 0.999546f, -0.872523f, -0.149034f, -0.191324f, -0.199414f, -0.776155f, + -0.151378f, 0.227092f, 0.976123f, -0.560198f, -0.291838f, -0.467516f, + -0.417004f, -0.623221f, -0.954281f, -0.101192f, -0.512720f, 0.737453f, + 0.057222f, 0.828270f, 0.947860f, 0.170852f, -0.762049f, 0.853065f, + 0.187122f, 0.767231f, -0.151048f, 0.214515f, -0.858473f, 0.849545f, + 0.284159f, -0.791001f, 0.400450f, -0.208391f, -0.830190f, -0.571042f, + -0.502402f, -0.546694f, 0.406009f, 0.508305f, 0.094573f, 0.106967f, + 0.261146f, 0.970914f, 0.268556f, 0.200911f, 0.818374f, 0.141673f, + -0.329160f, 0.914278f, -0.120154f, 0.203085f, 0.440525f, 0.357557f, + -0.574482f, -0.836753f, -0.451041f, 0.735037f, 0.118714f, -0.070744f, + -0.139398f, 0.547972f, 0.307841f, 0.315459f, -0.677958f, -0.135246f, + 0.010172f, -0.249335f, -0.039256f, -0.315157f, 0.554293f, -0.232112f, + 0.423113f, -0.038133f, 0.458360f, 0.875118f, 0.034509f, 0.806137f, + -0.563615f, 0.746439f, -0.834614f, -0.069193f, -0.956140f, 0.616561f, + -0.641581f, -0.669216f, -0.636793f, 0.382873f, -0.572473f, -0.403790f, + 0.536670f, 0.002300f, 0.818930f, -0.884294f, 
-0.126496f, 0.144509f, + 0.130134f, 0.647633f, -0.747802f, -0.399766f, -0.995756f, 0.902215f, + 0.532599f, 0.502608f, -0.722270f, -0.301361f, -0.697319f, -0.006559f, + 0.617305f, 0.265738f, 0.376803f, 0.279140f, 0.458643f, 0.719691f, + 0.253911f, -0.638817f, 0.146613f, -0.672868f, 0.812103f, -0.845314f, + -0.322931f, 0.161235f, -0.049530f, 0.610641f, 0.061556f, -0.545379f, + 0.418970f, -0.702735f, 0.316232f, 0.267965f, -0.541387f, -0.635544f, + -0.667295f, -0.700786f, -0.594505f, 0.909918f, -0.968183f, 0.915029f, + -0.948615f, 0.942221f, -0.404809f, 0.050146f, 0.724678f, 0.792810f, + -0.621979f, 0.321439f, 0.882462f, 0.951414f, -0.784129f, -0.642202f, + 0.493103f, -0.901063f, -0.857430f, -0.021749f, 0.699788f, 0.994083f, + -0.991215f, 0.085215f, 0.722696f, 0.818278f, 0.690701f, 0.757746f, + 0.492364f, -0.765021f, 0.018045f, -0.662336f, 0.662223f, 0.856022f, + -0.661031f, 0.767475f, -0.224274f, -0.234861f, -0.457094f, 0.735766f, + 0.483005f, -0.104255f, 0.419278f, 0.888663f, -0.651764f, -0.510807f, + 0.281858f, 0.617225f, 0.706742f, -0.203765f, -0.769012f, -0.839438f, + -0.279065f, 0.657811f, -0.570781f, 0.582081f, 0.309377f, -0.947707f, + 0.571553f, 0.845126f, -0.015374f, 0.668023f, -0.737293f, 0.519567f, + 0.851472f, 0.665415f, -0.481198f, -0.573956f, 0.044630f, -0.205286f, + -0.041780f, 0.987807f, 0.208957f, 0.889817f, -0.019116f, -0.124107f, + 0.545311f, 0.488133f, -0.114192f, -0.894000f, -0.824356f, 0.595972f, + 0.311165f, -0.935329f, 0.114134f, 0.439603f, -0.779184f, -0.566705f, + 0.622040f, -0.722676f, 0.763798f, 0.847112f, -0.974489f, -0.245681f, + -0.664377f, 0.080446f, -0.796675f, -0.921465f, 0.866458f, 0.943184f, + -0.278144f, 0.288411f, -0.864105f, -0.584176f, -0.920792f, -0.061281f, + -0.699807f, 0.982614f}; + + const float kReference[] = { + 0.085604f, -0.367126f, -0.218170f, 0.594653f, 0.661245f, 0.319041f, + -0.212891f, 0.237800f, -0.614716f, 0.201758f, 0.305032f, 0.144414f, + -0.936523f, 0.647359f, -0.613403f, -0.611542f, -0.549835f, 0.477004f, + -0.477386f, -0.287262f, 0.650746f, 0.101169f, 0.899258f, -0.808014f}; + + RunBitexactnessTest( + 16000, 2, CreateVector(rtc::ArrayView<const float>(kReferenceInput)), + CreateVector(rtc::ArrayView<const float>(kReference))); +} + +TEST(LowCutFilterBitExactnessTest, Stereo16kHzConverged) { + const float kReferenceInput[] = { + -0.145875f, 0.910744f, 0.448494f, 0.161783f, 0.080516f, 0.410882f, + -0.989942f, 0.565032f, 0.853719f, -0.983409f, 0.649257f, 0.534672f, + 0.994274f, -0.544694f, 0.839084f, 0.283999f, -0.789360f, -0.463678f, + 0.527688f, 0.611020f, -0.791494f, -0.060482f, -0.561876f, 0.845416f, + -0.359355f, 0.715088f, -0.480307f, 0.756126f, -0.623465f, 0.518388f, + -0.936621f, 0.284678f, 0.133742f, -0.247181f, -0.574903f, 0.584314f, + -0.709113f, -0.021715f, -0.974309f, -0.626776f, -0.029539f, 0.676452f, + -0.717886f, 0.464434f, 0.382134f, -0.931015f, -0.022285f, 0.942781f, + -0.775097f, 0.486428f, 0.277083f, 0.188366f, -0.002755f, 0.135705f, + -0.146991f, -0.847521f, -0.418827f, 0.122670f, 0.266667f, 0.861552f, + 0.955538f, -0.812807f, 0.323470f, 0.205546f, -0.052364f, -0.287487f, + -0.048843f, 0.342044f, 0.919290f, -0.821831f, 0.595485f, 0.181551f, + 0.824394f, -0.797741f, -0.413411f, -0.896824f, 0.008256f, 0.536752f, + -0.434029f, -0.549280f, -0.337421f, -0.093497f, 0.474769f, 0.019771f, + -0.234972f, 0.810966f, 0.930515f, 0.256535f, -0.735938f, 0.236604f, + -0.233960f, 0.982387f, -0.426345f, 0.412383f, 0.070412f, -0.613578f, + 0.378870f, -0.899090f, -0.631132f, -0.908683f, 0.770083f, 0.679589f, + -0.763690f, -0.179170f, 
-0.759543f, 0.144185f, 0.898780f, -0.487230f, + 0.979731f, -0.300384f, -0.582955f, 0.331654f, 0.946689f, 0.245400f, + -0.872924f, -0.252981f, -0.667497f, -0.537444f, -0.895583f, 0.803513f, + 0.586583f, -0.253971f, 0.664109f, 0.507669f, 0.243726f, -0.211814f, + -0.281444f, -0.822295f, -0.316646f, 0.097341f, -0.078905f, 0.290905f, + 0.027042f, 0.628853f, -0.805634f, -0.072573f, 0.179635f, -0.625656f, + 0.222660f, -0.896116f, 0.151454f, 0.684689f, -0.000548f, -0.121950f, + -0.701886f, -0.943441f, 0.513340f, 0.592212f, -0.412889f, -0.769587f, + -0.249817f, 0.657787f, 0.683553f, 0.330477f, 0.920280f, 0.886236f, + -0.774601f, 0.296575f, -0.038392f, -0.866959f, 0.795542f, -0.005540f, + 0.542607f, -0.879276f, -0.475085f, 0.302139f, -0.732792f, 0.277091f, + -0.230114f, 0.531396f, 0.305831f, -0.237022f, -0.399963f, -0.319721f, + 0.837853f, -0.087466f, -0.115006f, -0.091628f, 0.890564f, -0.561762f, + 0.764806f, -0.960249f, -0.316470f, 0.532055f, -0.314393f, 0.237613f, + -0.093958f, -0.979675f, 0.198162f, 0.203137f, 0.298835f, -0.314559f, + -0.013401f, 0.403548f, 0.775605f, -0.889884f, -0.803276f, 0.299566f, + 0.528142f, 0.975918f, -0.749350f, -0.271046f, 0.352460f, -0.248484f, + 0.726917f, -0.416046f, -0.733050f, 0.345301f, -0.594830f, 0.737030f, + 0.502315f, -0.161241f, -0.999538f, -0.701073f, -0.452331f, 0.744850f, + 0.202502f, -0.357623f, -0.431414f, -0.129368f, 0.807518f, 0.850211f, + 0.010585f, 0.255164f, 0.438528f, -0.952174f, 0.149865f, -0.906931f, + -0.154937f, -0.064531f, -0.954744f, -0.869852f, 0.847913f, 0.068286f, + -0.266407f, -0.272108f, -0.697253f, -0.700783f, -0.298396f, -0.328068f, + 0.568056f, -0.026522f, -0.070404f, -0.737495f, 0.772783f, 0.349115f, + 0.670319f, 0.312976f, 0.967834f, 0.959580f, -0.499694f, 0.249141f, + 0.456485f, -0.003659f, 0.699657f, -0.618164f, -0.751712f, -0.994419f, + -0.694094f, 0.068322f, 0.021267f, -0.229568f, -0.378807f, -0.992889f, + 0.630485f, 0.276837f, -0.103321f, -0.511828f, 0.606770f, 0.647942f, + 0.704381f, -0.065496f, 0.941398f, 0.682488f, -0.842904f, -0.524802f, + 0.635142f, -0.188343f, -0.067376f, 0.903072f, 0.930011f, 0.530570f, + 0.149067f, 0.831850f, -0.009135f, -0.667975f, -0.348005f, -0.407128f, + 0.116597f, -0.865046f, -0.862044f, -0.666431f, 0.894877f, 0.622177f, + 0.420911f, 0.940491f, 0.996854f, 0.974910f, -0.699827f, 0.916958f, + 0.060918f, -0.851827f, -0.376358f, 0.790342f, 0.669537f, -0.995302f, + 0.280420f, 0.606365f, -0.509738f, -0.871756f, -0.473703f, -0.794559f, + -0.032562f, -0.162231f, -0.237422f, 0.773530f, -0.158885f, -0.432304f, + -0.903638f, -0.561668f, -0.521648f, -0.941483f, 0.404622f, -0.984729f, + 0.221841f, -0.183821f, -0.502107f, 0.304919f, -0.359446f, -0.792656f, + 0.071130f, -0.670260f, 0.766877f, 0.332914f, 0.695485f, 0.525322f, + 0.614028f, 0.265905f, 0.420855f, 0.377327f, -0.358104f, 0.063297f, + 0.746388f, -0.890921f, 0.000802f, -0.134474f, 0.808565f, 0.260367f, + 0.966072f, 0.170401f, 0.681273f, -0.062372f, 0.090445f, -0.641792f, + 0.268923f, 0.925918f, 0.068028f, -0.040771f, 0.587332f, -0.814573f, + 0.761599f, -0.992253f, 0.023058f, 0.356927f, 0.131495f, -0.043083f, + -0.358974f, 0.203160f, 0.826305f, 0.365036f, 0.893467f, -0.801822f, + 0.022058f, -0.779743f, 0.090524f, 0.377572f, -0.705166f, 0.555122f, + -0.201898f, 0.796600f, -0.385912f, -0.877898f, -0.561058f, -0.834334f, + 0.900791f, -0.967259f, -0.770663f, -0.975180f, -0.567545f, -0.977145f, + 0.284899f, 0.033982f, -0.508916f, -0.612505f, -0.818259f, -0.263117f, + -0.984414f, 0.205403f, -0.042291f, -0.383765f, 0.488889f, 0.678699f, + -0.475136f, 0.028476f, 
-0.106452f, -0.317578f, 0.678284f, 0.964985f, + 0.252929f, -0.637450f, -0.753966f, 0.159937f, -0.342928f, -0.463627f, + 0.100478f, -0.638966f, 0.356984f, -0.888623f, -0.931886f, -0.426963f, + -0.845220f, 0.801145f, 0.693212f, -0.208603f, -0.661569f, -0.139095f, + -0.167564f, 0.457527f, -0.187053f, 0.903615f, 0.823970f, 0.902829f, + -0.307998f, -0.419512f, 0.773402f, -0.579938f, -0.738247f, 0.041032f, + 0.810925f, -0.194940f, -0.568477f, -0.842521f, 0.866120f, 0.205743f, + -0.245016f, 0.329863f, 0.584381f, -0.333016f, 0.385318f, -0.592369f, + 0.917427f, 0.423665f, -0.666187f, -0.114446f, 0.265987f, 0.859934f, + 0.058662f, 0.252949f, 0.361638f, 0.846395f, -0.694332f, -0.188558f, + -0.375048f, 0.387798f, 0.781376f, -0.018658f, 0.611647f, -0.347122f, + 0.099758f, -0.222431f, 0.793658f, 0.352240f, 0.656794f, -0.779822f, + -0.441545f, 0.535272f, -0.567887f, -0.931876f, -0.126896f, 0.873727f, + -0.475822f, 0.139491f, -0.280894f, -0.946323f, 0.000838f, 0.654030f, + -0.482035f, -0.908230f, -0.507057f, 0.321464f, -0.341181f, 0.318992f, + -0.973992f, 0.436136f, -0.217762f, -0.932989f, -0.187969f, 0.432615f, + 0.842673f, 0.968031f, 0.966842f, 0.792612f, 0.731406f, 0.601922f, + 0.109958f, -0.162256f, -0.745755f, 0.309241f, 0.727930f, -0.450803f, + 0.680328f, -0.858490f, -0.242416f, -0.463661f, -0.694158f, 0.261999f, + -0.367250f, 0.918224f, -0.002652f, 0.477217f, -0.974489f, 0.210706f, + 0.152903f, 0.614758f, 0.309936f, 0.756457f, 0.804746f, -0.695534f, + -0.614840f, 0.581951f, -0.878590f, -0.220346f, -0.400068f, 0.468360f, + -0.791581f, 0.585151f, 0.565458f, 0.064795f, -0.493295f, -0.858091f, + 0.251607f, -0.950637f, -0.875915f, -0.740776f, -0.098772f, 0.344672f, + 0.712222f, -0.003109f, -0.902431f, -0.372335f, 0.283262f, 0.572773f, + -0.421699f, -0.004264f, 0.636869f, 0.190257f, 0.072849f, -0.338254f, + -0.176620f, 0.588012f, -0.313584f, -0.074787f, -0.264353f, 0.359141f, + 0.135558f, 0.303554f, -0.017773f, -0.203084f, -0.045032f, -0.866825f, + -0.177943f, 0.938184f, 0.561442f, 0.458036f, 0.531301f, 0.513162f, + 0.686541f, 0.540314f, 0.957322f, -0.777281f, -0.207846f, -0.015879f, + -0.483811f, -0.926068f, 0.948763f, 0.452852f, -0.704070f, -0.704211f, + 0.409648f, -0.238013f, -0.847177f, -0.178319f, -0.714019f, 0.597840f, + 0.860496f, -0.990561f, 0.300081f, 0.357065f, -0.492754f, 0.686362f, + -0.412082f, -0.946279f, -0.813386f, 0.595770f, 0.422805f, 0.566814f, + 0.247845f, 0.650831f, -0.929955f, -0.189050f, -0.500662f, -0.038206f, + 0.761678f, -0.438630f, 0.198285f, -0.947548f, -0.689603f, 0.667822f, + -0.610213f, 0.659576f, -0.323850f, 0.342233f, -0.895267f, 0.468618f, + -0.001036f, 0.886600f, -0.420455f, -0.246879f, -0.772489f, 0.929701f, + -0.134977f, -0.830874f, 0.433353f, 0.013575f, -0.343825f, 0.507048f, + 0.672012f, -0.492567f, 0.068850f, -0.129670f, -0.684592f, 0.200962f, + 0.874902f, -0.784483f, 0.799963f, 0.100930f, -0.145287f, -0.695238f, + -0.504908f, -0.105262f, 0.065567f, -0.290698f, 0.546230f, 0.763362f, + 0.468184f, -0.187136f, 0.208357f, 0.282210f, -0.745066f, -0.007616f, + -0.379061f, 0.157149f, 0.887218f, -0.146121f, -0.933743f, 0.858868f, + 0.849965f, -0.283386f, -0.480022f, 0.573719f, 0.023164f, 0.125054f, + 0.369588f, -0.815207f, 0.745158f, 0.885876f, -0.806812f, 0.691765f, + 0.818791f, -0.977318f, 0.047365f, 0.300691f, -0.229709f, 0.298604f, + 0.525707f, 0.151372f, 0.263838f, -0.443592f, 0.679673f, -0.146330f, + 0.263245f, 0.666934f, -0.459629f, -0.198399f, 0.108509f, -0.112269f, + -0.819232f, 0.488763f, -0.934769f, -0.140515f, -0.925475f, 0.951596f, + 0.044680f, 0.819260f, 
-0.233504f, 0.768904f, -0.489965f, 0.818100f, + 0.789121f, -0.202966f, 0.250040f, 0.135195f, 0.789024f, -0.571668f, + -0.992282f, 0.761163f, -0.529757f, -0.510271f, 0.281834f, -0.390951f, + 0.651242f, 0.767377f, 0.890746f, -0.218409f, 0.602640f, -0.685773f, + 0.250331f, 0.397971f, -0.828262f, 0.062359f, 0.777133f, -0.472668f, + -0.530429f, 0.679314f, -0.008920f, -0.695267f, -0.538464f, 0.315908f, + 0.125897f, -0.416343f, 0.244610f, 0.431811f, -0.438538f, -0.175454f, + -0.275589f, 0.562784f, -0.729026f, 0.804139f, -0.420728f, -0.000884f, + 0.567181f, 0.354124f, -0.700377f, 0.393239f, -0.741974f, 0.891893f, + 0.772824f, 0.030009f, 0.358817f, 0.953587f, -0.749079f, 0.504486f, + 0.654104f, 0.562861f, -0.618235f, -0.142717f, -0.971087f, -0.349429f, + -0.730596f, -0.098965f, 0.144550f, 0.584047f, -0.160527f, 0.065073f, + 0.851409f, 0.798164f, 0.089667f, 0.802248f, -0.896347f, 0.617205f, + -0.330191f, -0.542634f, 0.644804f, -0.303531f, -0.669059f, -0.943733f, + 0.910740f, 0.360581f, 0.721124f, 0.878187f, 0.360388f, 0.834847f, + -0.486617f, 0.771236f, 0.840086f, -0.399873f, -0.853218f, 0.534797f, + -0.830096f, 0.457528f, -0.104221f, 0.302497f, -0.660996f, 0.062898f, + 0.267602f, -0.971808f, -0.059257f, 0.772652f, -0.771943f, -0.114918f, + 0.319096f, -0.410454f, 0.900737f, 0.388572f, -0.586387f, 0.109525f, + 0.758557f, 0.115715f, 0.504668f, 0.789802f, 0.683688f, -0.738287f, + -0.621692f, -0.692720f, -0.942196f, -0.981830f, 0.192903f, 0.218099f, + 0.837847f, 0.467149f, -0.397706f, -0.008851f, -0.483674f, 0.465709f, + -0.766478f, 0.492083f, 0.619578f, 0.490467f, -0.325713f, 0.168650f, + -0.062096f, -0.825470f, 0.657435f, 0.371889f, -0.465350f, 0.938967f, + -0.632452f, -0.400118f, -0.177630f, -0.527022f, -0.609889f, 0.410759f, + -0.638903f, 0.044666f, -0.407656f, -0.074436f, 0.850465f, -0.568222f, + -0.997982f, 0.813212f, 0.360084f, 0.029904f, 0.044138f, -0.794163f, + 0.993761f, -0.282062f, 0.250485f, -0.213267f, -0.984675f, 0.090570f, + 0.018221f, -0.506442f, -0.909209f, 0.683459f, -0.903500f, -0.367359f, + 0.566839f, 0.944800f, 0.172928f, 0.556088f, 0.455395f, 0.301974f, + 0.329230f, 0.877560f, 0.070163f, -0.203120f, 0.340915f, -0.118931f, + -0.734252f, -0.121593f, 0.095285f, -0.209727f, -0.203456f, 0.502697f, + 0.044701f, -0.019134f, -0.822642f, -0.498297f, -0.104882f, 0.275922f, + 0.418891f, 0.985240f, 0.864390f, -0.815541f, 0.907080f, -0.674409f, + 0.940910f, 0.194013f, -0.519546f, -0.859410f, -0.399918f, 0.627090f, + -0.846580f, -0.291054f, -0.735978f, -0.683641f, -0.875706f, 0.403687f, + -0.827037f, 0.233574f, -0.652457f, 0.302802f, -0.002607f, -0.430979f, + 0.661119f, 0.636720f, 0.876339f, -0.999348f, 0.280778f, -0.985289f, + -0.787158f, -0.786411f, -0.265782f, -0.520785f, -0.307720f, -0.500760f, + -0.225871f, -0.157923f, 0.280155f, 0.575106f, -0.460011f, 0.687965f, + 0.480937f, 0.652204f, -0.635616f, -0.869128f, 0.220701f, 0.403106f, + -0.776765f, -0.808353f, 0.195668f, 0.624465f, 0.629156f, -0.821126f, + 0.462557f, 0.807713f, -0.095536f, -0.858625f, -0.517444f, 0.463730f}; + + const float kReference[] = { + -0.816528f, 0.085421f, 0.739647f, -0.922089f, 0.669301f, -0.048187f, + -0.290039f, -0.818085f, -0.596008f, -0.177826f, -0.002197f, -0.350647f, + -0.064301f, 0.337291f, -0.621765f, 0.115909f, 0.311899f, -0.915924f, + 0.020478f, 0.836055f, -0.714020f, -0.037140f, 0.391125f, -0.340118f}; + + RunBitexactnessTest( + 16000, 2, CreateVector(rtc::ArrayView<const float>(kReferenceInput)), + CreateVector(rtc::ArrayView<const float>(kReference))); +} +} // namespace webrtc diff --git 
a/third_party/libwebrtc/webrtc/modules/audio_processing/noise_suppression_impl.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/noise_suppression_impl.cc new file mode 100644 index 0000000000..8dd713f4e5 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/noise_suppression_impl.cc @@ -0,0 +1,213 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/noise_suppression_impl.h" + +#include "modules/audio_processing/audio_buffer.h" +#include "rtc_base/constructormagic.h" +#if defined(WEBRTC_NS_FLOAT) +#include "modules/audio_processing/ns/noise_suppression.h" +#define NS_CREATE WebRtcNs_Create +#define NS_FREE WebRtcNs_Free +#define NS_INIT WebRtcNs_Init +#define NS_SET_POLICY WebRtcNs_set_policy +typedef NsHandle NsState; +#elif defined(WEBRTC_NS_FIXED) +#include "modules/audio_processing/ns/noise_suppression_x.h" +#define NS_CREATE WebRtcNsx_Create +#define NS_FREE WebRtcNsx_Free +#define NS_INIT WebRtcNsx_Init +#define NS_SET_POLICY WebRtcNsx_set_policy +typedef NsxHandle NsState; +#endif + +namespace webrtc { +class NoiseSuppressionImpl::Suppressor { + public: + explicit Suppressor(int sample_rate_hz) { + state_ = NS_CREATE(); + RTC_CHECK(state_); + int error = NS_INIT(state_, sample_rate_hz); + RTC_DCHECK_EQ(0, error); + } + ~Suppressor() { + NS_FREE(state_); + } + NsState* state() { return state_; } + private: + NsState* state_ = nullptr; + RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(Suppressor); +}; + +NoiseSuppressionImpl::NoiseSuppressionImpl(rtc::CriticalSection* crit) + : crit_(crit) { + RTC_DCHECK(crit); +} + +NoiseSuppressionImpl::~NoiseSuppressionImpl() {} + +void NoiseSuppressionImpl::Initialize(size_t channels, int sample_rate_hz) { + rtc::CritScope cs(crit_); + channels_ = channels; + sample_rate_hz_ = sample_rate_hz; + std::vector<std::unique_ptr<Suppressor>> new_suppressors; + if (enabled_) { + new_suppressors.resize(channels); + for (size_t i = 0; i < channels; i++) { + new_suppressors[i].reset(new Suppressor(sample_rate_hz)); + } + } + suppressors_.swap(new_suppressors); + set_level(level_); +} + +void NoiseSuppressionImpl::AnalyzeCaptureAudio(AudioBuffer* audio) { + RTC_DCHECK(audio); +#if defined(WEBRTC_NS_FLOAT) + rtc::CritScope cs(crit_); + if (!enabled_) { + return; + } + + RTC_DCHECK_GE(160, audio->num_frames_per_band()); + RTC_DCHECK_EQ(suppressors_.size(), audio->num_channels()); + for (size_t i = 0; i < suppressors_.size(); i++) { + WebRtcNs_Analyze(suppressors_[i]->state(), + audio->split_bands_const_f(i)[kBand0To8kHz]); + } +#endif +} + +void NoiseSuppressionImpl::ProcessCaptureAudio(AudioBuffer* audio) { + RTC_DCHECK(audio); + rtc::CritScope cs(crit_); + if (!enabled_) { + return; + } + + RTC_DCHECK_GE(160, audio->num_frames_per_band()); + RTC_DCHECK_EQ(suppressors_.size(), audio->num_channels()); + for (size_t i = 0; i < suppressors_.size(); i++) { +#if defined(WEBRTC_NS_FLOAT) + WebRtcNs_Process(suppressors_[i]->state(), + audio->split_bands_const_f(i), + audio->num_bands(), + audio->split_bands_f(i)); +#elif defined(WEBRTC_NS_FIXED) + WebRtcNsx_Process(suppressors_[i]->state(), + audio->split_bands_const(i), + audio->num_bands(), + 
audio->split_bands(i)); +#endif + } +} + +int NoiseSuppressionImpl::Enable(bool enable) { + rtc::CritScope cs(crit_); + if (enabled_ != enable) { + enabled_ = enable; + Initialize(channels_, sample_rate_hz_); + } + return AudioProcessing::kNoError; +} + +bool NoiseSuppressionImpl::is_enabled() const { + rtc::CritScope cs(crit_); + return enabled_; +} + +int NoiseSuppressionImpl::set_level(Level level) { + int policy = 1; + switch (level) { + case NoiseSuppression::kLow: + policy = 0; + break; + case NoiseSuppression::kModerate: + policy = 1; + break; + case NoiseSuppression::kHigh: + policy = 2; + break; + case NoiseSuppression::kVeryHigh: + policy = 3; + break; + default: + RTC_NOTREACHED(); + } + rtc::CritScope cs(crit_); + level_ = level; + for (auto& suppressor : suppressors_) { + int error = NS_SET_POLICY(suppressor->state(), policy); + RTC_DCHECK_EQ(0, error); + } + return AudioProcessing::kNoError; +} + +NoiseSuppression::Level NoiseSuppressionImpl::level() const { + rtc::CritScope cs(crit_); + return level_; +} + +float NoiseSuppressionImpl::speech_probability() const { + rtc::CritScope cs(crit_); +#if defined(WEBRTC_NS_FLOAT) + float probability_average = 0.0f; + for (auto& suppressor : suppressors_) { + probability_average += + WebRtcNs_prior_speech_probability(suppressor->state()); + } + if (!suppressors_.empty()) { + probability_average /= suppressors_.size(); + } + return probability_average; +#elif defined(WEBRTC_NS_FIXED) + // TODO(peah): Returning error code as a float! Remove this. + // Currently not available for the fixed point implementation. + return AudioProcessing::kUnsupportedFunctionError; +#endif +} + +std::vector<float> NoiseSuppressionImpl::NoiseEstimate() { + rtc::CritScope cs(crit_); + std::vector<float> noise_estimate; +#if defined(WEBRTC_NS_FLOAT) + const float kNumChannelsFraction = 1.f / suppressors_.size(); + noise_estimate.assign(WebRtcNs_num_freq(), 0.f); + for (auto& suppressor : suppressors_) { + const float* noise = WebRtcNs_noise_estimate(suppressor->state()); + for (size_t i = 0; i < noise_estimate.size(); ++i) { + noise_estimate[i] += kNumChannelsFraction * noise[i]; + } + } +#elif defined(WEBRTC_NS_FIXED) + noise_estimate.assign(WebRtcNsx_num_freq(), 0.f); + for (auto& suppressor : suppressors_) { + int q_noise; + const uint32_t* noise = WebRtcNsx_noise_estimate(suppressor->state(), + &q_noise); + const float kNormalizationFactor = + 1.f / ((1 << q_noise) * suppressors_.size()); + for (size_t i = 0; i < noise_estimate.size(); ++i) { + noise_estimate[i] += kNormalizationFactor * noise[i]; + } + } +#endif + return noise_estimate; +} + +size_t NoiseSuppressionImpl::num_noise_bins() { +#if defined(WEBRTC_NS_FLOAT) + return WebRtcNs_num_freq(); +#elif defined(WEBRTC_NS_FIXED) + return WebRtcNsx_num_freq(); +#endif +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/noise_suppression_impl.h b/third_party/libwebrtc/webrtc/modules/audio_processing/noise_suppression_impl.h new file mode 100644 index 0000000000..fba716e253 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/noise_suppression_impl.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_NOISE_SUPPRESSION_IMPL_H_ +#define MODULES_AUDIO_PROCESSING_NOISE_SUPPRESSION_IMPL_H_ + +#include <memory> +#include <vector> + +#include "modules/audio_processing/include/audio_processing.h" +#include "rtc_base/constructormagic.h" +#include "rtc_base/criticalsection.h" + +namespace webrtc { + +class AudioBuffer; + +class NoiseSuppressionImpl : public NoiseSuppression { + public: + explicit NoiseSuppressionImpl(rtc::CriticalSection* crit); + ~NoiseSuppressionImpl() override; + + // TODO(peah): Fold into ctor, once public API is removed. + void Initialize(size_t channels, int sample_rate_hz); + void AnalyzeCaptureAudio(AudioBuffer* audio); + void ProcessCaptureAudio(AudioBuffer* audio); + + // NoiseSuppression implementation. + int Enable(bool enable) override; + bool is_enabled() const override; + int set_level(Level level) override; + Level level() const override; + float speech_probability() const override; + std::vector<float> NoiseEstimate() override; + static size_t num_noise_bins(); + + private: + class Suppressor; + rtc::CriticalSection* const crit_; + bool enabled_ RTC_GUARDED_BY(crit_) = false; + Level level_ RTC_GUARDED_BY(crit_) = kModerate; + size_t channels_ RTC_GUARDED_BY(crit_) = 0; + int sample_rate_hz_ RTC_GUARDED_BY(crit_) = 0; + std::vector<std::unique_ptr<Suppressor>> suppressors_ RTC_GUARDED_BY(crit_); + RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(NoiseSuppressionImpl); +}; +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_NOISE_SUPPRESSION_IMPL_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/noise_suppression_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/noise_suppression_unittest.cc new file mode 100644 index 0000000000..0b734fdd3f --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/noise_suppression_unittest.cc @@ -0,0 +1,284 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include <vector> + +#include "api/array_view.h" +#include "modules/audio_processing/audio_buffer.h" +#include "modules/audio_processing/noise_suppression_impl.h" +#include "modules/audio_processing/test/audio_buffer_tools.h" +#include "modules/audio_processing/test/bitexactness_tools.h" +#include "test/gtest.h" + +namespace webrtc { +namespace { + +const int kNumFramesToProcess = 1000; + +// Process one frame of data and produce the output. +void ProcessOneFrame(int sample_rate_hz, + AudioBuffer* capture_buffer, + NoiseSuppressionImpl* noise_suppressor) { + if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) { + capture_buffer->SplitIntoFrequencyBands(); + } + + noise_suppressor->AnalyzeCaptureAudio(capture_buffer); + noise_suppressor->ProcessCaptureAudio(capture_buffer); + + if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) { + capture_buffer->MergeFrequencyBands(); + } +} + +// Processes a specified number of frames, verifies the results, and reports +// any errors.
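+// The harness below feeds kNumFramesToProcess (1000) frames of 10 ms each, +// i.e. roughly ten seconds of the capture test vector, before comparing +// against the references.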
+void RunBitexactnessTest(int sample_rate_hz, + size_t num_channels, + NoiseSuppressionImpl::Level level, + float speech_probability_reference, + rtc::ArrayView<const float> noise_estimate_reference, + rtc::ArrayView<const float> output_reference) { + rtc::CriticalSection crit_capture; + NoiseSuppressionImpl noise_suppressor(&crit_capture); + noise_suppressor.Initialize(num_channels, sample_rate_hz); + noise_suppressor.Enable(true); + noise_suppressor.set_level(level); + + int samples_per_channel = rtc::CheckedDivExact(sample_rate_hz, 100); + const StreamConfig capture_config(sample_rate_hz, num_channels, false); + AudioBuffer capture_buffer( + capture_config.num_frames(), capture_config.num_channels(), + capture_config.num_frames(), capture_config.num_channels(), + capture_config.num_frames()); + test::InputAudioFile capture_file( + test::GetApmCaptureTestVectorFileName(sample_rate_hz)); + std::vector<float> capture_input(samples_per_channel * num_channels); + for (size_t frame_no = 0; frame_no < kNumFramesToProcess; ++frame_no) { + ReadFloatSamplesFromStereoFile(samples_per_channel, num_channels, + &capture_file, capture_input); + + test::CopyVectorToAudioBuffer(capture_config, capture_input, + &capture_buffer); + + ProcessOneFrame(sample_rate_hz, &capture_buffer, &noise_suppressor); + } + + // Extract test results. + std::vector<float> capture_output; + test::ExtractVectorFromAudioBuffer(capture_config, &capture_buffer, + &capture_output); + float speech_probability = noise_suppressor.speech_probability(); + std::vector<float> noise_estimate = noise_suppressor.NoiseEstimate(); + + const float kVectorElementErrorBound = 1.0f / 32768.0f; + EXPECT_FLOAT_EQ(speech_probability_reference, speech_probability); + EXPECT_TRUE(test::VerifyArray(noise_estimate_reference, noise_estimate, + kVectorElementErrorBound)); + + // Compare the output with the reference. Only the first values of the output + // from the last processed frame are compared, to avoid having to specify all + // preceding frames as test vectors. As the algorithm being tested has + // memory, testing only the last frame implicitly also tests the preceding + // frames.
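+ // (The 1/32768 error bound corresponds to one least-significant bit of + // 16-bit PCM, so the comparisons tolerate only quantization-level + // deviations.)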
+ EXPECT_TRUE(test::VerifyDeinterleavedArray( + capture_config.num_frames(), capture_config.num_channels(), + output_reference, capture_output, kVectorElementErrorBound)); +} + +} // namespace + +TEST(NoiseSuppresionBitExactnessTest, Mono8kHzLow) { +#if defined(WEBRTC_ARCH_ARM64) + const float kSpeechProbabilityReference = -4.0f; + const float kNoiseEstimateReference[] = + {1432.341431f, 3321.919922f, 7677.521973f}; + const float kOutputReference[] = {0.003510f, 0.004517f, 0.004669f}; +#elif defined(WEBRTC_ARCH_ARM) + const float kSpeechProbabilityReference = -4.0f; + const float kNoiseEstimateReference[] = + {1432.341431f, 3321.919922f, 7677.521973f}; + const float kOutputReference[] = {0.003510f, 0.004517f, 0.004669f}; +#else + const float kSpeechProbabilityReference = 0.73421317f; + const float kNoiseEstimateReference[] = + {1175.266113f, 3289.305908f, 7532.991211f}; + const float kOutputReference[] = {0.003263f, 0.004402f, 0.004537f}; +#endif + + RunBitexactnessTest(8000, 1, NoiseSuppression::Level::kLow, + kSpeechProbabilityReference, kNoiseEstimateReference, + kOutputReference); +} + +TEST(NoiseSuppresionBitExactnessTest, Mono16kHzLow) { +#if defined(WEBRTC_ARCH_ARM64) + const float kSpeechProbabilityReference = -4.0f; + const float kNoiseEstimateReference[] = + {2534.461914f, 6277.638672f, 14367.499023f}; + const float kOutputReference[] = {0.003449f, 0.004334f, 0.004303f}; +#elif defined(WEBRTC_ARCH_ARM) + const float kSpeechProbabilityReference = -4.0f; + const float kNoiseEstimateReference[] = + {2534.461914f, 6277.638672f, 14367.499023f}; + const float kOutputReference[] = {0.003449f, 0.004334f, 0.004303f}; +#else + const float kSpeechProbabilityReference = 0.71672988f; + const float kNoiseEstimateReference[] = + {2151.313965f, 6509.765137f, 15658.848633f}; + const float kOutputReference[] = {0.003574f, 0.004494f, 0.004499f}; +#endif + + RunBitexactnessTest(16000, 1, NoiseSuppression::Level::kLow, + kSpeechProbabilityReference, kNoiseEstimateReference, + kOutputReference); +} + +TEST(NoiseSuppresionBitExactnessTest, Mono32kHzLow) { +#if defined(WEBRTC_ARCH_ARM64) + const float kSpeechProbabilityReference = -4.0f; + const float kNoiseEstimateReference[] = + {2540.059082f, 6317.822754f, 14440.845703f}; + const float kOutputReference[] = {0.001679f, 0.002411f, 0.002594f}; +#elif defined(WEBRTC_ARCH_ARM) + const float kSpeechProbabilityReference = -4.0f; + const float kNoiseEstimateReference[] = + {2540.059082f, 6317.822754f, 14440.845703f}; + const float kOutputReference[] = {0.001679f, 0.002411f, 0.002594f}; +#else + const float kSpeechProbabilityReference = 0.67999554f; + const float kNoiseEstimateReference[] = + {2149.780518f, 7076.936035f, 14939.945312f}; + const float kOutputReference[] = {0.001221f, 0.001984f, 0.002228f}; +#endif + + RunBitexactnessTest(32000, 1, NoiseSuppression::Level::kLow, + kSpeechProbabilityReference, kNoiseEstimateReference, + kOutputReference); +} + +TEST(NoiseSuppresionBitExactnessTest, Mono48kHzLow) { +#if defined(WEBRTC_ARCH_ARM64) + const float kSpeechProbabilityReference = -4.0f; + const float kNoiseEstimateReference[] = + {2564.605713f, 6213.656250f, 13372.284180f}; + const float kOutputReference[] = {-0.013185f, -0.012769f, -0.012023f}; +#elif defined(WEBRTC_ARCH_ARM) + const float kSpeechProbabilityReference = -4.0f; + const float kNoiseEstimateReference[] = + {2564.605713f, 6213.656250f, 13372.284180f}; + const float kOutputReference[] = {-0.013185f, -0.012769f, -0.012023f}; +#else + const float kSpeechProbabilityReference = 0.70645678f; + 
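// Note: ARM builds use the fixed-point NS implementation, which does not + // compute a speech probability; the ARM branches above therefore expect + // AudioProcessing::kUnsupportedFunctionError (-4.0f). +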
const float kNoiseEstimateReference[] = + {2168.783203f, 6902.895508f, 13190.677734f}; + const float kOutputReference[] = {-0.013062f, -0.012657f, -0.011934f}; +#endif + + RunBitexactnessTest(48000, 1, NoiseSuppression::Level::kLow, + kSpeechProbabilityReference, kNoiseEstimateReference, + kOutputReference); +} + +TEST(NoiseSuppresionBitExactnessTest, Stereo16kHzLow) { +#if defined(WEBRTC_ARCH_ARM64) + const float kSpeechProbabilityReference = -4.0f; + const float kNoiseEstimateReference[] = + {9992.127930f, 12689.569336f, 11589.296875f}; + const float kOutputReference[] = {-0.011108f, -0.007904f, -0.012390f, + -0.002441f, 0.000855f, -0.003204f}; +#elif defined(WEBRTC_ARCH_ARM) + const float kSpeechProbabilityReference = -4.0f; + const float kNoiseEstimateReference[] = + {10321.353516f, 12133.852539f, 10923.060547f}; + const float kOutputReference[] = {-0.011108f, -0.007904f, -0.012390f, + -0.002472f, 0.000916f, -0.003235f}; +#else + const float kSpeechProbabilityReference = 0.67230678f; + const float kNoiseEstimateReference[] = + {9771.250000f, 11329.377930f, 10503.052734f}; + const float kOutputReference[] = {-0.011459f, -0.008110f, -0.012728f, + -0.002399f, 0.001018f, -0.003189f}; +#endif + + RunBitexactnessTest(16000, 2, NoiseSuppression::Level::kLow, + kSpeechProbabilityReference, kNoiseEstimateReference, + kOutputReference); +} + +TEST(NoiseSuppresionBitExactnessTest, Mono16kHzModerate) { +#if defined(WEBRTC_ARCH_ARM64) + const float kSpeechProbabilityReference = -4.0f; + const float kNoiseEstimateReference[] = + {2057.085938f, 7601.055176f, 19666.187500f}; + const float kOutputReference[] = {0.004669f, 0.005524f, 0.005432f}; +#elif defined(WEBRTC_ARCH_ARM) + const float kSpeechProbabilityReference = -4.0f; + const float kNoiseEstimateReference[] = + {2244.497803f, 6864.164062f, 16726.523438f}; + const float kOutputReference[] = {0.004669f, 0.005615f, 0.005585f}; +#else + const float kSpeechProbabilityReference = 0.70897013f; + const float kNoiseEstimateReference[] = + {2171.490723f, 6553.567871f, 15626.562500f}; + const float kOutputReference[] = {0.004513f, 0.005590f, 0.005614f}; +#endif + + RunBitexactnessTest(16000, 1, NoiseSuppression::Level::kModerate, + kSpeechProbabilityReference, kNoiseEstimateReference, + kOutputReference); +} + +TEST(NoiseSuppresionBitExactnessTest, Mono16kHzHigh) { +#if defined(WEBRTC_ARCH_ARM64) + const float kSpeechProbabilityReference = -4.0f; + const float kNoiseEstimateReference[] = + {2095.148193f, 7698.553711f, 19689.533203f}; + const float kOutputReference[] = {0.004639f, 0.005402f, 0.005310f}; +#elif defined(WEBRTC_ARCH_ARM) + const float kSpeechProbabilityReference = -4.0f; + const float kNoiseEstimateReference[] = + {2282.515625f, 6984.408203f, 16920.960938f}; + const float kOutputReference[] = {0.004547f, 0.005432f, 0.005402f}; +#else + const float kSpeechProbabilityReference = 0.70106733f; + const float kNoiseEstimateReference[] = + {2224.968506f, 6712.025879f, 15785.087891f}; + const float kOutputReference[] = {0.004394f, 0.005406f, 0.005416f}; +#endif + + RunBitexactnessTest(16000, 1, NoiseSuppression::Level::kHigh, + kSpeechProbabilityReference, kNoiseEstimateReference, + kOutputReference); +} + +TEST(NoiseSuppresionBitExactnessTest, Mono16kHzVeryHigh) { +#if defined(WEBRTC_ARCH_ARM64) + const float kSpeechProbabilityReference = -4.0f; + const float kNoiseEstimateReference[] = + {2677.733398f, 6186.987305f, 14365.744141f}; + const float kOutputReference[] = {0.004273f, 0.005127f, 0.005188f}; +#elif defined(WEBRTC_ARCH_ARM) + const float 
kSpeechProbabilityReference = -4.0f; + const float kNoiseEstimateReference[] = + {2677.733398f, 6186.987305f, 14365.744141f}; + const float kOutputReference[] = {0.004273f, 0.005127f, 0.005188f}; +#else + const float kSpeechProbabilityReference = 0.70281971f; + const float kNoiseEstimateReference[] = + {2254.347900f, 6723.699707f, 15771.625977f}; + const float kOutputReference[] = {0.004321f, 0.005247f, 0.005263f}; +#endif + + RunBitexactnessTest(16000, 1, NoiseSuppression::Level::kVeryHigh, + kSpeechProbabilityReference, kNoiseEstimateReference, + kOutputReference); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/ns/defines.h b/third_party/libwebrtc/webrtc/modules/audio_processing/ns/defines.h new file mode 100644 index 0000000000..66b45a9565 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/ns/defines.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_DEFINES_H_ +#define MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_DEFINES_H_ + +#define BLOCKL_MAX 160 // max processing block length: 160 +#define ANAL_BLOCKL_MAX 256 // max analysis block length: 256 +#define HALF_ANAL_BLOCKL 129 // half max analysis block length + 1 +#define NUM_HIGH_BANDS_MAX 2 // max number of high bands: 2 + +#define QUANTILE (float)0.25 + +#define SIMULT 3 +#define END_STARTUP_LONG 200 +#define END_STARTUP_SHORT 50 +#define FACTOR (float)40.0 +#define WIDTH (float)0.01 + +// Length of fft work arrays. 
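+// (fft4g's rdft uses ip as the bit-reversal work area, where ip[0] == 0 +// requests reinitialization, and w as the cos/sin twiddle table.)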
+#define IP_LENGTH (ANAL_BLOCKL_MAX >> 1) // must be at least ceil(2 + sqrt(ANAL_BLOCKL_MAX/2)) +#define W_LENGTH (ANAL_BLOCKL_MAX >> 1) + +// Parameters for new method. +#define DD_PR_SNR (float)0.98 // DD update of prior SNR +#define LRT_TAVG (float)0.50 // tavg parameter for LRT (previously 0.90) +#define SPECT_FL_TAVG (float)0.30 // tavg parameter for spectral flatness measure +#define SPECT_DIFF_TAVG (float)0.30 // tavg parameter for spectral difference measure +#define PRIOR_UPDATE (float)0.10 // update parameter of prior model +#define NOISE_UPDATE (float)0.90 // update parameter for noise +#define SPEECH_UPDATE (float)0.99 // update parameter when likely speech +#define WIDTH_PR_MAP (float)4.0 // width parameter in sigmoid map for prior model +#define LRT_FEATURE_THR (float)0.5 // default threshold for LRT feature +#define SF_FEATURE_THR (float)0.5 // default threshold for Spectral Flatness feature +#define SD_FEATURE_THR (float)0.5 // default threshold for Spectral Difference feature +#define PROB_RANGE (float)0.20 // probability threshold for noise state in + // speech/noise likelihood +#define HIST_PAR_EST 1000 // histogram size for estimation of parameters +#define GAMMA_PAUSE (float)0.05 // update for conservative noise estimate +// +#define B_LIM (float)0.5 // threshold in final energy gain factor calculation +#endif // MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_DEFINES_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/ns/noise_suppression.c b/third_party/libwebrtc/webrtc/modules/audio_processing/ns/noise_suppression.c new file mode 100644 index 0000000000..e21416f956 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/ns/noise_suppression.c @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree.
+ */ + +#include "modules/audio_processing/ns/noise_suppression.h" + +#include <stdlib.h> +#include <string.h> + +#include "common_audio/signal_processing/include/signal_processing_library.h" +#include "modules/audio_processing/ns/defines.h" +#include "modules/audio_processing/ns/ns_core.h" + +NsHandle* WebRtcNs_Create() { + NoiseSuppressionC* self = malloc(sizeof(NoiseSuppressionC)); + self->initFlag = 0; + return (NsHandle*)self; +} + +void WebRtcNs_Free(NsHandle* NS_inst) { + free(NS_inst); +} + +int WebRtcNs_Init(NsHandle* NS_inst, uint32_t fs) { + return WebRtcNs_InitCore((NoiseSuppressionC*)NS_inst, fs); +} + +int WebRtcNs_set_policy(NsHandle* NS_inst, int mode) { + return WebRtcNs_set_policy_core((NoiseSuppressionC*)NS_inst, mode); +} + +void WebRtcNs_Analyze(NsHandle* NS_inst, const float* spframe) { + WebRtcNs_AnalyzeCore((NoiseSuppressionC*)NS_inst, spframe); +} + +void WebRtcNs_Process(NsHandle* NS_inst, + const float* const* spframe, + size_t num_bands, + float* const* outframe) { + WebRtcNs_ProcessCore((NoiseSuppressionC*)NS_inst, spframe, num_bands, + outframe); +} + +float WebRtcNs_prior_speech_probability(NsHandle* handle) { + NoiseSuppressionC* self = (NoiseSuppressionC*)handle; + if (handle == NULL) { + return -1; + } + if (self->initFlag == 0) { + return -1; + } + return self->priorSpeechProb; +} + +const float* WebRtcNs_noise_estimate(const NsHandle* handle) { + const NoiseSuppressionC* self = (const NoiseSuppressionC*)handle; + if (handle == NULL || self->initFlag == 0) { + return NULL; + } + return self->noise; +} + +size_t WebRtcNs_num_freq() { + return HALF_ANAL_BLOCKL; +} diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/ns/noise_suppression.h b/third_party/libwebrtc/webrtc/modules/audio_processing/ns/noise_suppression.h new file mode 100644 index 0000000000..a167142a7f --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/ns/noise_suppression.h @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_NS_NOISE_SUPPRESSION_H_ +#define MODULES_AUDIO_PROCESSING_NS_NOISE_SUPPRESSION_H_ + +#include <stddef.h> + +#include "typedefs.h" // NOLINT(build/include) + +typedef struct NsHandleT NsHandle; + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * This function creates an instance of the floating point Noise Suppression. + */ +NsHandle* WebRtcNs_Create(); + +/* + * This function frees the dynamic memory of a specified noise suppression + * instance. + * + * Input: + * - NS_inst : Pointer to NS instance that should be freed + */ +void WebRtcNs_Free(NsHandle* NS_inst); + +/* + * This function initializes an NS instance and must be called before any + * other processing is done. + * + * Input: + * - NS_inst : Instance that should be initialized + * - fs : sampling frequency + * + * Output: + * - NS_inst : Initialized instance + * + * Return value : 0 - Ok + * -1 - Error + */ +int WebRtcNs_Init(NsHandle* NS_inst, uint32_t fs); + +/* + * This changes the aggressiveness of the noise suppression method. + * + * Input: + * - NS_inst : Noise suppression instance.
+ * - mode : 0: Mild, 1: Medium, 2: Aggressive + * + * Output: + * - NS_inst : Updated instance. + * + * Return value : 0 - Ok + * -1 - Error + */ +int WebRtcNs_set_policy(NsHandle* NS_inst, int mode); + +/* + * This function estimates the background noise for the inserted speech frame. + * The input and output signals should always be 10ms (80 or 160 samples). + * + * Input + * - NS_inst : Noise suppression instance. + * - spframe : Pointer to speech frame buffer for L band + * + * Output: + * - NS_inst : Updated NS instance + */ +void WebRtcNs_Analyze(NsHandle* NS_inst, const float* spframe); + +/* + * This function performs noise suppression for the inserted speech frame. The + * input and output signals should always be 10ms (80 or 160 samples). + * + * Input + * - NS_inst : Noise suppression instance. + * - spframe : Pointer to speech frame buffer for each band + * - num_bands : Number of bands + * + * Output: + * - NS_inst : Updated NS instance + * - outframe : Pointer to output frame for each band + */ +void WebRtcNs_Process(NsHandle* NS_inst, + const float* const* spframe, + size_t num_bands, + float* const* outframe); + +/* Returns the internally used prior speech probability of the current frame. + * There is a frequency-bin-based one as well, with which this should not be + * confused. + * + * Input + * - handle : Noise suppression instance. + * + * Return value : Prior speech probability in interval [0.0, 1.0]. + * -1 - NULL pointer or uninitialized instance. + */ +float WebRtcNs_prior_speech_probability(NsHandle* handle); + +/* Returns a pointer to the noise estimate per frequency bin. The number of + * frequency bins can be obtained by calling WebRtcNs_num_freq(). + * + * Input + * - handle : Noise suppression instance. + * + * Return value : Pointer to the noise estimate per frequency bin. + * Returns NULL if the input is a NULL pointer or an + * uninitialized instance. + */ +const float* WebRtcNs_noise_estimate(const NsHandle* handle); + +/* Returns the number of frequency bins, which is, for example, the length of + * the noise estimate. + * + * Return value : Number of frequency bins. + */ +size_t WebRtcNs_num_freq(); + +#ifdef __cplusplus +} +#endif + +#endif // MODULES_AUDIO_PROCESSING_NS_NOISE_SUPPRESSION_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/ns/noise_suppression_x.c b/third_party/libwebrtc/webrtc/modules/audio_processing/ns/noise_suppression_x.c new file mode 100644 index 0000000000..1fd3ebc67e --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/ns/noise_suppression_x.c @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree.
+ */ + +#include "modules/audio_processing/ns/noise_suppression_x.h" + +#include <stdlib.h> + +#include "common_audio/signal_processing/include/real_fft.h" +#include "modules/audio_processing/ns/nsx_core.h" +#include "modules/audio_processing/ns/nsx_defines.h" + +NsxHandle* WebRtcNsx_Create() { + NoiseSuppressionFixedC* self = malloc(sizeof(NoiseSuppressionFixedC)); + WebRtcSpl_Init(); + self->real_fft = NULL; + self->initFlag = 0; + return (NsxHandle*)self; +} + +void WebRtcNsx_Free(NsxHandle* nsxInst) { + WebRtcSpl_FreeRealFFT(((NoiseSuppressionFixedC*)nsxInst)->real_fft); + free(nsxInst); +} + +int WebRtcNsx_Init(NsxHandle* nsxInst, uint32_t fs) { + return WebRtcNsx_InitCore((NoiseSuppressionFixedC*)nsxInst, fs); +} + +int WebRtcNsx_set_policy(NsxHandle* nsxInst, int mode) { + return WebRtcNsx_set_policy_core((NoiseSuppressionFixedC*)nsxInst, mode); +} + +void WebRtcNsx_Process(NsxHandle* nsxInst, + const short* const* speechFrame, + int num_bands, + short* const* outFrame) { + WebRtcNsx_ProcessCore((NoiseSuppressionFixedC*)nsxInst, speechFrame, + num_bands, outFrame); +} + +const uint32_t* WebRtcNsx_noise_estimate(const NsxHandle* nsxInst, + int* q_noise) { + *q_noise = 11; + const NoiseSuppressionFixedC* self = (const NoiseSuppressionFixedC*)nsxInst; + if (nsxInst == NULL || self->initFlag == 0) { + return NULL; + } + *q_noise += self->prevQNoise; + return self->prevNoiseU32; +} + +size_t WebRtcNsx_num_freq() { + return HALF_ANAL_BLOCKL; +} diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/ns/noise_suppression_x.h b/third_party/libwebrtc/webrtc/modules/audio_processing/ns/noise_suppression_x.h new file mode 100644 index 0000000000..838861db79 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/ns/noise_suppression_x.h @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_NS_NOISE_SUPPRESSION_X_H_ +#define MODULES_AUDIO_PROCESSING_NS_NOISE_SUPPRESSION_X_H_ + +#include <stddef.h> + +#include "typedefs.h" // NOLINT(build/include) + +typedef struct NsxHandleT NsxHandle; + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * This function creates an instance of the fixed point Noise Suppression. + */ +NsxHandle* WebRtcNsx_Create(); + +/* + * This function frees the dynamic memory of a specified noise suppression + * instance. + * + * Input: + * - nsxInst : Pointer to NS instance that should be freed + */ +void WebRtcNsx_Free(NsxHandle* nsxInst); + +/* + * This function initializes an NS instance. + * + * Input: + * - nsxInst : Instance that should be initialized + * - fs : sampling frequency + * + * Output: + * - nsxInst : Initialized instance + * + * Return value : 0 - Ok + * -1 - Error + */ +int WebRtcNsx_Init(NsxHandle* nsxInst, uint32_t fs); + +/* + * This changes the aggressiveness of the noise suppression method.
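+ * A higher mode suppresses more noise at the cost of more speech distortion. + * + * A minimal usage sketch (hypothetical in_bands/out_bands buffers; error + * checks elided): + * NsxHandle* ns = WebRtcNsx_Create(); + * WebRtcNsx_Init(ns, 16000); + * WebRtcNsx_set_policy(ns, 2); // 2 = Aggressive. + * WebRtcNsx_Process(ns, in_bands, 1, out_bands); // One 10ms, 160-sample frame. + * WebRtcNsx_Free(ns);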
+ *
+ * Input:
+ * - nsxInst : Noise suppression instance.
+ * - mode : 0: Mild, 1: Medium, 2: Aggressive
+ *
+ * Output:
+ * - nsxInst : Updated instance.
+ *
+ * Return value : 0 - Ok
+ * -1 - Error
+ */
+int WebRtcNsx_set_policy(NsxHandle* nsxInst, int mode);
+
+/*
+ * This function does noise suppression for the inserted speech frame. The
+ * input and output signals should always be 10ms (80 or 160 samples).
+ *
+ * Input:
+ * - nsxInst : NSx instance. Needs to be initialized before the call.
+ * - speechFrame : Pointer to speech frame buffer for each band
+ * - num_bands : Number of bands
+ *
+ * Output:
+ * - nsxInst : Updated NSx instance
+ * - outFrame : Pointer to output frame for each band
+ */
+void WebRtcNsx_Process(NsxHandle* nsxInst,
+                       const short* const* speechFrame,
+                       int num_bands,
+                       short* const* outFrame);
+
+/* Returns a pointer to the noise estimate per frequency bin. The number of
+ * frequency bins can be obtained using WebRtcNsx_num_freq().
+ *
+ * Input:
+ * - nsxInst : NSx instance. Needs to be initialized before the call.
+ * - q_noise : Q value of the noise estimate, which is the number of
+ * bits that it needs to be right-shifted to be
+ * normalized.
+ *
+ * Return value : Pointer to the noise estimate per frequency bin.
+ * Returns NULL if the input is a NULL pointer or an
+ * uninitialized instance.
+ */
+const uint32_t* WebRtcNsx_noise_estimate(const NsxHandle* nsxInst,
+                                         int* q_noise);
+
+/* Returns the number of frequency bins, which is, for example, the length of
+ * the noise estimate.
+ *
+ * Return value : Number of frequency bins.
+ */
+size_t WebRtcNsx_num_freq();
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // MODULES_AUDIO_PROCESSING_NS_NOISE_SUPPRESSION_X_H_
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/ns/ns_core.c b/third_party/libwebrtc/webrtc/modules/audio_processing/ns/ns_core.c
new file mode 100644
index 0000000000..3345b2451e
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/ns/ns_core.c
@@ -0,0 +1,1418 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <math.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include "rtc_base/checks.h"
+#include "common_audio/fft4g.h"
+#include "common_audio/signal_processing/include/signal_processing_library.h"
+#include "modules/audio_processing/ns/noise_suppression.h"
+#include "modules/audio_processing/ns/ns_core.h"
+#include "modules/audio_processing/ns/windows_private.h"
+
+// Set Feature Extraction Parameters.
+static void set_feature_extraction_parameters(NoiseSuppressionC* self) {
+  // Bin size of histogram.
+  self->featureExtractionParams.binSizeLrt = 0.1f;
+  self->featureExtractionParams.binSizeSpecFlat = 0.05f;
+  self->featureExtractionParams.binSizeSpecDiff = 0.1f;
+
+  // Range of histogram over which LRT threshold is computed.
+  self->featureExtractionParams.rangeAvgHistLrt = 1.f;
+
+  // Scale parameters: multiply dominant peaks of the histograms by scale factor
+  // to obtain thresholds for prior model.
+  // For LRT and spectral difference.
+  self->featureExtractionParams.factor1ModelPars = 1.2f;
+  // For spectral_flatness: used when noise is flatter than speech.
+ self->featureExtractionParams.factor2ModelPars = 0.9f; + + // Peak limit for spectral flatness (varies between 0 and 1). + self->featureExtractionParams.thresPosSpecFlat = 0.6f; + + // Limit on spacing of two highest peaks in histogram: spacing determined by + // bin size. + self->featureExtractionParams.limitPeakSpacingSpecFlat = + 2 * self->featureExtractionParams.binSizeSpecFlat; + self->featureExtractionParams.limitPeakSpacingSpecDiff = + 2 * self->featureExtractionParams.binSizeSpecDiff; + + // Limit on relevance of second peak. + self->featureExtractionParams.limitPeakWeightsSpecFlat = 0.5f; + self->featureExtractionParams.limitPeakWeightsSpecDiff = 0.5f; + + // Fluctuation limit of LRT feature. + self->featureExtractionParams.thresFluctLrt = 0.05f; + + // Limit on the max and min values for the feature thresholds. + self->featureExtractionParams.maxLrt = 1.f; + self->featureExtractionParams.minLrt = 0.2f; + + self->featureExtractionParams.maxSpecFlat = 0.95f; + self->featureExtractionParams.minSpecFlat = 0.1f; + + self->featureExtractionParams.maxSpecDiff = 1.f; + self->featureExtractionParams.minSpecDiff = 0.16f; + + // Criteria of weight of histogram peak to accept/reject feature. + self->featureExtractionParams.thresWeightSpecFlat = + (int)(0.3 * (self->modelUpdatePars[1])); // For spectral flatness. + self->featureExtractionParams.thresWeightSpecDiff = + (int)(0.3 * (self->modelUpdatePars[1])); // For spectral difference. +} + +// Initialize state. +int WebRtcNs_InitCore(NoiseSuppressionC* self, uint32_t fs) { + int i; + // Check for valid pointer. + if (self == NULL) { + return -1; + } + + // Initialization of struct. + if (fs == 8000 || fs == 16000 || fs == 32000 || fs == 48000) { + self->fs = fs; + } else { + return -1; + } + self->windShift = 0; + // We only support 10ms frames. + if (fs == 8000) { + self->blockLen = 80; + self->anaLen = 128; + self->window = kBlocks80w128; + } else { + self->blockLen = 160; + self->anaLen = 256; + self->window = kBlocks160w256; + } + self->magnLen = self->anaLen / 2 + 1; // Number of frequency bins. + + // Initialize FFT work arrays. + self->ip[0] = 0; // Setting this triggers initialization. + memset(self->dataBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX); + WebRtc_rdft(self->anaLen, 1, self->dataBuf, self->ip, self->wfft); + + memset(self->analyzeBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX); + memset(self->dataBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX); + memset(self->syntBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX); + + // For HB processing. + memset(self->dataBufHB, + 0, + sizeof(float) * NUM_HIGH_BANDS_MAX * ANAL_BLOCKL_MAX); + + // For quantile noise estimation. + memset(self->quantile, 0, sizeof(float) * HALF_ANAL_BLOCKL); + for (i = 0; i < SIMULT * HALF_ANAL_BLOCKL; i++) { + self->lquantile[i] = 8.f; + self->density[i] = 0.3f; + } + + for (i = 0; i < SIMULT; i++) { + self->counter[i] = + (int)floor((float)(END_STARTUP_LONG * (i + 1)) / (float)SIMULT); + } + + self->updates = 0; + + // Wiener filter initialization. + for (i = 0; i < HALF_ANAL_BLOCKL; i++) { + self->smooth[i] = 1.f; + } + + // Set the aggressiveness: default. + self->aggrMode = 0; + + // Initialize variables for new method. + self->priorSpeechProb = 0.5f; // Prior prob for speech/noise. + // Previous analyze mag spectrum. + memset(self->magnPrevAnalyze, 0, sizeof(float) * HALF_ANAL_BLOCKL); + // Previous process mag spectrum. + memset(self->magnPrevProcess, 0, sizeof(float) * HALF_ANAL_BLOCKL); + // Current noise-spectrum. 
+ memset(self->noise, 0, sizeof(float) * HALF_ANAL_BLOCKL); + // Previous noise-spectrum. + memset(self->noisePrev, 0, sizeof(float) * HALF_ANAL_BLOCKL); + // Conservative noise spectrum estimate. + memset(self->magnAvgPause, 0, sizeof(float) * HALF_ANAL_BLOCKL); + // For estimation of HB in second pass. + memset(self->speechProb, 0, sizeof(float) * HALF_ANAL_BLOCKL); + // Initial average magnitude spectrum. + memset(self->initMagnEst, 0, sizeof(float) * HALF_ANAL_BLOCKL); + for (i = 0; i < HALF_ANAL_BLOCKL; i++) { + // Smooth LR (same as threshold). + self->logLrtTimeAvg[i] = LRT_FEATURE_THR; + } + + // Feature quantities. + // Spectral flatness (start on threshold). + self->featureData[0] = SF_FEATURE_THR; + self->featureData[1] = 0.f; // Spectral entropy: not used in this version. + self->featureData[2] = 0.f; // Spectral variance: not used in this version. + // Average LRT factor (start on threshold). + self->featureData[3] = LRT_FEATURE_THR; + // Spectral template diff (start on threshold). + self->featureData[4] = SF_FEATURE_THR; + self->featureData[5] = 0.f; // Normalization for spectral difference. + // Window time-average of input magnitude spectrum. + self->featureData[6] = 0.f; + + memset(self->parametricNoise, 0, sizeof(float) * HALF_ANAL_BLOCKL); + + // Histogram quantities: used to estimate/update thresholds for features. + memset(self->histLrt, 0, sizeof(int) * HIST_PAR_EST); + memset(self->histSpecFlat, 0, sizeof(int) * HIST_PAR_EST); + memset(self->histSpecDiff, 0, sizeof(int) * HIST_PAR_EST); + + + self->blockInd = -1; // Frame counter. + // Default threshold for LRT feature. + self->priorModelPars[0] = LRT_FEATURE_THR; + // Threshold for spectral flatness: determined on-line. + self->priorModelPars[1] = 0.5f; + // sgn_map par for spectral measure: 1 for flatness measure. + self->priorModelPars[2] = 1.f; + // Threshold for template-difference feature: determined on-line. + self->priorModelPars[3] = 0.5f; + // Default weighting parameter for LRT feature. + self->priorModelPars[4] = 1.f; + // Default weighting parameter for spectral flatness feature. + self->priorModelPars[5] = 0.f; + // Default weighting parameter for spectral difference feature. + self->priorModelPars[6] = 0.f; + + // Update flag for parameters: + // 0 no update, 1 = update once, 2 = update every window. + self->modelUpdatePars[0] = 2; + self->modelUpdatePars[1] = 500; // Window for update. + // Counter for update of conservative noise spectrum. + self->modelUpdatePars[2] = 0; + // Counter if the feature thresholds are updated during the sequence. + self->modelUpdatePars[3] = self->modelUpdatePars[1]; + + self->signalEnergy = 0.0; + self->sumMagn = 0.0; + self->whiteNoiseLevel = 0.0; + self->pinkNoiseNumerator = 0.0; + self->pinkNoiseExp = 0.0; + + set_feature_extraction_parameters(self); + + // Default mode. + WebRtcNs_set_policy_core(self, 0); + + self->initFlag = 1; + return 0; +} + +// Estimate noise. +static void NoiseEstimation(NoiseSuppressionC* self, + float* magn, + float* noise) { + size_t i, s, offset; + float lmagn[HALF_ANAL_BLOCKL], delta; + + if (self->updates < END_STARTUP_LONG) { + self->updates++; + } + + for (i = 0; i < self->magnLen; i++) { + lmagn[i] = (float)log(magn[i]); + } + + // Loop over simultaneous estimates. + for (s = 0; s < SIMULT; s++) { + offset = s * self->magnLen; + + // newquantest(...) + for (i = 0; i < self->magnLen; i++) { + // Compute delta. 
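+      // (Sketch of the quantile-tracking rule implemented below: the step
+      // size |delta| shrinks as the local density estimate grows, and the
+      // log-quantile moves up by QUANTILE * delta / (counter + 1) when the
+      // observation lies above it, or down by (1 - QUANTILE) * delta /
+      // (counter + 1) otherwise, so it drifts towards the QUANTILE-th
+      // quantile of the log-magnitude distribution.)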
+ if (self->density[offset + i] > 1.0) { + delta = FACTOR * 1.f / self->density[offset + i]; + } else { + delta = FACTOR; + } + + // Update log quantile estimate. + if (lmagn[i] > self->lquantile[offset + i]) { + self->lquantile[offset + i] += + QUANTILE * delta / (float)(self->counter[s] + 1); + } else { + self->lquantile[offset + i] -= + (1.f - QUANTILE) * delta / (float)(self->counter[s] + 1); + } + + // Update density estimate. + if (fabs(lmagn[i] - self->lquantile[offset + i]) < WIDTH) { + self->density[offset + i] = + ((float)self->counter[s] * self->density[offset + i] + + 1.f / (2.f * WIDTH)) / + (float)(self->counter[s] + 1); + } + } // End loop over magnitude spectrum. + + if (self->counter[s] >= END_STARTUP_LONG) { + self->counter[s] = 0; + if (self->updates >= END_STARTUP_LONG) { + for (i = 0; i < self->magnLen; i++) { + self->quantile[i] = (float)exp(self->lquantile[offset + i]); + } + } + } + + self->counter[s]++; + } // End loop over simultaneous estimates. + + // Sequentially update the noise during startup. + if (self->updates < END_STARTUP_LONG) { + // Use the last "s" to get noise during startup that differ from zero. + for (i = 0; i < self->magnLen; i++) { + self->quantile[i] = (float)exp(self->lquantile[offset + i]); + } + } + + for (i = 0; i < self->magnLen; i++) { + noise[i] = self->quantile[i]; + } +} + +// Extract thresholds for feature parameters. +// Histograms are computed over some window size (given by +// self->modelUpdatePars[1]). +// Thresholds and weights are extracted every window. +// |flag| = 0 updates histogram only, |flag| = 1 computes the threshold/weights. +// Threshold and weights are returned in: self->priorModelPars. +static void FeatureParameterExtraction(NoiseSuppressionC* self, int flag) { + int i, useFeatureSpecFlat, useFeatureSpecDiff, numHistLrt; + int maxPeak1, maxPeak2; + int weightPeak1SpecFlat, weightPeak2SpecFlat, weightPeak1SpecDiff, + weightPeak2SpecDiff; + + float binMid, featureSum; + float posPeak1SpecFlat, posPeak2SpecFlat, posPeak1SpecDiff, posPeak2SpecDiff; + float fluctLrt, avgHistLrt, avgSquareHistLrt, avgHistLrtCompl; + + // 3 features: LRT, flatness, difference. + // lrt_feature = self->featureData[3]; + // flat_feature = self->featureData[0]; + // diff_feature = self->featureData[4]; + + // Update histograms. + if (flag == 0) { + // LRT + if ((self->featureData[3] < + HIST_PAR_EST * self->featureExtractionParams.binSizeLrt) && + (self->featureData[3] >= 0.0)) { + i = (int)(self->featureData[3] / + self->featureExtractionParams.binSizeLrt); + self->histLrt[i]++; + } + // Spectral flatness. + if ((self->featureData[0] < + HIST_PAR_EST * self->featureExtractionParams.binSizeSpecFlat) && + (self->featureData[0] >= 0.0)) { + i = (int)(self->featureData[0] / + self->featureExtractionParams.binSizeSpecFlat); + self->histSpecFlat[i]++; + } + // Spectral difference. + if ((self->featureData[4] < + HIST_PAR_EST * self->featureExtractionParams.binSizeSpecDiff) && + (self->featureData[4] >= 0.0)) { + i = (int)(self->featureData[4] / + self->featureExtractionParams.binSizeSpecDiff); + self->histSpecDiff[i]++; + } + } + + // Extract parameters for speech/noise probability. + if (flag == 1) { + // LRT feature: compute the average over + // self->featureExtractionParams.rangeAvgHistLrt. 
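+    // (The loop below also accumulates the mean and second moment of the
+    // full histogram; |fluctLrt|, formed from these, is a variance-like
+    // measure of how much the LRT feature moved over the window. A very
+    // small value is treated as a noise-only state further down.)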
+ avgHistLrt = 0.0; + avgHistLrtCompl = 0.0; + avgSquareHistLrt = 0.0; + numHistLrt = 0; + for (i = 0; i < HIST_PAR_EST; i++) { + binMid = ((float)i + 0.5f) * self->featureExtractionParams.binSizeLrt; + if (binMid <= self->featureExtractionParams.rangeAvgHistLrt) { + avgHistLrt += self->histLrt[i] * binMid; + numHistLrt += self->histLrt[i]; + } + avgSquareHistLrt += self->histLrt[i] * binMid * binMid; + avgHistLrtCompl += self->histLrt[i] * binMid; + } + if (numHistLrt > 0) { + avgHistLrt = avgHistLrt / ((float)numHistLrt); + } + avgHistLrtCompl = avgHistLrtCompl / ((float)self->modelUpdatePars[1]); + avgSquareHistLrt = avgSquareHistLrt / ((float)self->modelUpdatePars[1]); + fluctLrt = avgSquareHistLrt - avgHistLrt * avgHistLrtCompl; + // Get threshold for LRT feature. + if (fluctLrt < self->featureExtractionParams.thresFluctLrt) { + // Very low fluctuation, so likely noise. + self->priorModelPars[0] = self->featureExtractionParams.maxLrt; + } else { + self->priorModelPars[0] = + self->featureExtractionParams.factor1ModelPars * avgHistLrt; + // Check if value is within min/max range. + if (self->priorModelPars[0] < self->featureExtractionParams.minLrt) { + self->priorModelPars[0] = self->featureExtractionParams.minLrt; + } + if (self->priorModelPars[0] > self->featureExtractionParams.maxLrt) { + self->priorModelPars[0] = self->featureExtractionParams.maxLrt; + } + } + // Done with LRT feature. + + // For spectral flatness and spectral difference: compute the main peaks of + // histogram. + maxPeak1 = 0; + maxPeak2 = 0; + posPeak1SpecFlat = 0.0; + posPeak2SpecFlat = 0.0; + weightPeak1SpecFlat = 0; + weightPeak2SpecFlat = 0; + + // Peaks for flatness. + for (i = 0; i < HIST_PAR_EST; i++) { + binMid = + (i + 0.5f) * self->featureExtractionParams.binSizeSpecFlat; + if (self->histSpecFlat[i] > maxPeak1) { + // Found new "first" peak. + maxPeak2 = maxPeak1; + weightPeak2SpecFlat = weightPeak1SpecFlat; + posPeak2SpecFlat = posPeak1SpecFlat; + + maxPeak1 = self->histSpecFlat[i]; + weightPeak1SpecFlat = self->histSpecFlat[i]; + posPeak1SpecFlat = binMid; + } else if (self->histSpecFlat[i] > maxPeak2) { + // Found new "second" peak. + maxPeak2 = self->histSpecFlat[i]; + weightPeak2SpecFlat = self->histSpecFlat[i]; + posPeak2SpecFlat = binMid; + } + } + + // Compute two peaks for spectral difference. + maxPeak1 = 0; + maxPeak2 = 0; + posPeak1SpecDiff = 0.0; + posPeak2SpecDiff = 0.0; + weightPeak1SpecDiff = 0; + weightPeak2SpecDiff = 0; + // Peaks for spectral difference. + for (i = 0; i < HIST_PAR_EST; i++) { + binMid = + ((float)i + 0.5f) * self->featureExtractionParams.binSizeSpecDiff; + if (self->histSpecDiff[i] > maxPeak1) { + // Found new "first" peak. + maxPeak2 = maxPeak1; + weightPeak2SpecDiff = weightPeak1SpecDiff; + posPeak2SpecDiff = posPeak1SpecDiff; + + maxPeak1 = self->histSpecDiff[i]; + weightPeak1SpecDiff = self->histSpecDiff[i]; + posPeak1SpecDiff = binMid; + } else if (self->histSpecDiff[i] > maxPeak2) { + // Found new "second" peak. + maxPeak2 = self->histSpecDiff[i]; + weightPeak2SpecDiff = self->histSpecDiff[i]; + posPeak2SpecDiff = binMid; + } + } + + // For spectrum flatness feature. + useFeatureSpecFlat = 1; + // Merge the two peaks if they are close. 
+ if ((fabs(posPeak2SpecFlat - posPeak1SpecFlat) < + self->featureExtractionParams.limitPeakSpacingSpecFlat) && + (weightPeak2SpecFlat > + self->featureExtractionParams.limitPeakWeightsSpecFlat * + weightPeak1SpecFlat)) { + weightPeak1SpecFlat += weightPeak2SpecFlat; + posPeak1SpecFlat = 0.5f * (posPeak1SpecFlat + posPeak2SpecFlat); + } + // Reject if weight of peaks is not large enough, or peak value too small. + if (weightPeak1SpecFlat < + self->featureExtractionParams.thresWeightSpecFlat || + posPeak1SpecFlat < self->featureExtractionParams.thresPosSpecFlat) { + useFeatureSpecFlat = 0; + } + // If selected, get the threshold. + if (useFeatureSpecFlat == 1) { + // Compute the threshold. + self->priorModelPars[1] = + self->featureExtractionParams.factor2ModelPars * posPeak1SpecFlat; + // Check if value is within min/max range. + if (self->priorModelPars[1] < self->featureExtractionParams.minSpecFlat) { + self->priorModelPars[1] = self->featureExtractionParams.minSpecFlat; + } + if (self->priorModelPars[1] > self->featureExtractionParams.maxSpecFlat) { + self->priorModelPars[1] = self->featureExtractionParams.maxSpecFlat; + } + } + // Done with flatness feature. + + // For template feature. + useFeatureSpecDiff = 1; + // Merge the two peaks if they are close. + if ((fabs(posPeak2SpecDiff - posPeak1SpecDiff) < + self->featureExtractionParams.limitPeakSpacingSpecDiff) && + (weightPeak2SpecDiff > + self->featureExtractionParams.limitPeakWeightsSpecDiff * + weightPeak1SpecDiff)) { + weightPeak1SpecDiff += weightPeak2SpecDiff; + posPeak1SpecDiff = 0.5f * (posPeak1SpecDiff + posPeak2SpecDiff); + } + // Get the threshold value. + self->priorModelPars[3] = + self->featureExtractionParams.factor1ModelPars * posPeak1SpecDiff; + // Reject if weight of peaks is not large enough. + if (weightPeak1SpecDiff < + self->featureExtractionParams.thresWeightSpecDiff) { + useFeatureSpecDiff = 0; + } + // Check if value is within min/max range. + if (self->priorModelPars[3] < self->featureExtractionParams.minSpecDiff) { + self->priorModelPars[3] = self->featureExtractionParams.minSpecDiff; + } + if (self->priorModelPars[3] > self->featureExtractionParams.maxSpecDiff) { + self->priorModelPars[3] = self->featureExtractionParams.maxSpecDiff; + } + // Done with spectral difference feature. + + // Don't use template feature if fluctuation of LRT feature is very low: + // most likely just noise state. + if (fluctLrt < self->featureExtractionParams.thresFluctLrt) { + useFeatureSpecDiff = 0; + } + + // Select the weights between the features. + // self->priorModelPars[4] is weight for LRT: always selected. + // self->priorModelPars[5] is weight for spectral flatness. + // self->priorModelPars[6] is weight for spectral difference. + featureSum = (float)(1 + useFeatureSpecFlat + useFeatureSpecDiff); + self->priorModelPars[4] = 1.f / featureSum; + self->priorModelPars[5] = ((float)useFeatureSpecFlat) / featureSum; + self->priorModelPars[6] = ((float)useFeatureSpecDiff) / featureSum; + + // Set hists to zero for next update. + if (self->modelUpdatePars[0] >= 1) { + for (i = 0; i < HIST_PAR_EST; i++) { + self->histLrt[i] = 0; + self->histSpecFlat[i] = 0; + self->histSpecDiff[i] = 0; + } + } + } // End of flag == 1. +} + +// Compute spectral flatness on input spectrum. +// |magnIn| is the magnitude spectrum. +// Spectral flatness is returned in self->featureData[0]. 
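+// The measure used here is the ratio of the geometric to the arithmetic mean
+// of the magnitude spectrum (via exp of the mean log-magnitude), smoothed in
+// time with SPECT_FL_TAVG; values near 1 indicate a flat, noise-like spectrum
+// and small values a peaky, speech-like one.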
+static void ComputeSpectralFlatness(NoiseSuppressionC* self, + const float* magnIn) { + size_t i; + size_t shiftLP = 1; // Option to remove first bin(s) from spectral measures. + float avgSpectralFlatnessNum, avgSpectralFlatnessDen, spectralTmp; + + // Compute spectral measures. + // For flatness. + avgSpectralFlatnessNum = 0.0; + avgSpectralFlatnessDen = self->sumMagn; + for (i = 0; i < shiftLP; i++) { + avgSpectralFlatnessDen -= magnIn[i]; + } + // Compute log of ratio of the geometric to arithmetic mean: check for log(0) + // case. + for (i = shiftLP; i < self->magnLen; i++) { + if (magnIn[i] > 0.0) { + avgSpectralFlatnessNum += (float)log(magnIn[i]); + } else { + self->featureData[0] -= SPECT_FL_TAVG * self->featureData[0]; + return; + } + } + // Normalize. + avgSpectralFlatnessDen = avgSpectralFlatnessDen / self->magnLen; + avgSpectralFlatnessNum = avgSpectralFlatnessNum / self->magnLen; + + // Ratio and inverse log: check for case of log(0). + spectralTmp = (float)exp(avgSpectralFlatnessNum) / avgSpectralFlatnessDen; + + // Time-avg update of spectral flatness feature. + self->featureData[0] += SPECT_FL_TAVG * (spectralTmp - self->featureData[0]); + // Done with flatness feature. +} + +// Compute prior and post SNR based on quantile noise estimation. +// Compute DD estimate of prior SNR. +// Inputs: +// * |magn| is the signal magnitude spectrum estimate. +// * |noise| is the magnitude noise spectrum estimate. +// Outputs: +// * |snrLocPrior| is the computed prior SNR. +// * |snrLocPost| is the computed post SNR. +static void ComputeSnr(const NoiseSuppressionC* self, + const float* magn, + const float* noise, + float* snrLocPrior, + float* snrLocPost) { + size_t i; + + for (i = 0; i < self->magnLen; i++) { + // Previous post SNR. + // Previous estimate: based on previous frame with gain filter. + float previousEstimateStsa = self->magnPrevAnalyze[i] / + (self->noisePrev[i] + 0.0001f) * self->smooth[i]; + // Post SNR. + snrLocPost[i] = 0.f; + if (magn[i] > noise[i]) { + snrLocPost[i] = magn[i] / (noise[i] + 0.0001f) - 1.f; + } + // DD estimate is sum of two terms: current estimate and previous estimate. + // Directed decision update of snrPrior. + snrLocPrior[i] = + DD_PR_SNR * previousEstimateStsa + (1.f - DD_PR_SNR) * snrLocPost[i]; + } // End of loop over frequencies. +} + +// Compute the difference measure between input spectrum and a template/learned +// noise spectrum. +// |magnIn| is the input spectrum. +// The reference/template spectrum is self->magnAvgPause[i]. +// Returns (normalized) spectral difference in self->featureData[4]. +static void ComputeSpectralDifference(NoiseSuppressionC* self, + const float* magnIn) { + // avgDiffNormMagn = var(magnIn) - cov(magnIn, magnAvgPause)^2 / + // var(magnAvgPause) + size_t i; + float avgPause, avgMagn, covMagnPause, varPause, varMagn, avgDiffNormMagn; + + avgPause = 0.0; + avgMagn = self->sumMagn; + // Compute average quantities. + for (i = 0; i < self->magnLen; i++) { + // Conservative smooth noise spectrum from pause frames. + avgPause += self->magnAvgPause[i]; + } + avgPause /= self->magnLen; + avgMagn /= self->magnLen; + + covMagnPause = 0.0; + varPause = 0.0; + varMagn = 0.0; + // Compute variance and covariance quantities. 
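+  // (Together these yield the residual variance of |magnIn| after a linear
+  // fit onto |magnAvgPause|, i.e. var(magnIn) - cov^2 / var(magnAvgPause);
+  // a large residual means the input spectrum differs in shape from the
+  // learned noise template.)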
+ for (i = 0; i < self->magnLen; i++) { + covMagnPause += (magnIn[i] - avgMagn) * (self->magnAvgPause[i] - avgPause); + varPause += + (self->magnAvgPause[i] - avgPause) * (self->magnAvgPause[i] - avgPause); + varMagn += (magnIn[i] - avgMagn) * (magnIn[i] - avgMagn); + } + covMagnPause /= self->magnLen; + varPause /= self->magnLen; + varMagn /= self->magnLen; + // Update of average magnitude spectrum. + self->featureData[6] += self->signalEnergy; + + avgDiffNormMagn = + varMagn - (covMagnPause * covMagnPause) / (varPause + 0.0001f); + // Normalize and compute time-avg update of difference feature. + avgDiffNormMagn = (float)(avgDiffNormMagn / (self->featureData[5] + 0.0001f)); + self->featureData[4] += + SPECT_DIFF_TAVG * (avgDiffNormMagn - self->featureData[4]); +} + +// Compute speech/noise probability. +// Speech/noise probability is returned in |probSpeechFinal|. +// |magn| is the input magnitude spectrum. +// |noise| is the noise spectrum. +// |snrLocPrior| is the prior SNR for each frequency. +// |snrLocPost| is the post SNR for each frequency. +static void SpeechNoiseProb(NoiseSuppressionC* self, + float* probSpeechFinal, + const float* snrLocPrior, + const float* snrLocPost) { + size_t i; + int sgnMap; + float invLrt, gainPrior, indPrior; + float logLrtTimeAvgKsum, besselTmp; + float indicator0, indicator1, indicator2; + float tmpFloat1, tmpFloat2; + float weightIndPrior0, weightIndPrior1, weightIndPrior2; + float threshPrior0, threshPrior1, threshPrior2; + float widthPrior, widthPrior0, widthPrior1, widthPrior2; + + widthPrior0 = WIDTH_PR_MAP; + // Width for pause region: lower range, so increase width in tanh map. + widthPrior1 = 2.f * WIDTH_PR_MAP; + widthPrior2 = 2.f * WIDTH_PR_MAP; // For spectral-difference measure. + + // Threshold parameters for features. + threshPrior0 = self->priorModelPars[0]; + threshPrior1 = self->priorModelPars[1]; + threshPrior2 = self->priorModelPars[3]; + + // Sign for flatness feature. + sgnMap = (int)(self->priorModelPars[2]); + + // Weight parameters for features. + weightIndPrior0 = self->priorModelPars[4]; + weightIndPrior1 = self->priorModelPars[5]; + weightIndPrior2 = self->priorModelPars[6]; + + // Compute feature based on average LR factor. + // This is the average over all frequencies of the smooth log LRT. + logLrtTimeAvgKsum = 0.0; + for (i = 0; i < self->magnLen; i++) { + tmpFloat1 = 1.f + 2.f * snrLocPrior[i]; + tmpFloat2 = 2.f * snrLocPrior[i] / (tmpFloat1 + 0.0001f); + besselTmp = (snrLocPost[i] + 1.f) * tmpFloat2; + self->logLrtTimeAvg[i] += + LRT_TAVG * (besselTmp - (float)log(tmpFloat1) - self->logLrtTimeAvg[i]); + logLrtTimeAvgKsum += self->logLrtTimeAvg[i]; + } + logLrtTimeAvgKsum = (float)logLrtTimeAvgKsum / (self->magnLen); + self->featureData[3] = logLrtTimeAvgKsum; + // Done with computation of LR factor. + + // Compute the indicator functions. + // Average LRT feature. + widthPrior = widthPrior0; + // Use larger width in tanh map for pause regions. + if (logLrtTimeAvgKsum < threshPrior0) { + widthPrior = widthPrior1; + } + // Compute indicator function: sigmoid map. + indicator0 = + 0.5f * + ((float)tanh(widthPrior * (logLrtTimeAvgKsum - threshPrior0)) + 1.f); + + // Spectral flatness feature. + tmpFloat1 = self->featureData[0]; + widthPrior = widthPrior0; + // Use larger width in tanh map for pause regions. + if (sgnMap == 1 && (tmpFloat1 > threshPrior1)) { + widthPrior = widthPrior1; + } + if (sgnMap == -1 && (tmpFloat1 < threshPrior1)) { + widthPrior = widthPrior1; + } + // Compute indicator function: sigmoid map. 
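+  // (|sgnMap| controls the direction of the map for the flatness feature:
+  // with sgnMap == 1, a flatness value above the threshold pushes the
+  // indicator towards 0, i.e. towards the noise hypothesis.)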
+ indicator1 = + 0.5f * + ((float)tanh((float)sgnMap * widthPrior * (threshPrior1 - tmpFloat1)) + + 1.f); + + // For template spectrum-difference. + tmpFloat1 = self->featureData[4]; + widthPrior = widthPrior0; + // Use larger width in tanh map for pause regions. + if (tmpFloat1 < threshPrior2) { + widthPrior = widthPrior2; + } + // Compute indicator function: sigmoid map. + indicator2 = + 0.5f * ((float)tanh(widthPrior * (tmpFloat1 - threshPrior2)) + 1.f); + + // Combine the indicator function with the feature weights. + indPrior = weightIndPrior0 * indicator0 + weightIndPrior1 * indicator1 + + weightIndPrior2 * indicator2; + // Done with computing indicator function. + + // Compute the prior probability. + self->priorSpeechProb += PRIOR_UPDATE * (indPrior - self->priorSpeechProb); + // Make sure probabilities are within range: keep floor to 0.01. + if (self->priorSpeechProb > 1.f) { + self->priorSpeechProb = 1.f; + } + if (self->priorSpeechProb < 0.01f) { + self->priorSpeechProb = 0.01f; + } + + // Final speech probability: combine prior model with LR factor:. + gainPrior = (1.f - self->priorSpeechProb) / (self->priorSpeechProb + 0.0001f); + for (i = 0; i < self->magnLen; i++) { + invLrt = (float)exp(-self->logLrtTimeAvg[i]); + invLrt = (float)gainPrior * invLrt; + probSpeechFinal[i] = 1.f / (1.f + invLrt); + } +} + +// Update the noise features. +// Inputs: +// * |magn| is the signal magnitude spectrum estimate. +// * |updateParsFlag| is an update flag for parameters. +static void FeatureUpdate(NoiseSuppressionC* self, + const float* magn, + int updateParsFlag) { + // Compute spectral flatness on input spectrum. + ComputeSpectralFlatness(self, magn); + // Compute difference of input spectrum with learned/estimated noise spectrum. + ComputeSpectralDifference(self, magn); + // Compute histograms for parameter decisions (thresholds and weights for + // features). + // Parameters are extracted once every window time. + // (=self->modelUpdatePars[1]) + if (updateParsFlag >= 1) { + // Counter update. + self->modelUpdatePars[3]--; + // Update histogram. + if (self->modelUpdatePars[3] > 0) { + FeatureParameterExtraction(self, 0); + } + // Compute model parameters. + if (self->modelUpdatePars[3] == 0) { + FeatureParameterExtraction(self, 1); + self->modelUpdatePars[3] = self->modelUpdatePars[1]; + // If wish to update only once, set flag to zero. + if (updateParsFlag == 1) { + self->modelUpdatePars[0] = 0; + } else { + // Update every window: + // Get normalization for spectral difference for next window estimate. + self->featureData[6] = + self->featureData[6] / ((float)self->modelUpdatePars[1]); + self->featureData[5] = + 0.5f * (self->featureData[6] + self->featureData[5]); + self->featureData[6] = 0.f; + } + } + } +} + +// Update the noise estimate. +// Inputs: +// * |magn| is the signal magnitude spectrum estimate. +// * |snrLocPrior| is the prior SNR. +// * |snrLocPost| is the post SNR. +// Output: +// * |noise| is the updated noise magnitude spectrum estimate. +static void UpdateNoiseEstimate(NoiseSuppressionC* self, + const float* magn, + const float* snrLocPrior, + const float* snrLocPost, + float* noise) { + size_t i; + float probSpeech, probNonSpeech; + // Time-avg parameter for noise update. + float gammaNoiseTmp = NOISE_UPDATE; + float gammaNoiseOld; + float noiseUpdateTmp; + + for (i = 0; i < self->magnLen; i++) { + probSpeech = self->speechProb[i]; + probNonSpeech = 1.f - probSpeech; + // Temporary noise update: + // Use it for speech frames if update value is less than previous. 
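+    // (This is a probability-weighted recursive average: bins judged likely
+    // to be speech essentially keep their previous noise value, while bins
+    // judged to be noise track |magn| with time constant 1 - gammaNoiseTmp.)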
+ noiseUpdateTmp = gammaNoiseTmp * self->noisePrev[i] + + (1.f - gammaNoiseTmp) * (probNonSpeech * magn[i] + + probSpeech * self->noisePrev[i]); + // Time-constant based on speech/noise state. + gammaNoiseOld = gammaNoiseTmp; + gammaNoiseTmp = NOISE_UPDATE; + // Increase gamma (i.e., less noise update) for frame likely to be speech. + if (probSpeech > PROB_RANGE) { + gammaNoiseTmp = SPEECH_UPDATE; + } + // Conservative noise update. + if (probSpeech < PROB_RANGE) { + self->magnAvgPause[i] += GAMMA_PAUSE * (magn[i] - self->magnAvgPause[i]); + } + // Noise update. + if (gammaNoiseTmp == gammaNoiseOld) { + noise[i] = noiseUpdateTmp; + } else { + noise[i] = gammaNoiseTmp * self->noisePrev[i] + + (1.f - gammaNoiseTmp) * (probNonSpeech * magn[i] + + probSpeech * self->noisePrev[i]); + // Allow for noise update downwards: + // If noise update decreases the noise, it is safe, so allow it to + // happen. + if (noiseUpdateTmp < noise[i]) { + noise[i] = noiseUpdateTmp; + } + } + } // End of freq loop. +} + +// Updates |buffer| with a new |frame|. +// Inputs: +// * |frame| is a new speech frame or NULL for setting to zero. +// * |frame_length| is the length of the new frame. +// * |buffer_length| is the length of the buffer. +// Output: +// * |buffer| is the updated buffer. +static void UpdateBuffer(const float* frame, + size_t frame_length, + size_t buffer_length, + float* buffer) { + RTC_DCHECK_LT(buffer_length, 2 * frame_length); + + memcpy(buffer, + buffer + frame_length, + sizeof(*buffer) * (buffer_length - frame_length)); + if (frame) { + memcpy(buffer + buffer_length - frame_length, + frame, + sizeof(*buffer) * frame_length); + } else { + memset(buffer + buffer_length - frame_length, + 0, + sizeof(*buffer) * frame_length); + } +} + +// Transforms the signal from time to frequency domain. +// Inputs: +// * |time_data| is the signal in the time domain. +// * |time_data_length| is the length of the analysis buffer. +// * |magnitude_length| is the length of the spectrum magnitude, which equals +// the length of both |real| and |imag| (time_data_length / 2 + 1). +// Outputs: +// * |time_data| is the signal in the frequency domain. +// * |real| is the real part of the frequency domain. +// * |imag| is the imaginary part of the frequency domain. +// * |magn| is the calculated signal magnitude in the frequency domain. +static void FFT(NoiseSuppressionC* self, + float* time_data, + size_t time_data_length, + size_t magnitude_length, + float* real, + float* imag, + float* magn) { + size_t i; + + RTC_DCHECK_EQ(magnitude_length, time_data_length / 2 + 1); + + WebRtc_rdft(time_data_length, 1, time_data, self->ip, self->wfft); + + imag[0] = 0; + real[0] = time_data[0]; + magn[0] = fabsf(real[0]) + 1.f; + imag[magnitude_length - 1] = 0; + real[magnitude_length - 1] = time_data[1]; + magn[magnitude_length - 1] = fabsf(real[magnitude_length - 1]) + 1.f; + for (i = 1; i < magnitude_length - 1; ++i) { + real[i] = time_data[2 * i]; + imag[i] = time_data[2 * i + 1]; + // Magnitude spectrum. + magn[i] = sqrtf(real[i] * real[i] + imag[i] * imag[i]) + 1.f; + } +} + +// Transforms the signal from frequency to time domain. +// Inputs: +// * |real| is the real part of the frequency domain. +// * |imag| is the imaginary part of the frequency domain. +// * |magnitude_length| is the length of the spectrum magnitude, which equals +// the length of both |real| and |imag|. +// * |time_data_length| is the length of the analysis buffer +// (2 * (magnitude_length - 1)). 
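+// The inverse transform is scaled by 2 / |time_data_length|, so an FFT()
+// followed by IFFT() reproduces the original time-domain signal.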
+// Output: +// * |time_data| is the signal in the time domain. +static void IFFT(NoiseSuppressionC* self, + const float* real, + const float* imag, + size_t magnitude_length, + size_t time_data_length, + float* time_data) { + size_t i; + + RTC_DCHECK_EQ(time_data_length, 2 * (magnitude_length - 1)); + + time_data[0] = real[0]; + time_data[1] = real[magnitude_length - 1]; + for (i = 1; i < magnitude_length - 1; ++i) { + time_data[2 * i] = real[i]; + time_data[2 * i + 1] = imag[i]; + } + WebRtc_rdft(time_data_length, -1, time_data, self->ip, self->wfft); + + for (i = 0; i < time_data_length; ++i) { + time_data[i] *= 2.f / time_data_length; // FFT scaling. + } +} + +// Calculates the energy of a buffer. +// Inputs: +// * |buffer| is the buffer over which the energy is calculated. +// * |length| is the length of the buffer. +// Returns the calculated energy. +static float Energy(const float* buffer, size_t length) { + size_t i; + float energy = 0.f; + + for (i = 0; i < length; ++i) { + energy += buffer[i] * buffer[i]; + } + + return energy; +} + +// Windows a buffer. +// Inputs: +// * |window| is the window by which to multiply. +// * |data| is the data without windowing. +// * |length| is the length of the window and data. +// Output: +// * |data_windowed| is the windowed data. +static void Windowing(const float* window, + const float* data, + size_t length, + float* data_windowed) { + size_t i; + + for (i = 0; i < length; ++i) { + data_windowed[i] = window[i] * data[i]; + } +} + +// Estimate prior SNR decision-directed and compute DD based Wiener Filter. +// Input: +// * |magn| is the signal magnitude spectrum estimate. +// Output: +// * |theFilter| is the frequency response of the computed Wiener filter. +static void ComputeDdBasedWienerFilter(const NoiseSuppressionC* self, + const float* magn, + float* theFilter) { + size_t i; + float snrPrior, previousEstimateStsa, currentEstimateStsa; + + for (i = 0; i < self->magnLen; i++) { + // Previous estimate: based on previous frame with gain filter. + previousEstimateStsa = self->magnPrevProcess[i] / + (self->noisePrev[i] + 0.0001f) * self->smooth[i]; + // Post and prior SNR. + currentEstimateStsa = 0.f; + if (magn[i] > self->noise[i]) { + currentEstimateStsa = magn[i] / (self->noise[i] + 0.0001f) - 1.f; + } + // DD estimate is sum of two terms: current estimate and previous estimate. + // Directed decision update of |snrPrior|. + snrPrior = DD_PR_SNR * previousEstimateStsa + + (1.f - DD_PR_SNR) * currentEstimateStsa; + // Gain filter. + theFilter[i] = snrPrior / (self->overdrive + snrPrior); + } // End of loop over frequencies. +} + +// Changes the aggressiveness of the noise suppression method. +// |mode| = 0 is mild (6dB), |mode| = 1 is medium (10dB) and |mode| = 2 is +// aggressive (15dB). +// Returns 0 on success and -1 otherwise. +int WebRtcNs_set_policy_core(NoiseSuppressionC* self, int mode) { + // Allow for modes: 0, 1, 2, 3. 
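+  // (Mode 3 is an extra, still more aggressive setting on top of the three
+  // documented in the public WebRtcNs_set_policy() header; higher modes
+  // lower the suppression floor |denoiseBound| and, from mode 2 on, raise
+  // |overdrive|.)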
+ if (mode < 0 || mode > 3) { + return (-1); + } + + self->aggrMode = mode; + if (mode == 0) { + self->overdrive = 1.f; + self->denoiseBound = 0.5f; + self->gainmap = 0; + } else if (mode == 1) { + // self->overdrive = 1.25f; + self->overdrive = 1.f; + self->denoiseBound = 0.25f; + self->gainmap = 1; + } else if (mode == 2) { + // self->overdrive = 1.25f; + self->overdrive = 1.1f; + self->denoiseBound = 0.125f; + self->gainmap = 1; + } else if (mode == 3) { + // self->overdrive = 1.3f; + self->overdrive = 1.25f; + self->denoiseBound = 0.09f; + self->gainmap = 1; + } + return 0; +} + +void WebRtcNs_AnalyzeCore(NoiseSuppressionC* self, const float* speechFrame) { + size_t i; + const size_t kStartBand = 5; // Skip first frequency bins during estimation. + int updateParsFlag; + float energy; + float signalEnergy = 0.f; + float sumMagn = 0.f; + float tmpFloat1, tmpFloat2, tmpFloat3; + float winData[ANAL_BLOCKL_MAX]; + float magn[HALF_ANAL_BLOCKL], noise[HALF_ANAL_BLOCKL]; + float snrLocPost[HALF_ANAL_BLOCKL], snrLocPrior[HALF_ANAL_BLOCKL]; + float real[ANAL_BLOCKL_MAX], imag[HALF_ANAL_BLOCKL]; + // Variables during startup. + float sum_log_i = 0.0; + float sum_log_i_square = 0.0; + float sum_log_magn = 0.0; + float sum_log_i_log_magn = 0.0; + float parametric_exp = 0.0; + float parametric_num = 0.0; + + // Check that initiation has been done. + RTC_DCHECK_EQ(1, self->initFlag); + updateParsFlag = self->modelUpdatePars[0]; + + // Update analysis buffer for L band. + UpdateBuffer(speechFrame, self->blockLen, self->anaLen, self->analyzeBuf); + + Windowing(self->window, self->analyzeBuf, self->anaLen, winData); + energy = Energy(winData, self->anaLen); + if (energy == 0.0) { + // We want to avoid updating statistics in this case: + // Updating feature statistics when we have zeros only will cause + // thresholds to move towards zero signal situations. This in turn has the + // effect that once the signal is "turned on" (non-zero values) everything + // will be treated as speech and there is no noise suppression effect. + // Depending on the duration of the inactive signal it takes a + // considerable amount of time for the system to learn what is noise and + // what is speech. + return; + } + + self->blockInd++; // Update the block index only when we process a block. + + FFT(self, winData, self->anaLen, self->magnLen, real, imag, magn); + + for (i = 0; i < self->magnLen; i++) { + signalEnergy += real[i] * real[i] + imag[i] * imag[i]; + sumMagn += magn[i]; + if (self->blockInd < END_STARTUP_SHORT) { + if (i >= kStartBand) { + tmpFloat2 = logf((float)i); + sum_log_i += tmpFloat2; + sum_log_i_square += tmpFloat2 * tmpFloat2; + tmpFloat1 = logf(magn[i]); + sum_log_magn += tmpFloat1; + sum_log_i_log_magn += tmpFloat2 * tmpFloat1; + } + } + } + signalEnergy /= self->magnLen; + self->signalEnergy = signalEnergy; + self->sumMagn = sumMagn; + + // Quantile noise estimate. + NoiseEstimation(self, magn, noise); + // Compute simplified noise model during startup. + if (self->blockInd < END_STARTUP_SHORT) { + // Estimate White noise. + self->whiteNoiseLevel += sumMagn / self->magnLen * self->overdrive; + // Estimate Pink noise parameters. + tmpFloat1 = sum_log_i_square * (self->magnLen - kStartBand); + tmpFloat1 -= (sum_log_i * sum_log_i); + tmpFloat2 = + (sum_log_i_square * sum_log_magn - sum_log_i * sum_log_i_log_magn); + tmpFloat3 = tmpFloat2 / tmpFloat1; + // Constrain the estimated spectrum to be positive. 
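+    // (|tmpFloat3| is the least-squares intercept of log(magn[i]) regressed
+    // on log(i) over the bins i >= kStartBand; the matching slope estimate,
+    // accumulated into |pinkNoiseExp| below, gives the model
+    // magn(i) ~ parametric_num / i^parametric_exp.)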
+ if (tmpFloat3 < 0.f) { + tmpFloat3 = 0.f; + } + self->pinkNoiseNumerator += tmpFloat3; + tmpFloat2 = (sum_log_i * sum_log_magn); + tmpFloat2 -= (self->magnLen - kStartBand) * sum_log_i_log_magn; + tmpFloat3 = tmpFloat2 / tmpFloat1; + // Constrain the pink noise power to be in the interval [0, 1]. + if (tmpFloat3 < 0.f) { + tmpFloat3 = 0.f; + } + if (tmpFloat3 > 1.f) { + tmpFloat3 = 1.f; + } + self->pinkNoiseExp += tmpFloat3; + + // Calculate frequency independent parts of parametric noise estimate. + if (self->pinkNoiseExp > 0.f) { + // Use pink noise estimate. + parametric_num = + expf(self->pinkNoiseNumerator / (float)(self->blockInd + 1)); + parametric_num *= (float)(self->blockInd + 1); + parametric_exp = self->pinkNoiseExp / (float)(self->blockInd + 1); + } + for (i = 0; i < self->magnLen; i++) { + // Estimate the background noise using the white and pink noise + // parameters. + if (self->pinkNoiseExp == 0.f) { + // Use white noise estimate. + self->parametricNoise[i] = self->whiteNoiseLevel; + } else { + // Use pink noise estimate. + float use_band = (float)(i < kStartBand ? kStartBand : i); + self->parametricNoise[i] = + parametric_num / powf(use_band, parametric_exp); + } + // Weight quantile noise with modeled noise. + noise[i] *= (self->blockInd); + tmpFloat2 = + self->parametricNoise[i] * (END_STARTUP_SHORT - self->blockInd); + noise[i] += (tmpFloat2 / (float)(self->blockInd + 1)); + noise[i] /= END_STARTUP_SHORT; + } + } + // Compute average signal during END_STARTUP_LONG time: + // used to normalize spectral difference measure. + if (self->blockInd < END_STARTUP_LONG) { + self->featureData[5] *= self->blockInd; + self->featureData[5] += signalEnergy; + self->featureData[5] /= (self->blockInd + 1); + } + + // Post and prior SNR needed for SpeechNoiseProb. + ComputeSnr(self, magn, noise, snrLocPrior, snrLocPost); + + FeatureUpdate(self, magn, updateParsFlag); + SpeechNoiseProb(self, self->speechProb, snrLocPrior, snrLocPost); + UpdateNoiseEstimate(self, magn, snrLocPrior, snrLocPost, noise); + + // Keep track of noise spectrum for next frame. + memcpy(self->noise, noise, sizeof(*noise) * self->magnLen); + memcpy(self->magnPrevAnalyze, magn, sizeof(*magn) * self->magnLen); +} + +void WebRtcNs_ProcessCore(NoiseSuppressionC* self, + const float* const* speechFrame, + size_t num_bands, + float* const* outFrame) { + // Main routine for noise reduction. + int flagHB = 0; + size_t i, j; + + float energy1, energy2, gain, factor, factor1, factor2; + float fout[BLOCKL_MAX]; + float winData[ANAL_BLOCKL_MAX]; + float magn[HALF_ANAL_BLOCKL]; + float theFilter[HALF_ANAL_BLOCKL], theFilterTmp[HALF_ANAL_BLOCKL]; + float real[ANAL_BLOCKL_MAX], imag[HALF_ANAL_BLOCKL]; + + // SWB variables. + int deltaBweHB = 1; + int deltaGainHB = 1; + float decayBweHB = 1.0; + float gainMapParHB = 1.0; + float gainTimeDomainHB = 1.0; + float avgProbSpeechHB, avgProbSpeechHBTmp, avgFilterGainHB, gainModHB; + float sumMagnAnalyze, sumMagnProcess; + + // Check that initiation has been done. + RTC_DCHECK_EQ(1, self->initFlag); + RTC_DCHECK_LE(num_bands - 1, NUM_HIGH_BANDS_MAX); + + const float* const* speechFrameHB = NULL; + float* const* outFrameHB = NULL; + size_t num_high_bands = 0; + if (num_bands > 1) { + speechFrameHB = &speechFrame[1]; + outFrameHB = &outFrame[1]; + num_high_bands = num_bands - 1; + flagHB = 1; + // Range for averaging low band quantities for H band gain. + deltaBweHB = (int)self->magnLen / 4; + deltaGainHB = deltaBweHB; + } + + // Update analysis buffer for L band. 
+ UpdateBuffer(speechFrame[0], self->blockLen, self->anaLen, self->dataBuf); + + if (flagHB == 1) { + // Update analysis buffer for H bands. + for (i = 0; i < num_high_bands; ++i) { + UpdateBuffer(speechFrameHB[i], + self->blockLen, + self->anaLen, + self->dataBufHB[i]); + } + } + + Windowing(self->window, self->dataBuf, self->anaLen, winData); + energy1 = Energy(winData, self->anaLen); + if (energy1 == 0.0) { + // Synthesize the special case of zero input. + // Read out fully processed segment. + for (i = self->windShift; i < self->blockLen + self->windShift; i++) { + fout[i - self->windShift] = self->syntBuf[i]; + } + // Update synthesis buffer. + UpdateBuffer(NULL, self->blockLen, self->anaLen, self->syntBuf); + + for (i = 0; i < self->blockLen; ++i) + outFrame[0][i] = + WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, fout[i], WEBRTC_SPL_WORD16_MIN); + + // For time-domain gain of HB. + if (flagHB == 1) { + for (i = 0; i < num_high_bands; ++i) { + for (j = 0; j < self->blockLen; ++j) { + outFrameHB[i][j] = WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, + self->dataBufHB[i][j], + WEBRTC_SPL_WORD16_MIN); + } + } + } + + return; + } + + FFT(self, winData, self->anaLen, self->magnLen, real, imag, magn); + + if (self->blockInd < END_STARTUP_SHORT) { + for (i = 0; i < self->magnLen; i++) { + self->initMagnEst[i] += magn[i]; + } + } + + ComputeDdBasedWienerFilter(self, magn, theFilter); + + for (i = 0; i < self->magnLen; i++) { + // Flooring bottom. + if (theFilter[i] < self->denoiseBound) { + theFilter[i] = self->denoiseBound; + } + // Flooring top. + if (theFilter[i] > 1.f) { + theFilter[i] = 1.f; + } + if (self->blockInd < END_STARTUP_SHORT) { + theFilterTmp[i] = + (self->initMagnEst[i] - self->overdrive * self->parametricNoise[i]); + theFilterTmp[i] /= (self->initMagnEst[i] + 0.0001f); + // Flooring bottom. + if (theFilterTmp[i] < self->denoiseBound) { + theFilterTmp[i] = self->denoiseBound; + } + // Flooring top. + if (theFilterTmp[i] > 1.f) { + theFilterTmp[i] = 1.f; + } + // Weight the two suppression filters. + theFilter[i] *= (self->blockInd); + theFilterTmp[i] *= (END_STARTUP_SHORT - self->blockInd); + theFilter[i] += theFilterTmp[i]; + theFilter[i] /= (END_STARTUP_SHORT); + } + + self->smooth[i] = theFilter[i]; + real[i] *= self->smooth[i]; + imag[i] *= self->smooth[i]; + } + // Keep track of |magn| spectrum for next frame. + memcpy(self->magnPrevProcess, magn, sizeof(*magn) * self->magnLen); + memcpy(self->noisePrev, self->noise, sizeof(self->noise[0]) * self->magnLen); + // Back to time domain. + IFFT(self, real, imag, self->magnLen, self->anaLen, winData); + + // Scale factor: only do it after END_STARTUP_LONG time. + factor = 1.f; + if (self->gainmap == 1 && self->blockInd > END_STARTUP_LONG) { + factor1 = 1.f; + factor2 = 1.f; + + energy2 = Energy(winData, self->anaLen); + gain = (float)sqrt(energy2 / (energy1 + 1.f)); + + // Scaling for new version. + if (gain > B_LIM) { + factor1 = 1.f + 1.3f * (gain - B_LIM); + if (gain * factor1 > 1.f) { + factor1 = 1.f / gain; + } + } + if (gain < B_LIM) { + // Don't reduce scale too much for pause regions: + // attenuation here should be controlled by flooring. + if (gain <= self->denoiseBound) { + gain = self->denoiseBound; + } + factor2 = 1.f - 0.3f * (B_LIM - gain); + } + // Combine both scales with speech/noise prob: + // note prior (priorSpeechProb) is not frequency dependent. + factor = self->priorSpeechProb * factor1 + + (1.f - self->priorSpeechProb) * factor2; + } // Out of self->gainmap == 1. 
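+  // Overlap-add synthesis: the denoised block is windowed a second time and
+  // accumulated into |syntBuf|; |factor| (derived above from the ratio of
+  // output to input energy and the speech probability) rescales the block
+  // before it is added.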
+ + Windowing(self->window, winData, self->anaLen, winData); + + // Synthesis. + for (i = 0; i < self->anaLen; i++) { + self->syntBuf[i] += factor * winData[i]; + } + // Read out fully processed segment. + for (i = self->windShift; i < self->blockLen + self->windShift; i++) { + fout[i - self->windShift] = self->syntBuf[i]; + } + // Update synthesis buffer. + UpdateBuffer(NULL, self->blockLen, self->anaLen, self->syntBuf); + + for (i = 0; i < self->blockLen; ++i) + outFrame[0][i] = + WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, fout[i], WEBRTC_SPL_WORD16_MIN); + + // For time-domain gain of HB. + if (flagHB == 1) { + // Average speech prob from low band. + // Average over second half (i.e., 4->8kHz) of frequencies spectrum. + avgProbSpeechHB = 0.0; + for (i = self->magnLen - deltaBweHB - 1; i < self->magnLen - 1; i++) { + avgProbSpeechHB += self->speechProb[i]; + } + avgProbSpeechHB = avgProbSpeechHB / ((float)deltaBweHB); + // If the speech was suppressed by a component between Analyze and + // Process, for example the AEC, then it should not be considered speech + // for high band suppression purposes. + sumMagnAnalyze = 0; + sumMagnProcess = 0; + for (i = 0; i < self->magnLen; ++i) { + sumMagnAnalyze += self->magnPrevAnalyze[i]; + sumMagnProcess += self->magnPrevProcess[i]; + } + avgProbSpeechHB *= sumMagnProcess / sumMagnAnalyze; + // Average filter gain from low band. + // Average over second half (i.e., 4->8kHz) of frequencies spectrum. + avgFilterGainHB = 0.0; + for (i = self->magnLen - deltaGainHB - 1; i < self->magnLen - 1; i++) { + avgFilterGainHB += self->smooth[i]; + } + avgFilterGainHB = avgFilterGainHB / ((float)(deltaGainHB)); + avgProbSpeechHBTmp = 2.f * avgProbSpeechHB - 1.f; + // Gain based on speech probability. + gainModHB = 0.5f * (1.f + (float)tanh(gainMapParHB * avgProbSpeechHBTmp)); + // Combine gain with low band gain. + gainTimeDomainHB = 0.5f * gainModHB + 0.5f * avgFilterGainHB; + if (avgProbSpeechHB >= 0.5f) { + gainTimeDomainHB = 0.25f * gainModHB + 0.75f * avgFilterGainHB; + } + gainTimeDomainHB = gainTimeDomainHB * decayBweHB; + // Make sure gain is within flooring range. + // Flooring bottom. + if (gainTimeDomainHB < self->denoiseBound) { + gainTimeDomainHB = self->denoiseBound; + } + // Flooring top. + if (gainTimeDomainHB > 1.f) { + gainTimeDomainHB = 1.f; + } + // Apply gain. + for (i = 0; i < num_high_bands; ++i) { + for (j = 0; j < self->blockLen; j++) { + outFrameHB[i][j] = + WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, + gainTimeDomainHB * self->dataBufHB[i][j], + WEBRTC_SPL_WORD16_MIN); + } + } + } // End of H band gain computation. +} diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/ns/ns_core.h b/third_party/libwebrtc/webrtc/modules/audio_processing/ns/ns_core.h new file mode 100644 index 0000000000..97f76baf0d --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/ns/ns_core.h @@ -0,0 +1,190 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_NS_NS_CORE_H_ +#define MODULES_AUDIO_PROCESSING_NS_NS_CORE_H_ + +#include "modules/audio_processing/ns/defines.h" + +typedef struct NSParaExtract_ { + // Bin size of histogram. 
+ float binSizeLrt; + float binSizeSpecFlat; + float binSizeSpecDiff; + // Range of histogram over which LRT threshold is computed. + float rangeAvgHistLrt; + // Scale parameters: multiply dominant peaks of the histograms by scale factor + // to obtain thresholds for prior model. + float factor1ModelPars; // For LRT and spectral difference. + float factor2ModelPars; // For spectral_flatness: used when noise is flatter + // than speech. + // Peak limit for spectral flatness (varies between 0 and 1). + float thresPosSpecFlat; + // Limit on spacing of two highest peaks in histogram: spacing determined by + // bin size. + float limitPeakSpacingSpecFlat; + float limitPeakSpacingSpecDiff; + // Limit on relevance of second peak. + float limitPeakWeightsSpecFlat; + float limitPeakWeightsSpecDiff; + // Limit on fluctuation of LRT feature. + float thresFluctLrt; + // Limit on the max and min values for the feature thresholds. + float maxLrt; + float minLrt; + float maxSpecFlat; + float minSpecFlat; + float maxSpecDiff; + float minSpecDiff; + // Criteria of weight of histogram peak to accept/reject feature. + int thresWeightSpecFlat; + int thresWeightSpecDiff; + +} NSParaExtract; + +typedef struct NoiseSuppressionC_ { + uint32_t fs; + size_t blockLen; + size_t windShift; + size_t anaLen; + size_t magnLen; + int aggrMode; + const float* window; + float analyzeBuf[ANAL_BLOCKL_MAX]; + float dataBuf[ANAL_BLOCKL_MAX]; + float syntBuf[ANAL_BLOCKL_MAX]; + + int initFlag; + // Parameters for quantile noise estimation. + float density[SIMULT * HALF_ANAL_BLOCKL]; + float lquantile[SIMULT * HALF_ANAL_BLOCKL]; + float quantile[HALF_ANAL_BLOCKL]; + int counter[SIMULT]; + int updates; + // Parameters for Wiener filter. + float smooth[HALF_ANAL_BLOCKL]; + float overdrive; + float denoiseBound; + int gainmap; + // FFT work arrays. + size_t ip[IP_LENGTH]; + float wfft[W_LENGTH]; + + // Parameters for new method: some not needed, will reduce/cleanup later. + int32_t blockInd; // Frame index counter. + int modelUpdatePars[4]; // Parameters for updating or estimating. + // Thresholds/weights for prior model. + float priorModelPars[7]; // Parameters for prior model. + float noise[HALF_ANAL_BLOCKL]; // Noise spectrum from current frame. + float noisePrev[HALF_ANAL_BLOCKL]; // Noise spectrum from previous frame. + // Magnitude spectrum of previous analyze frame. + float magnPrevAnalyze[HALF_ANAL_BLOCKL]; + // Magnitude spectrum of previous process frame. + float magnPrevProcess[HALF_ANAL_BLOCKL]; + float logLrtTimeAvg[HALF_ANAL_BLOCKL]; // Log LRT factor with time-smoothing. + float priorSpeechProb; // Prior speech/noise probability. + float featureData[7]; + // Conservative noise spectrum estimate. + float magnAvgPause[HALF_ANAL_BLOCKL]; + float signalEnergy; // Energy of |magn|. + float sumMagn; + float whiteNoiseLevel; // Initial noise estimate. + float initMagnEst[HALF_ANAL_BLOCKL]; // Initial magnitude spectrum estimate. + float pinkNoiseNumerator; // Pink noise parameter: numerator. + float pinkNoiseExp; // Pink noise parameter: power of frequencies. + float parametricNoise[HALF_ANAL_BLOCKL]; + // Parameters for feature extraction. + NSParaExtract featureExtractionParams; + // Histograms for parameter estimation. + int histLrt[HIST_PAR_EST]; + int histSpecFlat[HIST_PAR_EST]; + int histSpecDiff[HIST_PAR_EST]; + // Quantities for high band estimate. + float speechProb[HALF_ANAL_BLOCKL]; // Final speech/noise prob: prior + LRT. + // Buffering data for HB. 
+ float dataBufHB[NUM_HIGH_BANDS_MAX][ANAL_BLOCKL_MAX]; + +} NoiseSuppressionC; + +#ifdef __cplusplus +extern "C" { +#endif + +/**************************************************************************** + * WebRtcNs_InitCore(...) + * + * This function initializes a noise suppression instance + * + * Input: + * - self : Instance that should be initialized + * - fs : Sampling frequency + * + * Output: + * - self : Initialized instance + * + * Return value : 0 - Ok + * -1 - Error + */ +int WebRtcNs_InitCore(NoiseSuppressionC* self, uint32_t fs); + +/**************************************************************************** + * WebRtcNs_set_policy_core(...) + * + * This changes the aggressiveness of the noise suppression method. + * + * Input: + * - self : Instance that should be initialized + * - mode : 0: Mild (6dB), 1: Medium (10dB), 2: Aggressive (15dB) + * + * Output: + * - self : Initialized instance + * + * Return value : 0 - Ok + * -1 - Error + */ +int WebRtcNs_set_policy_core(NoiseSuppressionC* self, int mode); + +/**************************************************************************** + * WebRtcNs_AnalyzeCore + * + * Estimate the background noise. + * + * Input: + * - self : Instance that should be initialized + * - speechFrame : Input speech frame for lower band + * + * Output: + * - self : Updated instance + */ +void WebRtcNs_AnalyzeCore(NoiseSuppressionC* self, const float* speechFrame); + +/**************************************************************************** + * WebRtcNs_ProcessCore + * + * Do noise suppression. + * + * Input: + * - self : Instance that should be initialized + * - inFrame : Input speech frame for each band + * - num_bands : Number of bands + * + * Output: + * - self : Updated instance + * - outFrame : Output speech frame for each band + */ +void WebRtcNs_ProcessCore(NoiseSuppressionC* self, + const float* const* inFrame, + size_t num_bands, + float* const* outFrame); + +#ifdef __cplusplus +} +#endif +#endif // MODULES_AUDIO_PROCESSING_NS_NS_CORE_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/ns/nsx_core.c b/third_party/libwebrtc/webrtc/modules/audio_processing/ns/nsx_core.c new file mode 100644 index 0000000000..8043656029 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/ns/nsx_core.c @@ -0,0 +1,2107 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/ns/noise_suppression_x.h" + +#include <math.h> +#include <stdlib.h> +#include <string.h> + +#include "rtc_base/checks.h" +#include "common_audio/signal_processing/include/real_fft.h" +#include "modules/audio_processing/ns/nsx_core.h" +#include "system_wrappers/include/cpu_features_wrapper.h" + +#if defined(WEBRTC_HAS_NEON) +/* Tables are defined in ARM assembly files. 
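+ * (The C fallback definitions in the #else branch below are used when
+ * WEBRTC_HAS_NEON is not defined.)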
*/ +extern const int16_t WebRtcNsx_kLogTable[9]; +extern const int16_t WebRtcNsx_kCounterDiv[201]; +extern const int16_t WebRtcNsx_kLogTableFrac[256]; +#else +static const int16_t WebRtcNsx_kLogTable[9] = { + 0, 177, 355, 532, 710, 887, 1065, 1242, 1420 +}; + +static const int16_t WebRtcNsx_kCounterDiv[201] = { + 32767, 16384, 10923, 8192, 6554, 5461, 4681, 4096, 3641, 3277, 2979, 2731, + 2521, 2341, 2185, 2048, 1928, 1820, 1725, 1638, 1560, 1489, 1425, 1365, 1311, + 1260, 1214, 1170, 1130, 1092, 1057, 1024, 993, 964, 936, 910, 886, 862, 840, + 819, 799, 780, 762, 745, 728, 712, 697, 683, 669, 655, 643, 630, 618, 607, + 596, 585, 575, 565, 555, 546, 537, 529, 520, 512, 504, 496, 489, 482, 475, + 468, 462, 455, 449, 443, 437, 431, 426, 420, 415, 410, 405, 400, 395, 390, + 386, 381, 377, 372, 368, 364, 360, 356, 352, 349, 345, 341, 338, 334, 331, + 328, 324, 321, 318, 315, 312, 309, 306, 303, 301, 298, 295, 293, 290, 287, + 285, 282, 280, 278, 275, 273, 271, 269, 266, 264, 262, 260, 258, 256, 254, + 252, 250, 248, 246, 245, 243, 241, 239, 237, 236, 234, 232, 231, 229, 228, + 226, 224, 223, 221, 220, 218, 217, 216, 214, 213, 211, 210, 209, 207, 206, + 205, 204, 202, 201, 200, 199, 197, 196, 195, 194, 193, 192, 191, 189, 188, + 187, 186, 185, 184, 183, 182, 181, 180, 179, 178, 177, 176, 175, 174, 173, + 172, 172, 171, 170, 169, 168, 167, 166, 165, 165, 164, 163 +}; + +static const int16_t WebRtcNsx_kLogTableFrac[256] = { + 0, 1, 3, 4, 6, 7, 9, 10, 11, 13, 14, 16, 17, 18, 20, 21, + 22, 24, 25, 26, 28, 29, 30, 32, 33, 34, 36, 37, 38, 40, 41, 42, + 44, 45, 46, 47, 49, 50, 51, 52, 54, 55, 56, 57, 59, 60, 61, 62, + 63, 65, 66, 67, 68, 69, 71, 72, 73, 74, 75, 77, 78, 79, 80, 81, + 82, 84, 85, 86, 87, 88, 89, 90, 92, 93, 94, 95, 96, 97, 98, 99, + 100, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 116, + 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, + 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, + 147, 148, 149, 150, 151, 152, 153, 154, 155, 155, 156, 157, 158, 159, 160, + 161, 162, 163, 164, 165, 166, 167, 168, 169, 169, 170, 171, 172, 173, 174, + 175, 176, 177, 178, 178, 179, 180, 181, 182, 183, 184, 185, 185, 186, 187, + 188, 189, 190, 191, 192, 192, 193, 194, 195, 196, 197, 198, 198, 199, 200, + 201, 202, 203, 203, 204, 205, 206, 207, 208, 208, 209, 210, 211, 212, 212, + 213, 214, 215, 216, 216, 217, 218, 219, 220, 220, 221, 222, 223, 224, 224, + 225, 226, 227, 228, 228, 229, 230, 231, 231, 232, 233, 234, 234, 235, 236, + 237, 238, 238, 239, 240, 241, 241, 242, 243, 244, 244, 245, 246, 247, 247, + 248, 249, 249, 250, 251, 252, 252, 253, 254, 255, 255 +}; +#endif // WEBRTC_HAS_NEON + +// Skip first frequency bins during estimation. 
(0 <= value < 64)
+static const size_t kStartBand = 5;
+
+// hybrid Hanning & flat window
+static const int16_t kBlocks80w128x[128] = {
+  0, 536, 1072, 1606, 2139, 2669, 3196, 3720, 4240, 4756, 5266,
+  5771, 6270, 6762, 7246, 7723, 8192, 8652, 9102, 9543, 9974, 10394,
+  10803, 11200, 11585, 11958, 12318, 12665, 12998, 13318, 13623, 13913, 14189,
+  14449, 14694, 14924, 15137, 15334, 15515, 15679, 15826, 15956, 16069, 16165,
+  16244, 16305, 16349, 16375, 16384, 16384, 16384, 16384, 16384, 16384, 16384,
+  16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384,
+  16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384,
+  16384, 16384, 16384, 16384, 16375, 16349, 16305, 16244, 16165, 16069, 15956,
+  15826, 15679, 15515, 15334, 15137, 14924, 14694, 14449, 14189, 13913, 13623,
+  13318, 12998, 12665, 12318, 11958, 11585, 11200, 10803, 10394, 9974, 9543,
+  9102, 8652, 8192, 7723, 7246, 6762, 6270, 5771, 5266, 4756, 4240,
+  3720, 3196, 2669, 2139, 1606, 1072, 536
+};
+
+// hybrid Hanning & flat window
+static const int16_t kBlocks160w256x[256] = {
+  0, 268, 536, 804, 1072, 1339, 1606, 1872,
+  2139, 2404, 2669, 2933, 3196, 3459, 3720, 3981,
+  4240, 4499, 4756, 5012, 5266, 5520, 5771, 6021,
+  6270, 6517, 6762, 7005, 7246, 7486, 7723, 7959,
+  8192, 8423, 8652, 8878, 9102, 9324, 9543, 9760,
+  9974, 10185, 10394, 10600, 10803, 11003, 11200, 11394,
+  11585, 11773, 11958, 12140, 12318, 12493, 12665, 12833,
+  12998, 13160, 13318, 13472, 13623, 13770, 13913, 14053,
+  14189, 14321, 14449, 14574, 14694, 14811, 14924, 15032,
+  15137, 15237, 15334, 15426, 15515, 15599, 15679, 15754,
+  15826, 15893, 15956, 16015, 16069, 16119, 16165, 16207,
+  16244, 16277, 16305, 16329, 16349, 16364, 16375, 16382,
+  16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384,
+  16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384,
+  16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384,
+  16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384,
+  16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384,
+  16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384,
+  16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384,
+  16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384,
+  16384, 16382, 16375, 16364, 16349, 16329, 16305, 16277,
+  16244, 16207, 16165, 16119, 16069, 16015, 15956, 15893,
+  15826, 15754, 15679, 15599, 15515, 15426, 15334, 15237,
+  15137, 15032, 14924, 14811, 14694, 14574, 14449, 14321,
+  14189, 14053, 13913, 13770, 13623, 13472, 13318, 13160,
+  12998, 12833, 12665, 12493, 12318, 12140, 11958, 11773,
+  11585, 11394, 11200, 11003, 10803, 10600, 10394, 10185,
+  9974, 9760, 9543, 9324, 9102, 8878, 8652, 8423,
+  8192, 7959, 7723, 7486, 7246, 7005, 6762, 6517,
+  6270, 6021, 5771, 5520, 5266, 5012, 4756, 4499,
+  4240, 3981, 3720, 3459, 3196, 2933, 2669, 2404,
+  2139, 1872, 1606, 1339, 1072, 804, 536, 268
+};
+
+// Gain factor1 table: Input value in Q8 and output value in Q13
+// original floating point code
+// if (gain > blim) {
+//   factor1 = 1.0 + 1.3 * (gain - blim);
+//   if (gain * factor1 > 1.0) {
+//     factor1 = 1.0 / gain;
+//   }
+// } else {
+//   factor1 = 1.0;
+// }
+static const int16_t kFactor1Table[257] = {
+  8192, 8192, 8192, 8192, 8192, 8192, 8192,
+  8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+  8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+  8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
+  8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
8192, 8192, + 8192, 8192, 8233, 8274, 8315, 8355, 8396, 8436, 8475, 8515, 8554, 8592, 8631, 8669, + 8707, 8745, 8783, 8820, 8857, 8894, 8931, 8967, 9003, 9039, 9075, 9111, 9146, 9181, + 9216, 9251, 9286, 9320, 9354, 9388, 9422, 9456, 9489, 9523, 9556, 9589, 9622, 9655, + 9687, 9719, 9752, 9784, 9816, 9848, 9879, 9911, 9942, 9973, 10004, 10035, 10066, + 10097, 10128, 10158, 10188, 10218, 10249, 10279, 10308, 10338, 10368, 10397, 10426, + 10456, 10485, 10514, 10543, 10572, 10600, 10629, 10657, 10686, 10714, 10742, 10770, + 10798, 10826, 10854, 10882, 10847, 10810, 10774, 10737, 10701, 10666, 10631, 10596, + 10562, 10527, 10494, 10460, 10427, 10394, 10362, 10329, 10297, 10266, 10235, 10203, + 10173, 10142, 10112, 10082, 10052, 10023, 9994, 9965, 9936, 9908, 9879, 9851, 9824, + 9796, 9769, 9742, 9715, 9689, 9662, 9636, 9610, 9584, 9559, 9534, 9508, 9484, 9459, + 9434, 9410, 9386, 9362, 9338, 9314, 9291, 9268, 9245, 9222, 9199, 9176, 9154, 9132, + 9110, 9088, 9066, 9044, 9023, 9002, 8980, 8959, 8939, 8918, 8897, 8877, 8857, 8836, + 8816, 8796, 8777, 8757, 8738, 8718, 8699, 8680, 8661, 8642, 8623, 8605, 8586, 8568, + 8550, 8532, 8514, 8496, 8478, 8460, 8443, 8425, 8408, 8391, 8373, 8356, 8339, 8323, + 8306, 8289, 8273, 8256, 8240, 8224, 8208, 8192 +}; + +// For Factor2 tables +// original floating point code +// if (gain > blim) { +// factor2 = 1.0; +// } else { +// factor2 = 1.0 - 0.3 * (blim - gain); +// if (gain <= inst->denoiseBound) { +// factor2 = 1.0 - 0.3 * (blim - inst->denoiseBound); +// } +// } +// +// Gain factor table: Input value in Q8 and output value in Q13 +static const int16_t kFactor2Aggressiveness1[257] = { + 7577, 7577, 7577, 7577, 7577, 7577, + 7577, 7577, 7577, 7577, 7577, 7577, 7577, 7577, 7577, 7577, 7577, 7596, 7614, 7632, + 7650, 7667, 7683, 7699, 7715, 7731, 7746, 7761, 7775, 7790, 7804, 7818, 7832, 7845, + 7858, 7871, 7884, 7897, 7910, 7922, 7934, 7946, 7958, 7970, 7982, 7993, 8004, 8016, + 8027, 8038, 8049, 8060, 8070, 8081, 8091, 8102, 8112, 8122, 8132, 8143, 8152, 8162, + 8172, 8182, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192 +}; + +// Gain factor table: Input value in Q8 and output value in Q13 +static const int16_t kFactor2Aggressiveness2[257] = { + 7270, 7270, 7270, 7270, 7270, 7306, + 7339, 7369, 7397, 7424, 7448, 7472, 7495, 7517, 7537, 7558, 7577, 7596, 7614, 7632, + 7650, 7667, 7683, 7699, 7715, 7731, 7746, 7761, 7775, 7790, 7804, 7818, 
7832, 7845, + 7858, 7871, 7884, 7897, 7910, 7922, 7934, 7946, 7958, 7970, 7982, 7993, 8004, 8016, + 8027, 8038, 8049, 8060, 8070, 8081, 8091, 8102, 8112, 8122, 8132, 8143, 8152, 8162, + 8172, 8182, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192 +}; + +// Gain factor table: Input value in Q8 and output value in Q13 +static const int16_t kFactor2Aggressiveness3[257] = { + 7184, 7184, 7184, 7229, 7270, 7306, + 7339, 7369, 7397, 7424, 7448, 7472, 7495, 7517, 7537, 7558, 7577, 7596, 7614, 7632, + 7650, 7667, 7683, 7699, 7715, 7731, 7746, 7761, 7775, 7790, 7804, 7818, 7832, 7845, + 7858, 7871, 7884, 7897, 7910, 7922, 7934, 7946, 7958, 7970, 7982, 7993, 8004, 8016, + 8027, 8038, 8049, 8060, 8070, 8081, 8091, 8102, 8112, 8122, 8132, 8143, 8152, 8162, + 8172, 8182, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192 +}; + +// sum of log2(i) from table index to inst->anaLen2 in Q5 +// Note that the first table value is invalid, since log2(0) = -infinity +static const int16_t kSumLogIndex[66] = { + 0, 22917, 22917, 22885, 22834, 22770, 22696, 22613, + 22524, 22428, 22326, 22220, 22109, 21994, 21876, 21754, + 21629, 21501, 21370, 21237, 21101, 20963, 20822, 20679, + 20535, 20388, 20239, 20089, 19937, 19783, 19628, 19470, + 19312, 19152, 18991, 18828, 18664, 18498, 18331, 18164, 
+ 17994, 17824, 17653, 17480, 17306, 17132, 16956, 16779, + 16602, 16423, 16243, 16063, 15881, 15699, 15515, 15331, + 15146, 14960, 14774, 14586, 14398, 14209, 14019, 13829, + 13637, 13445 +}; + +// sum of log2(i)^2 from table index to inst->anaLen2 in Q2 +// Note that the first table value is invalid, since log2(0) = -infinity +static const int16_t kSumSquareLogIndex[66] = { + 0, 16959, 16959, 16955, 16945, 16929, 16908, 16881, + 16850, 16814, 16773, 16729, 16681, 16630, 16575, 16517, + 16456, 16392, 16325, 16256, 16184, 16109, 16032, 15952, + 15870, 15786, 15700, 15612, 15521, 15429, 15334, 15238, + 15140, 15040, 14938, 14834, 14729, 14622, 14514, 14404, + 14292, 14179, 14064, 13947, 13830, 13710, 13590, 13468, + 13344, 13220, 13094, 12966, 12837, 12707, 12576, 12444, + 12310, 12175, 12039, 11902, 11763, 11624, 11483, 11341, + 11198, 11054 +}; + +// log2(table index) in Q12 +// Note that the first table value is invalid, since log2(0) = -infinity +static const int16_t kLogIndex[129] = { + 0, 0, 4096, 6492, 8192, 9511, 10588, 11499, + 12288, 12984, 13607, 14170, 14684, 15157, 15595, 16003, + 16384, 16742, 17080, 17400, 17703, 17991, 18266, 18529, + 18780, 19021, 19253, 19476, 19691, 19898, 20099, 20292, + 20480, 20662, 20838, 21010, 21176, 21338, 21496, 21649, + 21799, 21945, 22087, 22226, 22362, 22495, 22625, 22752, + 22876, 22998, 23117, 23234, 23349, 23462, 23572, 23680, + 23787, 23892, 23994, 24095, 24195, 24292, 24388, 24483, + 24576, 24668, 24758, 24847, 24934, 25021, 25106, 25189, + 25272, 25354, 25434, 25513, 25592, 25669, 25745, 25820, + 25895, 25968, 26041, 26112, 26183, 26253, 26322, 26390, + 26458, 26525, 26591, 26656, 26721, 26784, 26848, 26910, + 26972, 27033, 27094, 27154, 27213, 27272, 27330, 27388, + 27445, 27502, 27558, 27613, 27668, 27722, 27776, 27830, + 27883, 27935, 27988, 28039, 28090, 28141, 28191, 28241, + 28291, 28340, 28388, 28437, 28484, 28532, 28579, 28626, + 28672 +}; + +// determinant of estimation matrix in Q0 corresponding to the log2 tables above +// Note that the first table value is invalid, since log2(0) = -infinity +static const int16_t kDeterminantEstMatrix[66] = { + 0, 29814, 25574, 22640, 20351, 18469, 16873, 15491, + 14277, 13199, 12233, 11362, 10571, 9851, 9192, 8587, + 8030, 7515, 7038, 6596, 6186, 5804, 5448, 5115, + 4805, 4514, 4242, 3988, 3749, 3524, 3314, 3116, + 2930, 2755, 2590, 2435, 2289, 2152, 2022, 1900, + 1785, 1677, 1575, 1478, 1388, 1302, 1221, 1145, + 1073, 1005, 942, 881, 825, 771, 721, 674, + 629, 587, 547, 510, 475, 442, 411, 382, + 355, 330 +}; + +// Update the noise estimation information. 
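+// The quantiles are tracked in the natural-log domain; the conversion back
+// follows the float reference quantile[i] = exp(lquantile[offset + i]),
+// rewritten as exp(x) = 2^(x * log2(e)) with
+// kExp2Const = round(log2(e) * 2^13) = 11819 in Q13. The Q21 product is
+// split into integer and fractional bits, and 2^frac is approximated
+// linearly by (1 + frac) -- the (0x00200000 | ...) mask below.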
+static void UpdateNoiseEstimate(NoiseSuppressionFixedC* inst, int offset) {
+  int32_t tmp32no1 = 0;
+  int32_t tmp32no2 = 0;
+  int16_t tmp16 = 0;
+  const int16_t kExp2Const = 11819; // Q13
+
+  size_t i = 0;
+
+  tmp16 = WebRtcSpl_MaxValueW16(inst->noiseEstLogQuantile + offset,
+                                inst->magnLen);
+  // Guarantee a Q-domain as high as possible and still fit in int16
+  inst->qNoise = 14 - (int) WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
+                   kExp2Const, tmp16, 21);
+  for (i = 0; i < inst->magnLen; i++) {
+    // inst->quantile[i]=exp(inst->lquantile[offset+i]);
+    // in Q21
+    tmp32no2 = kExp2Const * inst->noiseEstLogQuantile[offset + i];
+    tmp32no1 = (0x00200000 | (tmp32no2 & 0x001FFFFF)); // 2^21 + frac
+    tmp16 = (int16_t)(tmp32no2 >> 21);
+    tmp16 -= 21;  // Shift 21 to get result in Q0.
+    tmp16 += (int16_t) inst->qNoise;  // Shift to get result in Q(qNoise).
+    if (tmp16 < 0) {
+      tmp32no1 >>= -tmp16;
+    } else {
+      tmp32no1 <<= tmp16;
+    }
+    inst->noiseEstQuantile[i] = WebRtcSpl_SatW32ToW16(tmp32no1);
+  }
+}
+
+// Noise Estimation
+static void NoiseEstimationC(NoiseSuppressionFixedC* inst,
+                             uint16_t* magn,
+                             uint32_t* noise,
+                             int16_t* q_noise) {
+  int16_t lmagn[HALF_ANAL_BLOCKL], counter, countDiv;
+  int16_t countProd, delta, zeros, frac;
+  int16_t log2, tabind, logval, tmp16, tmp16no1, tmp16no2;
+  const int16_t log2_const = 22713; // Q15
+  const int16_t width_factor = 21845;
+
+  size_t i, s, offset;
+
+  tabind = inst->stages - inst->normData;
+  RTC_DCHECK_LT(tabind, 9);
+  RTC_DCHECK_GT(tabind, -9);
+  if (tabind < 0) {
+    logval = -WebRtcNsx_kLogTable[-tabind];
+  } else {
+    logval = WebRtcNsx_kLogTable[tabind];
+  }
+
+  // lmagn(i)=log(magn(i))=log(2)*log2(magn(i))
+  // magn is in Q(-stages), and the real lmagn values are:
+  // real_lmagn(i)=log(magn(i)*2^stages)=log(magn(i))+log(2^stages)
+  // lmagn in Q8
+  for (i = 0; i < inst->magnLen; i++) {
+    if (magn[i]) {
+      zeros = WebRtcSpl_NormU32((uint32_t)magn[i]);
+      frac = (int16_t)((((uint32_t)magn[i] << zeros)
+                        & 0x7FFFFFFF) >> 23);
+      // log2(magn(i))
+      RTC_DCHECK_LT(frac, 256);
+      log2 = (int16_t)(((31 - zeros) << 8)
+                       + WebRtcNsx_kLogTableFrac[frac]);
+      // log2(magn(i))*log(2)
+      lmagn[i] = (int16_t)((log2 * log2_const) >> 15);
+      // + log(2^stages)
+      lmagn[i] += logval;
+    } else {
+      lmagn[i] = logval;
+    }
+  }
+
+  // loop over simultaneous estimates
+  for (s = 0; s < SIMULT; s++) {
+    offset = s * inst->magnLen;
+
+    // Get counter values from state
+    counter = inst->noiseEstCounter[s];
+    RTC_DCHECK_LT(counter, 201);
+    countDiv = WebRtcNsx_kCounterDiv[counter];
+    countProd = (int16_t)(counter * countDiv);
+
+    // quant_est(...)
+    for (i = 0; i < inst->magnLen; i++) {
+      // compute delta
+      if (inst->noiseEstDensity[offset + i] > 512) {
+        // Get the value for delta by shifting instead of dividing.
+        int factor = WebRtcSpl_NormW16(inst->noiseEstDensity[offset + i]);
+        delta = (int16_t)(FACTOR_Q16 >> (14 - factor));
+      } else {
+        delta = FACTOR_Q7;
+        if (inst->blockIndex < END_STARTUP_LONG) {
+          // Smaller step size during startup. This prevents using
+          // unrealistic values that cause overflow.
+          delta = FACTOR_Q7_STARTUP;
+        }
+      }
+
+      // update log quantile estimate
+      tmp16 = (int16_t)((delta * countDiv) >> 14);
+      if (lmagn[i] > inst->noiseEstLogQuantile[offset + i]) {
+        // +=QUANTILE*delta/(inst->counter[s]+1) QUANTILE=0.25, =1 in Q2
+        // CounterDiv=1/(inst->counter[s]+1) in Q15
+        tmp16 += 2;
+        inst->noiseEstLogQuantile[offset + i] += tmp16 / 4;
+      } else {
+        tmp16 += 1;
+        // *(1-QUANTILE), in Q2 QUANTILE=0.25, 1-0.25=0.75=3 in Q2
+        // TODO(bjornv): investigate why we need to truncate twice.
+        tmp16no2 = (int16_t)((tmp16 / 2) * 3 / 2);
+        inst->noiseEstLogQuantile[offset + i] -= tmp16no2;
+        if (inst->noiseEstLogQuantile[offset + i] < logval) {
+          // This is the smallest fixed point representation we can
+          // have, hence we limit the output.
+          inst->noiseEstLogQuantile[offset + i] = logval;
+        }
+      }
+
+      // update density estimate
+      if (WEBRTC_SPL_ABS_W16(lmagn[i] - inst->noiseEstLogQuantile[offset + i])
+          < WIDTH_Q8) {
+        tmp16no1 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
+                     inst->noiseEstDensity[offset + i], countProd, 15);
+        tmp16no2 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
+                     width_factor, countDiv, 15);
+        inst->noiseEstDensity[offset + i] = tmp16no1 + tmp16no2;
+      }
+    }  // end loop over magnitude spectrum
+
+    if (counter >= END_STARTUP_LONG) {
+      inst->noiseEstCounter[s] = 0;
+      if (inst->blockIndex >= END_STARTUP_LONG) {
+        UpdateNoiseEstimate(inst, offset);
+      }
+    }
+    inst->noiseEstCounter[s]++;
+
+  }  // end loop over simultaneous estimates
+
+  // Sequentially update the noise during startup
+  if (inst->blockIndex < END_STARTUP_LONG) {
+    UpdateNoiseEstimate(inst, offset);
+  }
+
+  for (i = 0; i < inst->magnLen; i++) {
+    noise[i] = (uint32_t)(inst->noiseEstQuantile[i]);  // Q(qNoise)
+  }
+  (*q_noise) = (int16_t)inst->qNoise;
+}
+
+// Filter the data in the frequency domain, and create spectrum.
+static void PrepareSpectrumC(NoiseSuppressionFixedC* inst, int16_t* freq_buf) {
+  size_t i = 0, j = 0;
+
+  for (i = 0; i < inst->magnLen; i++) {
+    inst->real[i] = (int16_t)((inst->real[i] *
+        (int16_t)(inst->noiseSupFilter[i])) >> 14);  // Q(normData-stages)
+    inst->imag[i] = (int16_t)((inst->imag[i] *
+        (int16_t)(inst->noiseSupFilter[i])) >> 14);  // Q(normData-stages)
+  }
+
+  freq_buf[0] = inst->real[0];
+  freq_buf[1] = -inst->imag[0];
+  for (i = 1, j = 2; i < inst->anaLen2; i += 1, j += 2) {
+    freq_buf[j] = inst->real[i];
+    freq_buf[j + 1] = -inst->imag[i];
+  }
+  freq_buf[inst->anaLen] = inst->real[inst->anaLen2];
+  freq_buf[inst->anaLen + 1] = -inst->imag[inst->anaLen2];
+}
+
+// Denormalize the real-valued signal |in|, the output from inverse FFT.
+static void DenormalizeC(NoiseSuppressionFixedC* inst,
+                         int16_t* in,
+                         int factor) {
+  size_t i = 0;
+  int32_t tmp32 = 0;
+  for (i = 0; i < inst->anaLen; i += 1) {
+    tmp32 = WEBRTC_SPL_SHIFT_W32((int32_t)in[i],
+                                 factor - inst->normData);
+    inst->real[i] = WebRtcSpl_SatW32ToW16(tmp32);  // Q0
+  }
+}
+
+// For the noise suppression process: do synthesis, read out the fully
+// processed segment, and update the synthesis buffer.
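+// In float terms this is a standard windowed overlap-add; schematically:
+//   for (i = 0; i < anaLen; ++i)
+//     synthesisBuffer[i] += gain_factor * window[i] * real[i];
+//   for (i = 0; i < blockLen10ms; ++i)
+//     out_frame[i] = synthesisBuffer[i];
+//   // then shift the buffer left by blockLen10ms and zero the tail.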
+static void SynthesisUpdateC(NoiseSuppressionFixedC* inst, + int16_t* out_frame, + int16_t gain_factor) { + size_t i = 0; + int16_t tmp16a = 0; + int16_t tmp16b = 0; + int32_t tmp32 = 0; + + // synthesis + for (i = 0; i < inst->anaLen; i++) { + tmp16a = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( + inst->window[i], inst->real[i], 14); // Q0, window in Q14 + tmp32 = WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(tmp16a, gain_factor, 13); // Q0 + // Down shift with rounding + tmp16b = WebRtcSpl_SatW32ToW16(tmp32); // Q0 + inst->synthesisBuffer[i] = WebRtcSpl_AddSatW16(inst->synthesisBuffer[i], + tmp16b); // Q0 + } + + // read out fully processed segment + for (i = 0; i < inst->blockLen10ms; i++) { + out_frame[i] = inst->synthesisBuffer[i]; // Q0 + } + + // update synthesis buffer + memcpy(inst->synthesisBuffer, inst->synthesisBuffer + inst->blockLen10ms, + (inst->anaLen - inst->blockLen10ms) * sizeof(*inst->synthesisBuffer)); + WebRtcSpl_ZerosArrayW16(inst->synthesisBuffer + + inst->anaLen - inst->blockLen10ms, inst->blockLen10ms); +} + +// Update analysis buffer for lower band, and window data before FFT. +static void AnalysisUpdateC(NoiseSuppressionFixedC* inst, + int16_t* out, + int16_t* new_speech) { + size_t i = 0; + + // For lower band update analysis buffer. + memcpy(inst->analysisBuffer, inst->analysisBuffer + inst->blockLen10ms, + (inst->anaLen - inst->blockLen10ms) * sizeof(*inst->analysisBuffer)); + memcpy(inst->analysisBuffer + inst->anaLen - inst->blockLen10ms, new_speech, + inst->blockLen10ms * sizeof(*inst->analysisBuffer)); + + // Window data before FFT. + for (i = 0; i < inst->anaLen; i++) { + out[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( + inst->window[i], inst->analysisBuffer[i], 14); // Q0 + } +} + +// Normalize the real-valued signal |in|, the input to forward FFT. +static void NormalizeRealBufferC(NoiseSuppressionFixedC* inst, + const int16_t* in, + int16_t* out) { + size_t i = 0; + RTC_DCHECK_GE(inst->normData, 0); + for (i = 0; i < inst->anaLen; ++i) { + out[i] = in[i] << inst->normData; // Q(normData) + } +} + +// Declare function pointers. +NoiseEstimation WebRtcNsx_NoiseEstimation; +PrepareSpectrum WebRtcNsx_PrepareSpectrum; +SynthesisUpdate WebRtcNsx_SynthesisUpdate; +AnalysisUpdate WebRtcNsx_AnalysisUpdate; +Denormalize WebRtcNsx_Denormalize; +NormalizeRealBuffer WebRtcNsx_NormalizeRealBuffer; + +#if defined(WEBRTC_HAS_NEON) +// Initialize function pointers for ARM Neon platform. +static void WebRtcNsx_InitNeon(void) { + WebRtcNsx_NoiseEstimation = WebRtcNsx_NoiseEstimationNeon; + WebRtcNsx_PrepareSpectrum = WebRtcNsx_PrepareSpectrumNeon; + WebRtcNsx_SynthesisUpdate = WebRtcNsx_SynthesisUpdateNeon; + WebRtcNsx_AnalysisUpdate = WebRtcNsx_AnalysisUpdateNeon; +} +#endif + +#if defined(MIPS32_LE) +// Initialize function pointers for MIPS platform. 
+static void WebRtcNsx_InitMips(void) { + WebRtcNsx_PrepareSpectrum = WebRtcNsx_PrepareSpectrum_mips; + WebRtcNsx_SynthesisUpdate = WebRtcNsx_SynthesisUpdate_mips; + WebRtcNsx_AnalysisUpdate = WebRtcNsx_AnalysisUpdate_mips; + WebRtcNsx_NormalizeRealBuffer = WebRtcNsx_NormalizeRealBuffer_mips; +#if defined(MIPS_DSP_R1_LE) + WebRtcNsx_Denormalize = WebRtcNsx_Denormalize_mips; +#endif +} +#endif + +void WebRtcNsx_CalcParametricNoiseEstimate(NoiseSuppressionFixedC* inst, + int16_t pink_noise_exp_avg, + int32_t pink_noise_num_avg, + int freq_index, + uint32_t* noise_estimate, + uint32_t* noise_estimate_avg) { + int32_t tmp32no1 = 0; + int32_t tmp32no2 = 0; + + int16_t int_part = 0; + int16_t frac_part = 0; + + // Use pink noise estimate + // noise_estimate = 2^(pinkNoiseNumerator + pinkNoiseExp * log2(j)) + RTC_DCHECK_GE(freq_index, 0); + RTC_DCHECK_LT(freq_index, 129); + tmp32no2 = (pink_noise_exp_avg * kLogIndex[freq_index]) >> 15; // Q11 + tmp32no1 = pink_noise_num_avg - tmp32no2; // Q11 + + // Calculate output: 2^tmp32no1 + // Output in Q(minNorm-stages) + tmp32no1 += (inst->minNorm - inst->stages) << 11; + if (tmp32no1 > 0) { + int_part = (int16_t)(tmp32no1 >> 11); + frac_part = (int16_t)(tmp32no1 & 0x000007ff); // Q11 + // Piecewise linear approximation of 'b' in + // 2^(int_part+frac_part) = 2^int_part * (1 + b) + // 'b' is given in Q11 and below stored in frac_part. + if (frac_part >> 10) { + // Upper fractional part + tmp32no2 = (2048 - frac_part) * 1244; // Q21 + tmp32no2 = 2048 - (tmp32no2 >> 10); + } else { + // Lower fractional part + tmp32no2 = (frac_part * 804) >> 10; + } + // Shift fractional part to Q(minNorm-stages) + tmp32no2 = WEBRTC_SPL_SHIFT_W32(tmp32no2, int_part - 11); + *noise_estimate_avg = (1 << int_part) + (uint32_t)tmp32no2; + // Scale up to initMagnEst, which is not block averaged + *noise_estimate = (*noise_estimate_avg) * (uint32_t)(inst->blockIndex + 1); + } +} + +// Initialize state +int32_t WebRtcNsx_InitCore(NoiseSuppressionFixedC* inst, uint32_t fs) { + int i; + + //check for valid pointer + if (inst == NULL) { + return -1; + } + // + + // Initialization of struct + if (fs == 8000 || fs == 16000 || fs == 32000 || fs == 48000) { + inst->fs = fs; + } else { + return -1; + } + + if (fs == 8000) { + inst->blockLen10ms = 80; + inst->anaLen = 128; + inst->stages = 7; + inst->window = kBlocks80w128x; + inst->thresholdLogLrt = 131072; //default threshold for LRT feature + inst->maxLrt = 0x0040000; + inst->minLrt = 52429; + } else { + inst->blockLen10ms = 160; + inst->anaLen = 256; + inst->stages = 8; + inst->window = kBlocks160w256x; + inst->thresholdLogLrt = 212644; //default threshold for LRT feature + inst->maxLrt = 0x0080000; + inst->minLrt = 104858; + } + inst->anaLen2 = inst->anaLen / 2; + inst->magnLen = inst->anaLen2 + 1; + + if (inst->real_fft != NULL) { + WebRtcSpl_FreeRealFFT(inst->real_fft); + } + inst->real_fft = WebRtcSpl_CreateRealFFT(inst->stages); + if (inst->real_fft == NULL) { + return -1; + } + + WebRtcSpl_ZerosArrayW16(inst->analysisBuffer, ANAL_BLOCKL_MAX); + WebRtcSpl_ZerosArrayW16(inst->synthesisBuffer, ANAL_BLOCKL_MAX); + + // for HB processing + WebRtcSpl_ZerosArrayW16(inst->dataBufHBFX[0], + NUM_HIGH_BANDS_MAX * ANAL_BLOCKL_MAX); + // for quantile noise estimation + WebRtcSpl_ZerosArrayW16(inst->noiseEstQuantile, HALF_ANAL_BLOCKL); + for (i = 0; i < SIMULT * HALF_ANAL_BLOCKL; i++) { + inst->noiseEstLogQuantile[i] = 2048; // Q8 + inst->noiseEstDensity[i] = 153; // Q9 + } + for (i = 0; i < SIMULT; i++) { + inst->noiseEstCounter[i] = 
(int16_t)(END_STARTUP_LONG * (i + 1)) / SIMULT; + } + + // Initialize suppression filter with ones + WebRtcSpl_MemSetW16((int16_t*)inst->noiseSupFilter, 16384, HALF_ANAL_BLOCKL); + + // Set the aggressiveness: default + inst->aggrMode = 0; + + //initialize variables for new method + inst->priorNonSpeechProb = 8192; // Q14(0.5) prior probability for speech/noise + for (i = 0; i < HALF_ANAL_BLOCKL; i++) { + inst->prevMagnU16[i] = 0; + inst->prevNoiseU32[i] = 0; //previous noise-spectrum + inst->logLrtTimeAvgW32[i] = 0; //smooth LR ratio + inst->avgMagnPause[i] = 0; //conservative noise spectrum estimate + inst->initMagnEst[i] = 0; //initial average magnitude spectrum + } + + //feature quantities + inst->thresholdSpecDiff = 50; //threshold for difference feature: determined on-line + inst->thresholdSpecFlat = 20480; //threshold for flatness: determined on-line + inst->featureLogLrt = inst->thresholdLogLrt; //average LRT factor (= threshold) + inst->featureSpecFlat = inst->thresholdSpecFlat; //spectral flatness (= threshold) + inst->featureSpecDiff = inst->thresholdSpecDiff; //spectral difference (= threshold) + inst->weightLogLrt = 6; //default weighting par for LRT feature + inst->weightSpecFlat = 0; //default weighting par for spectral flatness feature + inst->weightSpecDiff = 0; //default weighting par for spectral difference feature + + inst->curAvgMagnEnergy = 0; //window time-average of input magnitude spectrum + inst->timeAvgMagnEnergy = 0; //normalization for spectral difference + inst->timeAvgMagnEnergyTmp = 0; //normalization for spectral difference + + //histogram quantities: used to estimate/update thresholds for features + WebRtcSpl_ZerosArrayW16(inst->histLrt, HIST_PAR_EST); + WebRtcSpl_ZerosArrayW16(inst->histSpecDiff, HIST_PAR_EST); + WebRtcSpl_ZerosArrayW16(inst->histSpecFlat, HIST_PAR_EST); + + inst->blockIndex = -1; //frame counter + + //inst->modelUpdate = 500; //window for update + inst->modelUpdate = (1 << STAT_UPDATES); //window for update + inst->cntThresUpdate = 0; //counter feature thresholds updates + + inst->sumMagn = 0; + inst->magnEnergy = 0; + inst->prevQMagn = 0; + inst->qNoise = 0; + inst->prevQNoise = 0; + + inst->energyIn = 0; + inst->scaleEnergyIn = 0; + + inst->whiteNoiseLevel = 0; + inst->pinkNoiseNumerator = 0; + inst->pinkNoiseExp = 0; + inst->minNorm = 15; // Start with full scale + inst->zeroInputSignal = 0; + + //default mode + WebRtcNsx_set_policy_core(inst, 0); + +#ifdef NS_FILEDEBUG + inst->infile = fopen("indebug.pcm", "wb"); + inst->outfile = fopen("outdebug.pcm", "wb"); + inst->file1 = fopen("file1.pcm", "wb"); + inst->file2 = fopen("file2.pcm", "wb"); + inst->file3 = fopen("file3.pcm", "wb"); + inst->file4 = fopen("file4.pcm", "wb"); + inst->file5 = fopen("file5.pcm", "wb"); +#endif + + // Initialize function pointers. 
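+  // The plain C implementations are installed first; the platform hooks
+  // below (NEON, MIPS) overwrite only the pointers they provide optimized
+  // versions for, so every routine keeps a working C fallback.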
+ WebRtcNsx_NoiseEstimation = NoiseEstimationC; + WebRtcNsx_PrepareSpectrum = PrepareSpectrumC; + WebRtcNsx_SynthesisUpdate = SynthesisUpdateC; + WebRtcNsx_AnalysisUpdate = AnalysisUpdateC; + WebRtcNsx_Denormalize = DenormalizeC; + WebRtcNsx_NormalizeRealBuffer = NormalizeRealBufferC; + +#if defined(WEBRTC_HAS_NEON) + WebRtcNsx_InitNeon(); +#endif + +#if defined(MIPS32_LE) + WebRtcNsx_InitMips(); +#endif + + inst->initFlag = 1; + + return 0; +} + +int WebRtcNsx_set_policy_core(NoiseSuppressionFixedC* inst, int mode) { + // allow for modes:0,1,2,3 + if (mode < 0 || mode > 3) { + return -1; + } + + inst->aggrMode = mode; + if (mode == 0) { + inst->overdrive = 256; // Q8(1.0) + inst->denoiseBound = 8192; // Q14(0.5) + inst->gainMap = 0; // No gain compensation + } else if (mode == 1) { + inst->overdrive = 256; // Q8(1.0) + inst->denoiseBound = 4096; // Q14(0.25) + inst->factor2Table = kFactor2Aggressiveness1; + inst->gainMap = 1; + } else if (mode == 2) { + inst->overdrive = 282; // ~= Q8(1.1) + inst->denoiseBound = 2048; // Q14(0.125) + inst->factor2Table = kFactor2Aggressiveness2; + inst->gainMap = 1; + } else if (mode == 3) { + inst->overdrive = 320; // Q8(1.25) + inst->denoiseBound = 1475; // ~= Q14(0.09) + inst->factor2Table = kFactor2Aggressiveness3; + inst->gainMap = 1; + } + return 0; +} + +// Extract thresholds for feature parameters +// histograms are computed over some window_size (given by window_pars) +// thresholds and weights are extracted every window +// flag 0 means update histogram only, flag 1 means compute the thresholds/weights +// threshold and weights are returned in: inst->priorModelPars +void WebRtcNsx_FeatureParameterExtraction(NoiseSuppressionFixedC* inst, + int flag) { + uint32_t tmpU32; + uint32_t histIndex; + uint32_t posPeak1SpecFlatFX, posPeak2SpecFlatFX; + uint32_t posPeak1SpecDiffFX, posPeak2SpecDiffFX; + + int32_t tmp32; + int32_t fluctLrtFX, thresFluctLrtFX; + int32_t avgHistLrtFX, avgSquareHistLrtFX, avgHistLrtComplFX; + + int16_t j; + int16_t numHistLrt; + + int i; + int useFeatureSpecFlat, useFeatureSpecDiff, featureSum; + int maxPeak1, maxPeak2; + int weightPeak1SpecFlat, weightPeak2SpecFlat; + int weightPeak1SpecDiff, weightPeak2SpecDiff; + + //update histograms + if (!flag) { + // LRT + // Type casting to UWord32 is safe since negative values will not be wrapped to larger + // values than HIST_PAR_EST + histIndex = (uint32_t)(inst->featureLogLrt); + if (histIndex < HIST_PAR_EST) { + inst->histLrt[histIndex]++; + } + // Spectral flatness + // (inst->featureSpecFlat*20)>>10 = (inst->featureSpecFlat*5)>>8 + histIndex = (inst->featureSpecFlat * 5) >> 8; + if (histIndex < HIST_PAR_EST) { + inst->histSpecFlat[histIndex]++; + } + // Spectral difference + histIndex = HIST_PAR_EST; + if (inst->timeAvgMagnEnergy > 0) { + // Guard against division by zero + // If timeAvgMagnEnergy == 0 we have no normalizing statistics and + // therefore can't update the histogram + histIndex = ((inst->featureSpecDiff * 5) >> inst->stages) / + inst->timeAvgMagnEnergy; + } + if (histIndex < HIST_PAR_EST) { + inst->histSpecDiff[histIndex]++; + } + } + + // extract parameters for speech/noise probability + if (flag) { + useFeatureSpecDiff = 1; + //for LRT feature: + // compute the average over inst->featureExtractionParams.rangeAvgHistLrt + avgHistLrtFX = 0; + avgSquareHistLrtFX = 0; + numHistLrt = 0; + for (i = 0; i < BIN_SIZE_LRT; i++) { + j = (2 * i + 1); + tmp32 = inst->histLrt[i] * j; + avgHistLrtFX += tmp32; + numHistLrt += inst->histLrt[i]; + avgSquareHistLrtFX += tmp32 * 
j; + } + avgHistLrtComplFX = avgHistLrtFX; + for (; i < HIST_PAR_EST; i++) { + j = (2 * i + 1); + tmp32 = inst->histLrt[i] * j; + avgHistLrtComplFX += tmp32; + avgSquareHistLrtFX += tmp32 * j; + } + fluctLrtFX = avgSquareHistLrtFX * numHistLrt - + avgHistLrtFX * avgHistLrtComplFX; + thresFluctLrtFX = THRES_FLUCT_LRT * numHistLrt; + // get threshold for LRT feature: + tmpU32 = (FACTOR_1_LRT_DIFF * (uint32_t)avgHistLrtFX); + if ((fluctLrtFX < thresFluctLrtFX) || (numHistLrt == 0) || + (tmpU32 > (uint32_t)(100 * numHistLrt))) { + //very low fluctuation, so likely noise + inst->thresholdLogLrt = inst->maxLrt; + } else { + tmp32 = (int32_t)((tmpU32 << (9 + inst->stages)) / numHistLrt / + 25); + // check if value is within min/max range + inst->thresholdLogLrt = WEBRTC_SPL_SAT(inst->maxLrt, + tmp32, + inst->minLrt); + } + if (fluctLrtFX < thresFluctLrtFX) { + // Do not use difference feature if fluctuation of LRT feature is very low: + // most likely just noise state + useFeatureSpecDiff = 0; + } + + // for spectral flatness and spectral difference: compute the main peaks of histogram + maxPeak1 = 0; + maxPeak2 = 0; + posPeak1SpecFlatFX = 0; + posPeak2SpecFlatFX = 0; + weightPeak1SpecFlat = 0; + weightPeak2SpecFlat = 0; + + // peaks for flatness + for (i = 0; i < HIST_PAR_EST; i++) { + if (inst->histSpecFlat[i] > maxPeak1) { + // Found new "first" peak + maxPeak2 = maxPeak1; + weightPeak2SpecFlat = weightPeak1SpecFlat; + posPeak2SpecFlatFX = posPeak1SpecFlatFX; + + maxPeak1 = inst->histSpecFlat[i]; + weightPeak1SpecFlat = inst->histSpecFlat[i]; + posPeak1SpecFlatFX = (uint32_t)(2 * i + 1); + } else if (inst->histSpecFlat[i] > maxPeak2) { + // Found new "second" peak + maxPeak2 = inst->histSpecFlat[i]; + weightPeak2SpecFlat = inst->histSpecFlat[i]; + posPeak2SpecFlatFX = (uint32_t)(2 * i + 1); + } + } + + // for spectral flatness feature + useFeatureSpecFlat = 1; + // merge the two peaks if they are close + if ((posPeak1SpecFlatFX - posPeak2SpecFlatFX < LIM_PEAK_SPACE_FLAT_DIFF) + && (weightPeak2SpecFlat * LIM_PEAK_WEIGHT_FLAT_DIFF > weightPeak1SpecFlat)) { + weightPeak1SpecFlat += weightPeak2SpecFlat; + posPeak1SpecFlatFX = (posPeak1SpecFlatFX + posPeak2SpecFlatFX) >> 1; + } + //reject if weight of peaks is not large enough, or peak value too small + if (weightPeak1SpecFlat < THRES_WEIGHT_FLAT_DIFF || posPeak1SpecFlatFX + < THRES_PEAK_FLAT) { + useFeatureSpecFlat = 0; + } else { // if selected, get the threshold + // compute the threshold and check if value is within min/max range + inst->thresholdSpecFlat = WEBRTC_SPL_SAT(MAX_FLAT_Q10, FACTOR_2_FLAT_Q10 + * posPeak1SpecFlatFX, MIN_FLAT_Q10); //Q10 + } + // done with flatness feature + + if (useFeatureSpecDiff) { + //compute two peaks for spectral difference + maxPeak1 = 0; + maxPeak2 = 0; + posPeak1SpecDiffFX = 0; + posPeak2SpecDiffFX = 0; + weightPeak1SpecDiff = 0; + weightPeak2SpecDiff = 0; + // peaks for spectral difference + for (i = 0; i < HIST_PAR_EST; i++) { + if (inst->histSpecDiff[i] > maxPeak1) { + // Found new "first" peak + maxPeak2 = maxPeak1; + weightPeak2SpecDiff = weightPeak1SpecDiff; + posPeak2SpecDiffFX = posPeak1SpecDiffFX; + + maxPeak1 = inst->histSpecDiff[i]; + weightPeak1SpecDiff = inst->histSpecDiff[i]; + posPeak1SpecDiffFX = (uint32_t)(2 * i + 1); + } else if (inst->histSpecDiff[i] > maxPeak2) { + // Found new "second" peak + maxPeak2 = inst->histSpecDiff[i]; + weightPeak2SpecDiff = inst->histSpecDiff[i]; + posPeak2SpecDiffFX = (uint32_t)(2 * i + 1); + } + } + + // merge the two peaks if they are close + if 
((posPeak1SpecDiffFX - posPeak2SpecDiffFX < LIM_PEAK_SPACE_FLAT_DIFF) + && (weightPeak2SpecDiff * LIM_PEAK_WEIGHT_FLAT_DIFF > weightPeak1SpecDiff)) { + weightPeak1SpecDiff += weightPeak2SpecDiff; + posPeak1SpecDiffFX = (posPeak1SpecDiffFX + posPeak2SpecDiffFX) >> 1; + } + // get the threshold value and check if value is within min/max range + inst->thresholdSpecDiff = WEBRTC_SPL_SAT(MAX_DIFF, FACTOR_1_LRT_DIFF + * posPeak1SpecDiffFX, MIN_DIFF); //5x bigger + //reject if weight of peaks is not large enough + if (weightPeak1SpecDiff < THRES_WEIGHT_FLAT_DIFF) { + useFeatureSpecDiff = 0; + } + // done with spectral difference feature + } + + // select the weights between the features + // inst->priorModelPars[4] is weight for LRT: always selected + featureSum = 6 / (1 + useFeatureSpecFlat + useFeatureSpecDiff); + inst->weightLogLrt = featureSum; + inst->weightSpecFlat = useFeatureSpecFlat * featureSum; + inst->weightSpecDiff = useFeatureSpecDiff * featureSum; + + // set histograms to zero for next update + WebRtcSpl_ZerosArrayW16(inst->histLrt, HIST_PAR_EST); + WebRtcSpl_ZerosArrayW16(inst->histSpecDiff, HIST_PAR_EST); + WebRtcSpl_ZerosArrayW16(inst->histSpecFlat, HIST_PAR_EST); + } // end of flag == 1 +} + + +// Compute spectral flatness on input spectrum +// magn is the magnitude spectrum +// spectral flatness is returned in inst->featureSpecFlat +void WebRtcNsx_ComputeSpectralFlatness(NoiseSuppressionFixedC* inst, + uint16_t* magn) { + uint32_t tmpU32; + uint32_t avgSpectralFlatnessNum, avgSpectralFlatnessDen; + + int32_t tmp32; + int32_t currentSpectralFlatness, logCurSpectralFlatness; + + int16_t zeros, frac, intPart; + + size_t i; + + // for flatness + avgSpectralFlatnessNum = 0; + avgSpectralFlatnessDen = inst->sumMagn - (uint32_t)magn[0]; // Q(normData-stages) + + // compute log of ratio of the geometric to arithmetic mean: check for log(0) case + // flatness = exp( sum(log(magn[i]))/N - log(sum(magn[i])/N) ) + // = exp( sum(log(magn[i]))/N ) * N / sum(magn[i]) + // = 2^( sum(log2(magn[i]))/N - (log2(sum(magn[i])) - log2(N)) ) [This is used] + for (i = 1; i < inst->magnLen; i++) { + // First bin is excluded from spectrum measures. Number of bins is now a power of 2 + if (magn[i]) { + zeros = WebRtcSpl_NormU32((uint32_t)magn[i]); + frac = (int16_t)(((uint32_t)((uint32_t)(magn[i]) << zeros) + & 0x7FFFFFFF) >> 23); + // log2(magn(i)) + RTC_DCHECK_LT(frac, 256); + tmpU32 = (uint32_t)(((31 - zeros) << 8) + + WebRtcNsx_kLogTableFrac[frac]); // Q8 + avgSpectralFlatnessNum += tmpU32; // Q8 + } else { + //if at least one frequency component is zero, treat separately + tmpU32 = WEBRTC_SPL_UMUL_32_16(inst->featureSpecFlat, SPECT_FLAT_TAVG_Q14); // Q24 + inst->featureSpecFlat -= tmpU32 >> 14; // Q10 + return; + } + } + //ratio and inverse log: check for case of log(0) + zeros = WebRtcSpl_NormU32(avgSpectralFlatnessDen); + frac = (int16_t)(((avgSpectralFlatnessDen << zeros) & 0x7FFFFFFF) >> 23); + // log2(avgSpectralFlatnessDen) + RTC_DCHECK_LT(frac, 256); + tmp32 = (int32_t)(((31 - zeros) << 8) + WebRtcNsx_kLogTableFrac[frac]); // Q8 + logCurSpectralFlatness = (int32_t)avgSpectralFlatnessNum; + logCurSpectralFlatness += ((int32_t)(inst->stages - 1) << (inst->stages + 7)); // Q(8+stages-1) + logCurSpectralFlatness -= (tmp32 << (inst->stages - 1)); + logCurSpectralFlatness <<= (10 - inst->stages); // Q17 + tmp32 = (int32_t)(0x00020000 | (WEBRTC_SPL_ABS_W32(logCurSpectralFlatness) + & 0x0001FFFF)); //Q17 + intPart = 7 - (logCurSpectralFlatness >> 17); // Add 7 for output in Q10. 
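+  // tmp32 now holds (1 + fractional part) of |logCurSpectralFlatness| in
+  // Q17; the shift by intPart below undoes the log2, i.e. it evaluates
+  // 2^logCurSpectralFlatness via a linear approximation of 2^x on the
+  // fractional bits, leaving the flatness itself in Q10.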
+ if (intPart > 0) { + currentSpectralFlatness = tmp32 >> intPart; + } else { + currentSpectralFlatness = tmp32 << -intPart; + } + + //time average update of spectral flatness feature + tmp32 = currentSpectralFlatness - (int32_t)inst->featureSpecFlat; // Q10 + tmp32 *= SPECT_FLAT_TAVG_Q14; // Q24 + inst->featureSpecFlat += tmp32 >> 14; // Q10 + // done with flatness feature +} + + +// Compute the difference measure between input spectrum and a template/learned noise spectrum +// magn_tmp is the input spectrum +// the reference/template spectrum is inst->magn_avg_pause[i] +// returns (normalized) spectral difference in inst->featureSpecDiff +void WebRtcNsx_ComputeSpectralDifference(NoiseSuppressionFixedC* inst, + uint16_t* magnIn) { + // This is to be calculated: + // avgDiffNormMagn = var(magnIn) - cov(magnIn, magnAvgPause)^2 / var(magnAvgPause) + + uint32_t tmpU32no1, tmpU32no2; + uint32_t varMagnUFX, varPauseUFX, avgDiffNormMagnUFX; + + int32_t tmp32no1, tmp32no2; + int32_t avgPauseFX, avgMagnFX, covMagnPauseFX; + int32_t maxPause, minPause; + + int16_t tmp16no1; + + size_t i; + int norm32, nShifts; + + avgPauseFX = 0; + maxPause = 0; + minPause = inst->avgMagnPause[0]; // Q(prevQMagn) + // compute average quantities + for (i = 0; i < inst->magnLen; i++) { + // Compute mean of magn_pause + avgPauseFX += inst->avgMagnPause[i]; // in Q(prevQMagn) + maxPause = WEBRTC_SPL_MAX(maxPause, inst->avgMagnPause[i]); + minPause = WEBRTC_SPL_MIN(minPause, inst->avgMagnPause[i]); + } + // normalize by replacing div of "inst->magnLen" with "inst->stages-1" shifts + avgPauseFX >>= inst->stages - 1; + avgMagnFX = inst->sumMagn >> (inst->stages - 1); + // Largest possible deviation in magnPause for (co)var calculations + tmp32no1 = WEBRTC_SPL_MAX(maxPause - avgPauseFX, avgPauseFX - minPause); + // Get number of shifts to make sure we don't get wrap around in varPause + nShifts = WEBRTC_SPL_MAX(0, 10 + inst->stages - WebRtcSpl_NormW32(tmp32no1)); + + varMagnUFX = 0; + varPauseUFX = 0; + covMagnPauseFX = 0; + for (i = 0; i < inst->magnLen; i++) { + // Compute var and cov of magn and magn_pause + tmp16no1 = (int16_t)((int32_t)magnIn[i] - avgMagnFX); + tmp32no2 = inst->avgMagnPause[i] - avgPauseFX; + varMagnUFX += (uint32_t)(tmp16no1 * tmp16no1); // Q(2*qMagn) + tmp32no1 = tmp32no2 * tmp16no1; // Q(prevQMagn+qMagn) + covMagnPauseFX += tmp32no1; // Q(prevQMagn+qMagn) + tmp32no1 = tmp32no2 >> nShifts; // Q(prevQMagn-minPause). 
+ varPauseUFX += tmp32no1 * tmp32no1; // Q(2*(prevQMagn-minPause)) + } + //update of average magnitude spectrum: Q(-2*stages) and averaging replaced by shifts + inst->curAvgMagnEnergy += + inst->magnEnergy >> (2 * inst->normData + inst->stages - 1); + + avgDiffNormMagnUFX = varMagnUFX; // Q(2*qMagn) + if ((varPauseUFX) && (covMagnPauseFX)) { + tmpU32no1 = (uint32_t)WEBRTC_SPL_ABS_W32(covMagnPauseFX); // Q(prevQMagn+qMagn) + norm32 = WebRtcSpl_NormU32(tmpU32no1) - 16; + if (norm32 > 0) { + tmpU32no1 <<= norm32; // Q(prevQMagn+qMagn+norm32) + } else { + tmpU32no1 >>= -norm32; // Q(prevQMagn+qMagn+norm32) + } + tmpU32no2 = WEBRTC_SPL_UMUL(tmpU32no1, tmpU32no1); // Q(2*(prevQMagn+qMagn-norm32)) + + nShifts += norm32; + nShifts <<= 1; + if (nShifts < 0) { + varPauseUFX >>= (-nShifts); // Q(2*(qMagn+norm32+minPause)) + nShifts = 0; + } + if (varPauseUFX > 0) { + // Q(2*(qMagn+norm32-16+minPause)) + tmpU32no1 = tmpU32no2 / varPauseUFX; + tmpU32no1 >>= nShifts; + + // Q(2*qMagn) + avgDiffNormMagnUFX -= WEBRTC_SPL_MIN(avgDiffNormMagnUFX, tmpU32no1); + } else { + avgDiffNormMagnUFX = 0; + } + } + //normalize and compute time average update of difference feature + tmpU32no1 = avgDiffNormMagnUFX >> (2 * inst->normData); + if (inst->featureSpecDiff > tmpU32no1) { + tmpU32no2 = WEBRTC_SPL_UMUL_32_16(inst->featureSpecDiff - tmpU32no1, + SPECT_DIFF_TAVG_Q8); // Q(8-2*stages) + inst->featureSpecDiff -= tmpU32no2 >> 8; // Q(-2*stages) + } else { + tmpU32no2 = WEBRTC_SPL_UMUL_32_16(tmpU32no1 - inst->featureSpecDiff, + SPECT_DIFF_TAVG_Q8); // Q(8-2*stages) + inst->featureSpecDiff += tmpU32no2 >> 8; // Q(-2*stages) + } +} + +// Transform input (speechFrame) to frequency domain magnitude (magnU16) +void WebRtcNsx_DataAnalysis(NoiseSuppressionFixedC* inst, + short* speechFrame, + uint16_t* magnU16) { + uint32_t tmpU32no1; + + int32_t tmp_1_w32 = 0; + int32_t tmp_2_w32 = 0; + int32_t sum_log_magn = 0; + int32_t sum_log_i_log_magn = 0; + + uint16_t sum_log_magn_u16 = 0; + uint16_t tmp_u16 = 0; + + int16_t sum_log_i = 0; + int16_t sum_log_i_square = 0; + int16_t frac = 0; + int16_t log2 = 0; + int16_t matrix_determinant = 0; + int16_t maxWinData; + + size_t i, j; + int zeros; + int net_norm = 0; + int right_shifts_in_magnU16 = 0; + int right_shifts_in_initMagnEst = 0; + + int16_t winData_buff[ANAL_BLOCKL_MAX * 2 + 16]; + int16_t realImag_buff[ANAL_BLOCKL_MAX * 2 + 16]; + + // Align the structures to 32-byte boundary for the FFT function. + int16_t* winData = (int16_t*) (((uintptr_t)winData_buff + 31) & ~31); + int16_t* realImag = (int16_t*) (((uintptr_t) realImag_buff + 31) & ~31); + + // Update analysis buffer for lower band, and window data before FFT. + WebRtcNsx_AnalysisUpdate(inst, winData, speechFrame); + + // Get input energy + inst->energyIn = + WebRtcSpl_Energy(winData, inst->anaLen, &inst->scaleEnergyIn); + + // Reset zero input flag + inst->zeroInputSignal = 0; + // Acquire norm for winData + maxWinData = WebRtcSpl_MaxAbsValueW16(winData, inst->anaLen); + inst->normData = WebRtcSpl_NormW16(maxWinData); + if (maxWinData == 0) { + // Treat zero input separately. 
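+    // The flag makes the callers (WebRtcNsx_ProcessCore and
+    // WebRtcNsx_DataSynthesis) skip the spectral path entirely and just
+    // flush the synthesis buffer.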
+ inst->zeroInputSignal = 1; + return; + } + + // Determine the net normalization in the frequency domain + net_norm = inst->stages - inst->normData; + // Track lowest normalization factor and use it to prevent wrap around in shifting + right_shifts_in_magnU16 = inst->normData - inst->minNorm; + right_shifts_in_initMagnEst = WEBRTC_SPL_MAX(-right_shifts_in_magnU16, 0); + inst->minNorm -= right_shifts_in_initMagnEst; + right_shifts_in_magnU16 = WEBRTC_SPL_MAX(right_shifts_in_magnU16, 0); + + // create realImag as winData interleaved with zeros (= imag. part), normalize it + WebRtcNsx_NormalizeRealBuffer(inst, winData, realImag); + + // FFT output will be in winData[]. + WebRtcSpl_RealForwardFFT(inst->real_fft, realImag, winData); + + inst->imag[0] = 0; // Q(normData-stages) + inst->imag[inst->anaLen2] = 0; + inst->real[0] = winData[0]; // Q(normData-stages) + inst->real[inst->anaLen2] = winData[inst->anaLen]; + // Q(2*(normData-stages)) + inst->magnEnergy = (uint32_t)(inst->real[0] * inst->real[0]); + inst->magnEnergy += (uint32_t)(inst->real[inst->anaLen2] * + inst->real[inst->anaLen2]); + magnU16[0] = (uint16_t)WEBRTC_SPL_ABS_W16(inst->real[0]); // Q(normData-stages) + magnU16[inst->anaLen2] = (uint16_t)WEBRTC_SPL_ABS_W16(inst->real[inst->anaLen2]); + inst->sumMagn = (uint32_t)magnU16[0]; // Q(normData-stages) + inst->sumMagn += (uint32_t)magnU16[inst->anaLen2]; + + if (inst->blockIndex >= END_STARTUP_SHORT) { + for (i = 1, j = 2; i < inst->anaLen2; i += 1, j += 2) { + inst->real[i] = winData[j]; + inst->imag[i] = -winData[j + 1]; + // magnitude spectrum + // energy in Q(2*(normData-stages)) + tmpU32no1 = (uint32_t)(winData[j] * winData[j]); + tmpU32no1 += (uint32_t)(winData[j + 1] * winData[j + 1]); + inst->magnEnergy += tmpU32no1; // Q(2*(normData-stages)) + + magnU16[i] = (uint16_t)WebRtcSpl_SqrtFloor(tmpU32no1); // Q(normData-stages) + inst->sumMagn += (uint32_t)magnU16[i]; // Q(normData-stages) + } + } else { + // + // Gather information during startup for noise parameter estimation + // + + // Switch initMagnEst to Q(minNorm-stages) + inst->initMagnEst[0] >>= right_shifts_in_initMagnEst; + inst->initMagnEst[inst->anaLen2] >>= right_shifts_in_initMagnEst; + + // Update initMagnEst with magnU16 in Q(minNorm-stages). 
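+    // (initMagnEst is a running sum over the startup frames rather than an
+    // average; WebRtcNsx_CalcParametricNoiseEstimate compensates by scaling
+    // its estimate with the frame count, see above.)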
+ inst->initMagnEst[0] += magnU16[0] >> right_shifts_in_magnU16; + inst->initMagnEst[inst->anaLen2] += + magnU16[inst->anaLen2] >> right_shifts_in_magnU16; + + log2 = 0; + if (magnU16[inst->anaLen2]) { + // Calculate log2(magnU16[inst->anaLen2]) + zeros = WebRtcSpl_NormU32((uint32_t)magnU16[inst->anaLen2]); + frac = (int16_t)((((uint32_t)magnU16[inst->anaLen2] << zeros) & + 0x7FFFFFFF) >> 23); // Q8 + // log2(magnU16(i)) in Q8 + RTC_DCHECK_LT(frac, 256); + log2 = (int16_t)(((31 - zeros) << 8) + WebRtcNsx_kLogTableFrac[frac]); + } + + sum_log_magn = (int32_t)log2; // Q8 + // sum_log_i_log_magn in Q17 + sum_log_i_log_magn = (kLogIndex[inst->anaLen2] * log2) >> 3; + + for (i = 1, j = 2; i < inst->anaLen2; i += 1, j += 2) { + inst->real[i] = winData[j]; + inst->imag[i] = -winData[j + 1]; + // magnitude spectrum + // energy in Q(2*(normData-stages)) + tmpU32no1 = (uint32_t)(winData[j] * winData[j]); + tmpU32no1 += (uint32_t)(winData[j + 1] * winData[j + 1]); + inst->magnEnergy += tmpU32no1; // Q(2*(normData-stages)) + + magnU16[i] = (uint16_t)WebRtcSpl_SqrtFloor(tmpU32no1); // Q(normData-stages) + inst->sumMagn += (uint32_t)magnU16[i]; // Q(normData-stages) + + // Switch initMagnEst to Q(minNorm-stages) + inst->initMagnEst[i] >>= right_shifts_in_initMagnEst; + + // Update initMagnEst with magnU16 in Q(minNorm-stages). + inst->initMagnEst[i] += magnU16[i] >> right_shifts_in_magnU16; + + if (i >= kStartBand) { + // For pink noise estimation. Collect data neglecting lower frequency band + log2 = 0; + if (magnU16[i]) { + zeros = WebRtcSpl_NormU32((uint32_t)magnU16[i]); + frac = (int16_t)((((uint32_t)magnU16[i] << zeros) & + 0x7FFFFFFF) >> 23); + // log2(magnU16(i)) in Q8 + RTC_DCHECK_LT(frac, 256); + log2 = (int16_t)(((31 - zeros) << 8) + + WebRtcNsx_kLogTableFrac[frac]); + } + sum_log_magn += (int32_t)log2; // Q8 + // sum_log_i_log_magn in Q17 + sum_log_i_log_magn += (kLogIndex[i] * log2) >> 3; + } + } + + // + //compute simplified noise model during startup + // + + // Estimate White noise + + // Switch whiteNoiseLevel to Q(minNorm-stages) + inst->whiteNoiseLevel >>= right_shifts_in_initMagnEst; + + // Update the average magnitude spectrum, used as noise estimate. + tmpU32no1 = WEBRTC_SPL_UMUL_32_16(inst->sumMagn, inst->overdrive); + tmpU32no1 >>= inst->stages + 8; + + // Replacing division above with 'stages' shifts + // Shift to same Q-domain as whiteNoiseLevel + tmpU32no1 >>= right_shifts_in_magnU16; + // This operation is safe from wrap around as long as END_STARTUP_SHORT < 128 + RTC_DCHECK_LT(END_STARTUP_SHORT, 128); + inst->whiteNoiseLevel += tmpU32no1; // Q(minNorm-stages) + + // Estimate Pink noise parameters + // Denominator used in both parameter estimates. + // The value is only dependent on the size of the frequency band (kStartBand) + // and to reduce computational complexity stored in a table (kDeterminantEstMatrix[]) + RTC_DCHECK_LT(kStartBand, 66); + matrix_determinant = kDeterminantEstMatrix[kStartBand]; // Q0 + sum_log_i = kSumLogIndex[kStartBand]; // Q5 + sum_log_i_square = kSumSquareLogIndex[kStartBand]; // Q2 + if (inst->fs == 8000) { + // Adjust values to shorter blocks in narrow band. 
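+    // The kSumLogIndex/kSumSquareLogIndex tables are precomputed for the
+    // wideband analysis length; at 8 kHz only half as many bins exist, so
+    // the contribution of the upper table entries is subtracted from the
+    // sums and folded back into the determinant.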
+ tmp_1_w32 = (int32_t)matrix_determinant; + tmp_1_w32 += (kSumLogIndex[65] * sum_log_i) >> 9; + tmp_1_w32 -= (kSumLogIndex[65] * kSumLogIndex[65]) >> 10; + tmp_1_w32 -= (int32_t)sum_log_i_square << 4; + tmp_1_w32 -= ((inst->magnLen - kStartBand) * kSumSquareLogIndex[65]) >> 2; + matrix_determinant = (int16_t)tmp_1_w32; + sum_log_i -= kSumLogIndex[65]; // Q5 + sum_log_i_square -= kSumSquareLogIndex[65]; // Q2 + } + + // Necessary number of shifts to fit sum_log_magn in a word16 + zeros = 16 - WebRtcSpl_NormW32(sum_log_magn); + if (zeros < 0) { + zeros = 0; + } + tmp_1_w32 = sum_log_magn << 1; // Q9 + sum_log_magn_u16 = (uint16_t)(tmp_1_w32 >> zeros); // Q(9-zeros). + + // Calculate and update pinkNoiseNumerator. Result in Q11. + tmp_2_w32 = WEBRTC_SPL_MUL_16_U16(sum_log_i_square, sum_log_magn_u16); // Q(11-zeros) + tmpU32no1 = sum_log_i_log_magn >> 12; // Q5 + + // Shift the largest value of sum_log_i and tmp32no3 before multiplication + tmp_u16 = ((uint16_t)sum_log_i << 1); // Q6 + if ((uint32_t)sum_log_i > tmpU32no1) { + tmp_u16 >>= zeros; + } else { + tmpU32no1 >>= zeros; + } + tmp_2_w32 -= (int32_t)WEBRTC_SPL_UMUL_32_16(tmpU32no1, tmp_u16); // Q(11-zeros) + matrix_determinant >>= zeros; // Q(-zeros) + tmp_2_w32 = WebRtcSpl_DivW32W16(tmp_2_w32, matrix_determinant); // Q11 + tmp_2_w32 += (int32_t)net_norm << 11; // Q11 + if (tmp_2_w32 < 0) { + tmp_2_w32 = 0; + } + inst->pinkNoiseNumerator += tmp_2_w32; // Q11 + + // Calculate and update pinkNoiseExp. Result in Q14. + tmp_2_w32 = WEBRTC_SPL_MUL_16_U16(sum_log_i, sum_log_magn_u16); // Q(14-zeros) + tmp_1_w32 = sum_log_i_log_magn >> (3 + zeros); + tmp_1_w32 *= inst->magnLen - kStartBand; + tmp_2_w32 -= tmp_1_w32; // Q(14-zeros) + if (tmp_2_w32 > 0) { + // If the exponential parameter is negative force it to zero, which means a + // flat spectrum. + tmp_1_w32 = WebRtcSpl_DivW32W16(tmp_2_w32, matrix_determinant); // Q14 + inst->pinkNoiseExp += WEBRTC_SPL_SAT(16384, tmp_1_w32, 0); // Q14 + } + } +} + +void WebRtcNsx_DataSynthesis(NoiseSuppressionFixedC* inst, short* outFrame) { + int32_t energyOut; + + int16_t realImag_buff[ANAL_BLOCKL_MAX * 2 + 16]; + int16_t rfft_out_buff[ANAL_BLOCKL_MAX * 2 + 16]; + + // Align the structures to 32-byte boundary for the FFT function. + int16_t* realImag = (int16_t*) (((uintptr_t)realImag_buff + 31) & ~31); + int16_t* rfft_out = (int16_t*) (((uintptr_t) rfft_out_buff + 31) & ~31); + + int16_t tmp16no1, tmp16no2; + int16_t energyRatio; + int16_t gainFactor, gainFactor1, gainFactor2; + + size_t i; + int outCIFFT; + int scaleEnergyOut = 0; + + if (inst->zeroInputSignal) { + // synthesize the special case of zero input + // read out fully processed segment + for (i = 0; i < inst->blockLen10ms; i++) { + outFrame[i] = inst->synthesisBuffer[i]; // Q0 + } + // update synthesis buffer + memcpy(inst->synthesisBuffer, inst->synthesisBuffer + inst->blockLen10ms, + (inst->anaLen - inst->blockLen10ms) * sizeof(*inst->synthesisBuffer)); + WebRtcSpl_ZerosArrayW16(inst->synthesisBuffer + inst->anaLen - inst->blockLen10ms, + inst->blockLen10ms); + return; + } + + // Filter the data in the frequency domain, and create spectrum. + WebRtcNsx_PrepareSpectrum(inst, realImag); + + // Inverse FFT output will be in rfft_out[]. 
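+  // The IFFT reports a scale value (outCIFFT); WebRtcNsx_Denormalize
+  // combines it with inst->normData to bring the samples back to Q0.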
+ outCIFFT = WebRtcSpl_RealInverseFFT(inst->real_fft, realImag, rfft_out); + + WebRtcNsx_Denormalize(inst, rfft_out, outCIFFT); + + //scale factor: only do it after END_STARTUP_LONG time + gainFactor = 8192; // 8192 = Q13(1.0) + if (inst->gainMap == 1 && + inst->blockIndex > END_STARTUP_LONG && + inst->energyIn > 0) { + // Q(-scaleEnergyOut) + energyOut = WebRtcSpl_Energy(inst->real, inst->anaLen, &scaleEnergyOut); + if (scaleEnergyOut == 0 && !(energyOut & 0x7f800000)) { + energyOut = WEBRTC_SPL_SHIFT_W32(energyOut, 8 + scaleEnergyOut + - inst->scaleEnergyIn); + } else { + // |energyIn| is currently in Q(|scaleEnergyIn|), but to later on end up + // with an |energyRatio| in Q8 we need to change the Q-domain to + // Q(-8-scaleEnergyOut). + inst->energyIn >>= 8 + scaleEnergyOut - inst->scaleEnergyIn; + } + + RTC_DCHECK_GT(inst->energyIn, 0); + energyRatio = (energyOut + inst->energyIn / 2) / inst->energyIn; // Q8 + // Limit the ratio to [0, 1] in Q8, i.e., [0, 256] + energyRatio = WEBRTC_SPL_SAT(256, energyRatio, 0); + + // all done in lookup tables now + RTC_DCHECK_LT(energyRatio, 257); + gainFactor1 = kFactor1Table[energyRatio]; // Q8 + gainFactor2 = inst->factor2Table[energyRatio]; // Q8 + + //combine both scales with speech/noise prob: note prior (priorSpeechProb) is not frequency dependent + + // factor = inst->priorSpeechProb*factor1 + (1.0-inst->priorSpeechProb)*factor2; // original code + tmp16no1 = (int16_t)(((16384 - inst->priorNonSpeechProb) * gainFactor1) >> + 14); // in Q13, where 16384 = Q14(1.0) + tmp16no2 = (int16_t)((inst->priorNonSpeechProb * gainFactor2) >> 14); + gainFactor = tmp16no1 + tmp16no2; // Q13 + } // out of flag_gain_map==1 + + // Synthesis, read out fully processed segment, and update synthesis buffer. + WebRtcNsx_SynthesisUpdate(inst, outFrame, gainFactor); +} + +void WebRtcNsx_ProcessCore(NoiseSuppressionFixedC* inst, + const short* const* speechFrame, + int num_bands, + short* const* outFrame) { + // main routine for noise suppression + + uint32_t tmpU32no1, tmpU32no2, tmpU32no3; + uint32_t satMax, maxNoiseU32; + uint32_t tmpMagnU32, tmpNoiseU32; + uint32_t nearMagnEst; + uint32_t noiseUpdateU32; + uint32_t noiseU32[HALF_ANAL_BLOCKL]; + uint32_t postLocSnr[HALF_ANAL_BLOCKL]; + uint32_t priorLocSnr[HALF_ANAL_BLOCKL]; + uint32_t prevNearSnr[HALF_ANAL_BLOCKL]; + uint32_t curNearSnr; + uint32_t priorSnr; + uint32_t noise_estimate = 0; + uint32_t noise_estimate_avg = 0; + uint32_t numerator = 0; + + int32_t tmp32no1, tmp32no2; + int32_t pink_noise_num_avg = 0; + + uint16_t tmpU16no1; + uint16_t magnU16[HALF_ANAL_BLOCKL]; + uint16_t prevNoiseU16[HALF_ANAL_BLOCKL]; + uint16_t nonSpeechProbFinal[HALF_ANAL_BLOCKL]; + uint16_t gammaNoise, prevGammaNoise; + uint16_t noiseSupFilterTmp[HALF_ANAL_BLOCKL]; + + int16_t qMagn, qNoise; + int16_t avgProbSpeechHB, gainModHB, avgFilterGainHB, gainTimeDomainHB; + int16_t pink_noise_exp_avg = 0; + + size_t i, j; + int nShifts, postShifts; + int norm32no1, norm32no2; + int flag, sign; + int q_domain_to_use = 0; + + // Code for ARMv7-Neon platform assumes the following: + RTC_DCHECK_GT(inst->anaLen, 0); + RTC_DCHECK_GT(inst->anaLen2, 0); + RTC_DCHECK_EQ(0, inst->anaLen % 16); + RTC_DCHECK_EQ(0, inst->anaLen2 % 8); + RTC_DCHECK_GT(inst->blockLen10ms, 0); + RTC_DCHECK_EQ(0, inst->blockLen10ms % 16); + RTC_DCHECK_EQ(inst->magnLen, inst->anaLen2 + 1); + +#ifdef NS_FILEDEBUG + if (fwrite(spframe, sizeof(short), + inst->blockLen10ms, inst->infile) != inst->blockLen10ms) { + RTC_NOTREACHED(); + } +#endif + + // Check that initialization 
has been done + RTC_DCHECK_EQ(1, inst->initFlag); + RTC_DCHECK_LE(num_bands - 1, NUM_HIGH_BANDS_MAX); + + const short* const* speechFrameHB = NULL; + short* const* outFrameHB = NULL; + size_t num_high_bands = 0; + if (num_bands > 1) { + speechFrameHB = &speechFrame[1]; + outFrameHB = &outFrame[1]; + num_high_bands = (size_t)(num_bands - 1); + } + + // Store speechFrame and transform to frequency domain + WebRtcNsx_DataAnalysis(inst, (short*)speechFrame[0], magnU16); + + if (inst->zeroInputSignal) { + WebRtcNsx_DataSynthesis(inst, outFrame[0]); + + if (num_bands > 1) { + // update analysis buffer for H band + // append new data to buffer FX + for (i = 0; i < num_high_bands; ++i) { + int block_shift = inst->anaLen - inst->blockLen10ms; + memcpy(inst->dataBufHBFX[i], inst->dataBufHBFX[i] + inst->blockLen10ms, + block_shift * sizeof(*inst->dataBufHBFX[i])); + memcpy(inst->dataBufHBFX[i] + block_shift, speechFrameHB[i], + inst->blockLen10ms * sizeof(*inst->dataBufHBFX[i])); + for (j = 0; j < inst->blockLen10ms; j++) { + outFrameHB[i][j] = inst->dataBufHBFX[i][j]; // Q0 + } + } + } // end of H band gain computation + return; + } + + // Update block index when we have something to process + inst->blockIndex++; + // + + // Norm of magn + qMagn = inst->normData - inst->stages; + + // Compute spectral flatness on input spectrum + WebRtcNsx_ComputeSpectralFlatness(inst, magnU16); + + // quantile noise estimate + WebRtcNsx_NoiseEstimation(inst, magnU16, noiseU32, &qNoise); + + //noise estimate from previous frame + for (i = 0; i < inst->magnLen; i++) { + prevNoiseU16[i] = (uint16_t)(inst->prevNoiseU32[i] >> 11); // Q(prevQNoise) + } + + if (inst->blockIndex < END_STARTUP_SHORT) { + // Noise Q-domain to be used later; see description at end of section. + q_domain_to_use = WEBRTC_SPL_MIN((int)qNoise, inst->minNorm - inst->stages); + + // Calculate frequency independent parts in parametric noise estimate and calculate + // the estimate for the lower frequency band (same values for all frequency bins) + if (inst->pinkNoiseExp) { + pink_noise_exp_avg = (int16_t)WebRtcSpl_DivW32W16(inst->pinkNoiseExp, + (int16_t)(inst->blockIndex + 1)); // Q14 + pink_noise_num_avg = WebRtcSpl_DivW32W16(inst->pinkNoiseNumerator, + (int16_t)(inst->blockIndex + 1)); // Q11 + WebRtcNsx_CalcParametricNoiseEstimate(inst, + pink_noise_exp_avg, + pink_noise_num_avg, + kStartBand, + &noise_estimate, + &noise_estimate_avg); + } else { + // Use white noise estimate if we have poor pink noise parameter estimates + noise_estimate = inst->whiteNoiseLevel; // Q(minNorm-stages) + noise_estimate_avg = noise_estimate / (inst->blockIndex + 1); // Q(minNorm-stages) + } + for (i = 0; i < inst->magnLen; i++) { + // Estimate the background noise using the pink noise parameters if permitted + if ((inst->pinkNoiseExp) && (i >= kStartBand)) { + // Reset noise_estimate + noise_estimate = 0; + noise_estimate_avg = 0; + // Calculate the parametric noise estimate for current frequency bin + WebRtcNsx_CalcParametricNoiseEstimate(inst, + pink_noise_exp_avg, + pink_noise_num_avg, + i, + &noise_estimate, + &noise_estimate_avg); + } + // Calculate parametric Wiener filter + noiseSupFilterTmp[i] = inst->denoiseBound; + if (inst->initMagnEst[i]) { + // numerator = (initMagnEst - noise_estimate * overdrive) + // Result in Q(8+minNorm-stages) + tmpU32no1 = WEBRTC_SPL_UMUL_32_16(noise_estimate, inst->overdrive); + numerator = inst->initMagnEst[i] << 8; + if (numerator > tmpU32no1) { + // Suppression filter coefficient larger than zero, so calculate. 
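+          // In floating point this block computes
+          //   filter = (initMagnEst - overdrive * noise_estimate) / initMagnEst
+          // in Q14, clamped below by |denoiseBound| and above by 1.0 (16384).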
+ numerator -= tmpU32no1; + + // Determine number of left shifts in numerator for best accuracy after + // division + nShifts = WebRtcSpl_NormU32(numerator); + nShifts = WEBRTC_SPL_SAT(6, nShifts, 0); + + // Shift numerator to Q(nShifts+8+minNorm-stages) + numerator <<= nShifts; + + // Shift denominator to Q(nShifts-6+minNorm-stages) + tmpU32no1 = inst->initMagnEst[i] >> (6 - nShifts); + if (tmpU32no1 == 0) { + // This is only possible if numerator = 0, in which case + // we don't need any division. + tmpU32no1 = 1; + } + tmpU32no2 = numerator / tmpU32no1; // Q14 + noiseSupFilterTmp[i] = (uint16_t)WEBRTC_SPL_SAT(16384, tmpU32no2, + (uint32_t)(inst->denoiseBound)); // Q14 + } + } + // Weight quantile noise 'noiseU32' with modeled noise 'noise_estimate_avg' + // 'noiseU32 is in Q(qNoise) and 'noise_estimate' in Q(minNorm-stages) + // To guarantee that we do not get wrap around when shifting to the same domain + // we use the lowest one. Furthermore, we need to save 6 bits for the weighting. + // 'noise_estimate_avg' can handle this operation by construction, but 'noiseU32' + // may not. + + // Shift 'noiseU32' to 'q_domain_to_use' + tmpU32no1 = noiseU32[i] >> (qNoise - q_domain_to_use); + // Shift 'noise_estimate_avg' to 'q_domain_to_use' + tmpU32no2 = noise_estimate_avg >> + (inst->minNorm - inst->stages - q_domain_to_use); + // Make a simple check to see if we have enough room for weighting 'tmpU32no1' + // without wrap around + nShifts = 0; + if (tmpU32no1 & 0xfc000000) { + tmpU32no1 >>= 6; + tmpU32no2 >>= 6; + nShifts = 6; + } + tmpU32no1 *= inst->blockIndex; + tmpU32no2 *= (END_STARTUP_SHORT - inst->blockIndex); + // Add them together and divide by startup length + noiseU32[i] = WebRtcSpl_DivU32U16(tmpU32no1 + tmpU32no2, END_STARTUP_SHORT); + // Shift back if necessary + noiseU32[i] <<= nShifts; + } + // Update new Q-domain for 'noiseU32' + qNoise = q_domain_to_use; + } + // compute average signal during END_STARTUP_LONG time: + // used to normalize spectral difference measure + if (inst->blockIndex < END_STARTUP_LONG) { + // substituting division with shift ending up in Q(-2*stages) + inst->timeAvgMagnEnergyTmp += + inst->magnEnergy >> (2 * inst->normData + inst->stages - 1); + inst->timeAvgMagnEnergy = WebRtcSpl_DivU32U16(inst->timeAvgMagnEnergyTmp, + inst->blockIndex + 1); + } + + //start processing at frames == converged+1 + // STEP 1: compute prior and post SNR based on quantile noise estimates + + // compute direct decision (DD) estimate of prior SNR: needed for new method + satMax = (uint32_t)1048575;// Largest possible value without getting overflow despite shifting 12 steps + postShifts = 6 + qMagn - qNoise; + nShifts = 5 - inst->prevQMagn + inst->prevQNoise; + for (i = 0; i < inst->magnLen; i++) { + // FLOAT: + // post SNR + // postLocSnr[i] = 0.0; + // if (magn[i] > noise[i]) + // { + // postLocSnr[i] = magn[i] / (noise[i] + 0.0001); + // } + // // previous post SNR + // // previous estimate: based on previous frame with gain filter (smooth is previous filter) + // + // prevNearSnr[i] = inst->prevMagnU16[i] / (inst->noisePrev[i] + 0.0001) * (inst->smooth[i]); + // + // // DD estimate is sum of two terms: current estimate and previous estimate + // // directed decision update of priorSnr (or we actually store [2*priorSnr+1]) + // + // priorLocSnr[i] = DD_PR_SNR * prevNearSnr[i] + (1.0 - DD_PR_SNR) * (postLocSnr[i] - 1.0); + + // calculate post SNR: output in Q11 + postLocSnr[i] = 2048; // 1.0 in Q11 + tmpU32no1 = (uint32_t)magnU16[i] << 6; // Q(6+qMagn) + if (postShifts < 0) { 
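+      // postShifts = 6 + qMagn - qNoise; a negative value means |noiseU32|
+      // sits in a higher Q-domain than Q(6+qMagn), so it is shifted right.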
+ tmpU32no2 = noiseU32[i] >> -postShifts; // Q(6+qMagn) + } else { + tmpU32no2 = noiseU32[i] << postShifts; // Q(6+qMagn) + } + if (tmpU32no1 > tmpU32no2) { + // Current magnitude larger than noise + tmpU32no1 <<= 11; // Q(17+qMagn) + if (tmpU32no2 > 0) { + tmpU32no1 /= tmpU32no2; // Q11 + postLocSnr[i] = WEBRTC_SPL_MIN(satMax, tmpU32no1); // Q11 + } else { + postLocSnr[i] = satMax; + } + } + + // calculate prevNearSnr[i] and save for later instead of recalculating it later + // |nearMagnEst| in Q(prevQMagn + 14) + nearMagnEst = inst->prevMagnU16[i] * inst->noiseSupFilter[i]; + tmpU32no1 = nearMagnEst << 3; // Q(prevQMagn+17) + tmpU32no2 = inst->prevNoiseU32[i] >> nShifts; // Q(prevQMagn+6) + + if (tmpU32no2 > 0) { + tmpU32no1 /= tmpU32no2; // Q11 + tmpU32no1 = WEBRTC_SPL_MIN(satMax, tmpU32no1); // Q11 + } else { + tmpU32no1 = satMax; // Q11 + } + prevNearSnr[i] = tmpU32no1; // Q11 + + //directed decision update of priorSnr + tmpU32no1 = WEBRTC_SPL_UMUL_32_16(prevNearSnr[i], DD_PR_SNR_Q11); // Q22 + tmpU32no2 = WEBRTC_SPL_UMUL_32_16(postLocSnr[i] - 2048, ONE_MINUS_DD_PR_SNR_Q11); // Q22 + priorSnr = tmpU32no1 + tmpU32no2 + 512; // Q22 (added 512 for rounding) + // priorLocSnr = 1 + 2*priorSnr + priorLocSnr[i] = 2048 + (priorSnr >> 10); // Q11 + } // end of loop over frequencies + // done with step 1: DD computation of prior and post SNR + + // STEP 2: compute speech/noise likelihood + + //compute difference of input spectrum with learned/estimated noise spectrum + WebRtcNsx_ComputeSpectralDifference(inst, magnU16); + //compute histograms for determination of parameters (thresholds and weights for features) + //parameters are extracted once every window time (=inst->modelUpdate) + //counter update + inst->cntThresUpdate++; + flag = (int)(inst->cntThresUpdate == inst->modelUpdate); + //update histogram + WebRtcNsx_FeatureParameterExtraction(inst, flag); + //compute model parameters + if (flag) { + inst->cntThresUpdate = 0; // Reset counter + //update every window: + // get normalization for spectral difference for next window estimate + + // Shift to Q(-2*stages) + inst->curAvgMagnEnergy >>= STAT_UPDATES; + + tmpU32no1 = (inst->curAvgMagnEnergy + inst->timeAvgMagnEnergy + 1) >> 1; //Q(-2*stages) + // Update featureSpecDiff + if ((tmpU32no1 != inst->timeAvgMagnEnergy) && (inst->featureSpecDiff) && + (inst->timeAvgMagnEnergy > 0)) { + norm32no1 = 0; + tmpU32no3 = tmpU32no1; + while (0xFFFF0000 & tmpU32no3) { + tmpU32no3 >>= 1; + norm32no1++; + } + tmpU32no2 = inst->featureSpecDiff; + while (0xFFFF0000 & tmpU32no2) { + tmpU32no2 >>= 1; + norm32no1++; + } + tmpU32no3 = WEBRTC_SPL_UMUL(tmpU32no3, tmpU32no2); + tmpU32no3 /= inst->timeAvgMagnEnergy; + if (WebRtcSpl_NormU32(tmpU32no3) < norm32no1) { + inst->featureSpecDiff = 0x007FFFFF; + } else { + inst->featureSpecDiff = WEBRTC_SPL_MIN(0x007FFFFF, + tmpU32no3 << norm32no1); + } + } + + inst->timeAvgMagnEnergy = tmpU32no1; // Q(-2*stages) + inst->curAvgMagnEnergy = 0; + } + + //compute speech/noise probability + WebRtcNsx_SpeechNoiseProb(inst, nonSpeechProbFinal, priorLocSnr, postLocSnr); + + //time-avg parameter for noise update + gammaNoise = NOISE_UPDATE_Q8; // Q8 + + maxNoiseU32 = 0; + postShifts = inst->prevQNoise - qMagn; + nShifts = inst->prevQMagn - qMagn; + for (i = 0; i < inst->magnLen; i++) { + // temporary noise update: use it for speech frames if update value is less than previous + // the formula has been rewritten into: + // noiseUpdate = noisePrev[i] + (1 - gammaNoise) * nonSpeechProb * (magn[i] - noisePrev[i]) + + if (postShifts < 
0) { + tmpU32no2 = magnU16[i] >> -postShifts; // Q(prevQNoise) + } else { + tmpU32no2 = (uint32_t)magnU16[i] << postShifts; // Q(prevQNoise) + } + if (prevNoiseU16[i] > tmpU32no2) { + sign = -1; + tmpU32no1 = prevNoiseU16[i] - tmpU32no2; + } else { + sign = 1; + tmpU32no1 = tmpU32no2 - prevNoiseU16[i]; + } + noiseUpdateU32 = inst->prevNoiseU32[i]; // Q(prevQNoise+11) + tmpU32no3 = 0; + if ((tmpU32no1) && (nonSpeechProbFinal[i])) { + // This value will be used later, if gammaNoise changes + tmpU32no3 = WEBRTC_SPL_UMUL_32_16(tmpU32no1, nonSpeechProbFinal[i]); // Q(prevQNoise+8) + if (0x7c000000 & tmpU32no3) { + // Shifting required before multiplication + tmpU32no2 = (tmpU32no3 >> 5) * gammaNoise; // Q(prevQNoise+11) + } else { + // We can do shifting after multiplication + tmpU32no2 = (tmpU32no3 * gammaNoise) >> 5; // Q(prevQNoise+11) + } + if (sign > 0) { + noiseUpdateU32 += tmpU32no2; // Q(prevQNoise+11) + } else { + // This operation is safe. We can never get wrap around, since worst + // case scenario means magnU16 = 0 + noiseUpdateU32 -= tmpU32no2; // Q(prevQNoise+11) + } + } + + //increase gamma (i.e., less noise update) for frame likely to be speech + prevGammaNoise = gammaNoise; + gammaNoise = NOISE_UPDATE_Q8; + //time-constant based on speech/noise state + //increase gamma (i.e., less noise update) for frames likely to be speech + if (nonSpeechProbFinal[i] < ONE_MINUS_PROB_RANGE_Q8) { + gammaNoise = GAMMA_NOISE_TRANS_AND_SPEECH_Q8; + } + + if (prevGammaNoise != gammaNoise) { + // new noise update + // this line is the same as above, only that the result is stored in a different variable and the gammaNoise + // has changed + // + // noiseUpdate = noisePrev[i] + (1 - gammaNoise) * nonSpeechProb * (magn[i] - noisePrev[i]) + + if (0x7c000000 & tmpU32no3) { + // Shifting required before multiplication + tmpU32no2 = (tmpU32no3 >> 5) * gammaNoise; // Q(prevQNoise+11) + } else { + // We can do shifting after multiplication + tmpU32no2 = (tmpU32no3 * gammaNoise) >> 5; // Q(prevQNoise+11) + } + if (sign > 0) { + tmpU32no1 = inst->prevNoiseU32[i] + tmpU32no2; // Q(prevQNoise+11) + } else { + tmpU32no1 = inst->prevNoiseU32[i] - tmpU32no2; // Q(prevQNoise+11) + } + if (noiseUpdateU32 > tmpU32no1) { + noiseUpdateU32 = tmpU32no1; // Q(prevQNoise+11) + } + } + noiseU32[i] = noiseUpdateU32; // Q(prevQNoise+11) + if (noiseUpdateU32 > maxNoiseU32) { + maxNoiseU32 = noiseUpdateU32; + } + + // conservative noise update + // // original FLOAT code + // if (prob_speech < PROB_RANGE) { + // inst->avgMagnPause[i] = inst->avgMagnPause[i] + (1.0 - gamma_pause)*(magn[i] - inst->avgMagnPause[i]); + // } + + tmp32no2 = WEBRTC_SPL_SHIFT_W32(inst->avgMagnPause[i], -nShifts); + if (nonSpeechProbFinal[i] > ONE_MINUS_PROB_RANGE_Q8) { + if (nShifts < 0) { + tmp32no1 = (int32_t)magnU16[i] - tmp32no2; // Q(qMagn) + tmp32no1 *= ONE_MINUS_GAMMA_PAUSE_Q8; // Q(8+prevQMagn+nShifts) + tmp32no1 = (tmp32no1 + 128) >> 8; // Q(qMagn). + } else { + // In Q(qMagn+nShifts) + tmp32no1 = ((int32_t)magnU16[i] << nShifts) - inst->avgMagnPause[i]; + tmp32no1 *= ONE_MINUS_GAMMA_PAUSE_Q8; // Q(8+prevQMagn+nShifts) + tmp32no1 = (tmp32no1 + (128 << nShifts)) >> (8 + nShifts); // Q(qMagn). 
+ } + tmp32no2 += tmp32no1; // Q(qMagn) + } + inst->avgMagnPause[i] = tmp32no2; + } // end of frequency loop + + norm32no1 = WebRtcSpl_NormU32(maxNoiseU32); + qNoise = inst->prevQNoise + norm32no1 - 5; + // done with step 2: noise update + + // STEP 3: compute dd update of prior snr and post snr based on new noise estimate + nShifts = inst->prevQNoise + 11 - qMagn; + for (i = 0; i < inst->magnLen; i++) { + // FLOAT code + // // post and prior SNR + // curNearSnr = 0.0; + // if (magn[i] > noise[i]) + // { + // curNearSnr = magn[i] / (noise[i] + 0.0001) - 1.0; + // } + // // DD estimate is sum of two terms: current estimate and previous estimate + // // directed decision update of snrPrior + // snrPrior = DD_PR_SNR * prevNearSnr[i] + (1.0 - DD_PR_SNR) * curNearSnr; + // // gain filter + // tmpFloat1 = inst->overdrive + snrPrior; + // tmpFloat2 = snrPrior / tmpFloat1; + // theFilter[i] = tmpFloat2; + + // calculate curNearSnr again, this is necessary because a new noise estimate has been made since then. for the original + curNearSnr = 0; // Q11 + if (nShifts < 0) { + // This case is equivalent with magn < noise which implies curNearSnr = 0; + tmpMagnU32 = (uint32_t)magnU16[i]; // Q(qMagn) + tmpNoiseU32 = noiseU32[i] << -nShifts; // Q(qMagn) + } else if (nShifts > 17) { + tmpMagnU32 = (uint32_t)magnU16[i] << 17; // Q(qMagn+17) + tmpNoiseU32 = noiseU32[i] >> (nShifts - 17); // Q(qMagn+17) + } else { + tmpMagnU32 = (uint32_t)magnU16[i] << nShifts; // Q(qNoise_prev+11) + tmpNoiseU32 = noiseU32[i]; // Q(qNoise_prev+11) + } + if (tmpMagnU32 > tmpNoiseU32) { + tmpU32no1 = tmpMagnU32 - tmpNoiseU32; // Q(qCur) + norm32no2 = WEBRTC_SPL_MIN(11, WebRtcSpl_NormU32(tmpU32no1)); + tmpU32no1 <<= norm32no2; // Q(qCur+norm32no2) + tmpU32no2 = tmpNoiseU32 >> (11 - norm32no2); // Q(qCur+norm32no2-11) + if (tmpU32no2 > 0) { + tmpU32no1 /= tmpU32no2; // Q11 + } + curNearSnr = WEBRTC_SPL_MIN(satMax, tmpU32no1); // Q11 + } + + //directed decision update of priorSnr + // FLOAT + // priorSnr = DD_PR_SNR * prevNearSnr + (1.0-DD_PR_SNR) * curNearSnr; + + tmpU32no1 = WEBRTC_SPL_UMUL_32_16(prevNearSnr[i], DD_PR_SNR_Q11); // Q22 + tmpU32no2 = WEBRTC_SPL_UMUL_32_16(curNearSnr, ONE_MINUS_DD_PR_SNR_Q11); // Q22 + priorSnr = tmpU32no1 + tmpU32no2; // Q22 + + //gain filter + tmpU32no1 = inst->overdrive + ((priorSnr + 8192) >> 14); // Q8 + RTC_DCHECK_GT(inst->overdrive, 0); + tmpU16no1 = (priorSnr + tmpU32no1 / 2) / tmpU32no1; // Q14 + inst->noiseSupFilter[i] = WEBRTC_SPL_SAT(16384, tmpU16no1, inst->denoiseBound); // 16384 = Q14(1.0) // Q14 + + // Weight in the parametric Wiener filter during startup + if (inst->blockIndex < END_STARTUP_SHORT) { + // Weight the two suppression filters + tmpU32no1 = inst->noiseSupFilter[i] * inst->blockIndex; + tmpU32no2 = noiseSupFilterTmp[i] * + (END_STARTUP_SHORT - inst->blockIndex); + tmpU32no1 += tmpU32no2; + inst->noiseSupFilter[i] = (uint16_t)WebRtcSpl_DivU32U16(tmpU32no1, + END_STARTUP_SHORT); + } + } // end of loop over frequencies + //done with step3 + + // save noise and magnitude spectrum for next frame + inst->prevQNoise = qNoise; + inst->prevQMagn = qMagn; + if (norm32no1 > 5) { + for (i = 0; i < inst->magnLen; i++) { + inst->prevNoiseU32[i] = noiseU32[i] << (norm32no1 - 5); // Q(qNoise+11) + inst->prevMagnU16[i] = magnU16[i]; // Q(qMagn) + } + } else { + for (i = 0; i < inst->magnLen; i++) { + inst->prevNoiseU32[i] = noiseU32[i] >> (5 - norm32no1); // Q(qNoise+11) + inst->prevMagnU16[i] = magnU16[i]; // Q(qMagn) + } + } + + WebRtcNsx_DataSynthesis(inst, outFrame[0]); +#ifdef 
NS_FILEDEBUG + if (fwrite(outframe, sizeof(short), + inst->blockLen10ms, inst->outfile) != inst->blockLen10ms) { + RTC_NOTREACHED(); + } +#endif + + //for H band: + // only update data buffer, then apply time-domain gain is applied derived from L band + if (num_bands > 1) { + // update analysis buffer for H band + // append new data to buffer FX + for (i = 0; i < num_high_bands; ++i) { + memcpy(inst->dataBufHBFX[i], inst->dataBufHBFX[i] + inst->blockLen10ms, + (inst->anaLen - inst->blockLen10ms) * sizeof(*inst->dataBufHBFX[i])); + memcpy(inst->dataBufHBFX[i] + inst->anaLen - inst->blockLen10ms, + speechFrameHB[i], inst->blockLen10ms * sizeof(*inst->dataBufHBFX[i])); + } + // range for averaging low band quantities for H band gain + + gainTimeDomainHB = 16384; // 16384 = Q14(1.0) + //average speech prob from low band + //average filter gain from low band + //avg over second half (i.e., 4->8kHz) of freq. spectrum + tmpU32no1 = 0; // Q12 + tmpU16no1 = 0; // Q8 + for (i = inst->anaLen2 - (inst->anaLen2 >> 2); i < inst->anaLen2; i++) { + tmpU16no1 += nonSpeechProbFinal[i]; // Q8 + tmpU32no1 += (uint32_t)(inst->noiseSupFilter[i]); // Q14 + } + RTC_DCHECK_GE(inst->stages, 7); + avgProbSpeechHB = (4096 - (tmpU16no1 >> (inst->stages - 7))); // Q12 + avgFilterGainHB = (int16_t)(tmpU32no1 >> (inst->stages - 3)); // Q14 + + // // original FLOAT code + // // gain based on speech probability: + // avg_prob_speech_tt=(float)2.0*avg_prob_speech-(float)1.0; + // gain_mod=(float)0.5*((float)1.0+(float)tanh(avg_prob_speech_tt)); // between 0 and 1 + + // gain based on speech probability: + // original expression: "0.5 * (1 + tanh(2x-1))" + // avgProbSpeechHB has been anyway saturated to a value between 0 and 1 so the other cases don't have to be dealt with + // avgProbSpeechHB and gainModHB are in Q12, 3607 = Q12(0.880615234375) which is a zero point of + // |0.5 * (1 + tanh(2x-1)) - x| - |0.5 * (1 + tanh(2x-1)) - 0.880615234375| meaning that from that point the error of approximating + // the expression with f(x) = x would be greater than the error of approximating the expression with f(x) = 0.880615234375 + // error: "|0.5 * (1 + tanh(2x-1)) - x| from x=0 to 0.880615234375" -> http://www.wolframalpha.com/input/?i=|0.5+*+(1+%2B+tanh(2x-1))+-+x|+from+x%3D0+to+0.880615234375 + // and: "|0.5 * (1 + tanh(2x-1)) - 0.880615234375| from x=0.880615234375 to 1" -> http://www.wolframalpha.com/input/?i=+|0.5+*+(1+%2B+tanh(2x-1))+-+0.880615234375|+from+x%3D0.880615234375+to+1 + gainModHB = WEBRTC_SPL_MIN(avgProbSpeechHB, 3607); + + // // original FLOAT code + // //combine gain with low band gain + // if (avg_prob_speech < (float)0.5) { + // gain_time_domain_HB=(float)0.5*gain_mod+(float)0.5*avg_filter_gain; + // } + // else { + // gain_time_domain_HB=(float)0.25*gain_mod+(float)0.75*avg_filter_gain; + // } + + + //combine gain with low band gain + if (avgProbSpeechHB < 2048) { + // 2048 = Q12(0.5) + // the next two lines in float are "gain_time_domain = 0.5 * gain_mod + 0.5 * avg_filter_gain"; Q2(0.5) = 2 equals one left shift + gainTimeDomainHB = (gainModHB << 1) + (avgFilterGainHB >> 1); // Q14 + } else { + // "gain_time_domain = 0.25 * gain_mod + 0.75 * agv_filter_gain;" + gainTimeDomainHB = (int16_t)((3 * avgFilterGainHB) >> 2); // 3 = Q2(0.75) + gainTimeDomainHB += gainModHB; // Q14 + } + //make sure gain is within flooring range + gainTimeDomainHB + = WEBRTC_SPL_SAT(16384, gainTimeDomainHB, (int16_t)(inst->denoiseBound)); // 16384 = Q14(1.0) + + + //apply gain + for (i = 0; i < num_high_bands; ++i) { + for (j = 
0; j < inst->blockLen10ms; j++) { + outFrameHB[i][j] = (int16_t)((gainTimeDomainHB * + inst->dataBufHBFX[i][j]) >> 14); // Q0 + } + } + } // end of H band gain computation +} diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/ns/nsx_core.h b/third_party/libwebrtc/webrtc/modules/audio_processing/ns/nsx_core.h new file mode 100644 index 0000000000..c8097f7946 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/ns/nsx_core.h @@ -0,0 +1,263 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NSX_CORE_H_ +#define MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NSX_CORE_H_ + +#ifdef NS_FILEDEBUG +#include <stdio.h> +#endif + +#include "common_audio/signal_processing/include/signal_processing_library.h" +#include "modules/audio_processing/ns/nsx_defines.h" +#include "typedefs.h" // NOLINT(build/include) + +typedef struct NoiseSuppressionFixedC_ { + uint32_t fs; + + const int16_t* window; + int16_t analysisBuffer[ANAL_BLOCKL_MAX]; + int16_t synthesisBuffer[ANAL_BLOCKL_MAX]; + uint16_t noiseSupFilter[HALF_ANAL_BLOCKL]; + uint16_t overdrive; /* Q8 */ + uint16_t denoiseBound; /* Q14 */ + const int16_t* factor2Table; + int16_t noiseEstLogQuantile[SIMULT* HALF_ANAL_BLOCKL]; + int16_t noiseEstDensity[SIMULT* HALF_ANAL_BLOCKL]; + int16_t noiseEstCounter[SIMULT]; + int16_t noiseEstQuantile[HALF_ANAL_BLOCKL]; + + size_t anaLen; + size_t anaLen2; + size_t magnLen; + int aggrMode; + int stages; + int initFlag; + int gainMap; + + int32_t maxLrt; + int32_t minLrt; + // Log LRT factor with time-smoothing in Q8. + int32_t logLrtTimeAvgW32[HALF_ANAL_BLOCKL]; + int32_t featureLogLrt; + int32_t thresholdLogLrt; + int16_t weightLogLrt; + + uint32_t featureSpecDiff; + uint32_t thresholdSpecDiff; + int16_t weightSpecDiff; + + uint32_t featureSpecFlat; + uint32_t thresholdSpecFlat; + int16_t weightSpecFlat; + + // Conservative estimate of noise spectrum. + int32_t avgMagnPause[HALF_ANAL_BLOCKL]; + uint32_t magnEnergy; + uint32_t sumMagn; + uint32_t curAvgMagnEnergy; + uint32_t timeAvgMagnEnergy; + uint32_t timeAvgMagnEnergyTmp; + + uint32_t whiteNoiseLevel; // Initial noise estimate. + // Initial magnitude spectrum estimate. + uint32_t initMagnEst[HALF_ANAL_BLOCKL]; + // Pink noise parameters: + int32_t pinkNoiseNumerator; // Numerator. + int32_t pinkNoiseExp; // Power of freq. + int minNorm; // Smallest normalization factor. + int zeroInputSignal; // Zero input signal flag. + + // Noise spectrum from previous frame. + uint32_t prevNoiseU32[HALF_ANAL_BLOCKL]; + // Magnitude spectrum from previous frame. + uint16_t prevMagnU16[HALF_ANAL_BLOCKL]; + // Prior speech/noise probability in Q14. + int16_t priorNonSpeechProb; + + int blockIndex; // Frame index counter. + // Parameter for updating or estimating thresholds/weights for prior model. + int modelUpdate; + int cntThresUpdate; + + // Histograms for parameter estimation. + int16_t histLrt[HIST_PAR_EST]; + int16_t histSpecFlat[HIST_PAR_EST]; + int16_t histSpecDiff[HIST_PAR_EST]; + + // Quantities for high band estimate. 
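+  // One buffer per high band. The high bands are never transformed; they
+  // are only scaled in the time domain with a gain derived from the low
+  // band (see WebRtcNsx_ProcessCore).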
+ int16_t dataBufHBFX[NUM_HIGH_BANDS_MAX][ANAL_BLOCKL_MAX]; + + int qNoise; + int prevQNoise; + int prevQMagn; + size_t blockLen10ms; + + int16_t real[ANAL_BLOCKL_MAX]; + int16_t imag[ANAL_BLOCKL_MAX]; + int32_t energyIn; + int scaleEnergyIn; + int normData; + + struct RealFFT* real_fft; +} NoiseSuppressionFixedC; + +#ifdef __cplusplus +extern "C" +{ +#endif + +/**************************************************************************** + * WebRtcNsx_InitCore(...) + * + * This function initializes a noise suppression instance + * + * Input: + * - inst : Instance that should be initialized + * - fs : Sampling frequency + * + * Output: + * - inst : Initialized instance + * + * Return value : 0 - Ok + * -1 - Error + */ +int32_t WebRtcNsx_InitCore(NoiseSuppressionFixedC* inst, uint32_t fs); + +/**************************************************************************** + * WebRtcNsx_set_policy_core(...) + * + * This changes the aggressiveness of the noise suppression method. + * + * Input: + * - inst : Instance that should be initialized + * - mode : 0: Mild (6 dB), 1: Medium (10 dB), 2: Aggressive (15 dB) + * + * Output: + * - inst : Initialized instance + * + * Return value : 0 - Ok + * -1 - Error + */ +int WebRtcNsx_set_policy_core(NoiseSuppressionFixedC* inst, int mode); + +/**************************************************************************** + * WebRtcNsx_ProcessCore + * + * Do noise suppression. + * + * Input: + * - inst : Instance that should be initialized + * - inFrame : Input speech frame for each band + * - num_bands : Number of bands + * + * Output: + * - inst : Updated instance + * - outFrame : Output speech frame for each band + */ +void WebRtcNsx_ProcessCore(NoiseSuppressionFixedC* inst, + const short* const* inFrame, + int num_bands, + short* const* outFrame); + +/**************************************************************************** + * Some function pointers, for internal functions shared by ARM NEON and + * generic C code. + */ +// Noise Estimation. +typedef void (*NoiseEstimation)(NoiseSuppressionFixedC* inst, + uint16_t* magn, + uint32_t* noise, + int16_t* q_noise); +extern NoiseEstimation WebRtcNsx_NoiseEstimation; + +// Filter the data in the frequency domain, and create spectrum. +typedef void (*PrepareSpectrum)(NoiseSuppressionFixedC* inst, + int16_t* freq_buff); +extern PrepareSpectrum WebRtcNsx_PrepareSpectrum; + +// For the noise supression process, synthesis, read out fully processed +// segment, and update synthesis buffer. +typedef void (*SynthesisUpdate)(NoiseSuppressionFixedC* inst, + int16_t* out_frame, + int16_t gain_factor); +extern SynthesisUpdate WebRtcNsx_SynthesisUpdate; + +// Update analysis buffer for lower band, and window data before FFT. +typedef void (*AnalysisUpdate)(NoiseSuppressionFixedC* inst, + int16_t* out, + int16_t* new_speech); +extern AnalysisUpdate WebRtcNsx_AnalysisUpdate; + +// Denormalize the real-valued signal |in|, the output from inverse FFT. +typedef void (*Denormalize)(NoiseSuppressionFixedC* inst, + int16_t* in, + int factor); +extern Denormalize WebRtcNsx_Denormalize; + +// Normalize the real-valued signal |in|, the input to forward FFT. +typedef void (*NormalizeRealBuffer)(NoiseSuppressionFixedC* inst, + const int16_t* in, + int16_t* out); +extern NormalizeRealBuffer WebRtcNsx_NormalizeRealBuffer; + +// Compute speech/noise probability. +// Intended to be private. 
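+// Unlike the hooks above, this is not dispatched through a function
+// pointer: a generic C version is defined in nsx_core_c.c and a MIPS
+// version in nsx_core_mips.c, selected at build time.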
+void WebRtcNsx_SpeechNoiseProb(NoiseSuppressionFixedC* inst, + uint16_t* nonSpeechProbFinal, + uint32_t* priorLocSnr, + uint32_t* postLocSnr); + +#if defined(WEBRTC_HAS_NEON) +// For the above function pointers, functions for generic platforms are declared +// and defined as static in file nsx_core.c, while those for ARM Neon platforms +// are declared below and defined in file nsx_core_neon.c. +void WebRtcNsx_NoiseEstimationNeon(NoiseSuppressionFixedC* inst, + uint16_t* magn, + uint32_t* noise, + int16_t* q_noise); +void WebRtcNsx_SynthesisUpdateNeon(NoiseSuppressionFixedC* inst, + int16_t* out_frame, + int16_t gain_factor); +void WebRtcNsx_AnalysisUpdateNeon(NoiseSuppressionFixedC* inst, + int16_t* out, + int16_t* new_speech); +void WebRtcNsx_PrepareSpectrumNeon(NoiseSuppressionFixedC* inst, + int16_t* freq_buff); +#endif + +#if defined(MIPS32_LE) +// For the above function pointers, functions for generic platforms are declared +// and defined as static in file nsx_core.c, while those for MIPS platforms +// are declared below and defined in file nsx_core_mips.c. +void WebRtcNsx_SynthesisUpdate_mips(NoiseSuppressionFixedC* inst, + int16_t* out_frame, + int16_t gain_factor); +void WebRtcNsx_AnalysisUpdate_mips(NoiseSuppressionFixedC* inst, + int16_t* out, + int16_t* new_speech); +void WebRtcNsx_PrepareSpectrum_mips(NoiseSuppressionFixedC* inst, + int16_t* freq_buff); +void WebRtcNsx_NormalizeRealBuffer_mips(NoiseSuppressionFixedC* inst, + const int16_t* in, + int16_t* out); +#if defined(MIPS_DSP_R1_LE) +void WebRtcNsx_Denormalize_mips(NoiseSuppressionFixedC* inst, + int16_t* in, + int factor); +#endif + +#endif + +#ifdef __cplusplus +} +#endif + +#endif // MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NSX_CORE_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/ns/nsx_core_c.c b/third_party/libwebrtc/webrtc/modules/audio_processing/ns/nsx_core_c.c new file mode 100644 index 0000000000..162fb1990a --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/ns/nsx_core_c.c @@ -0,0 +1,260 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "rtc_base/checks.h" +#include "modules/audio_processing/ns/noise_suppression_x.h" +#include "modules/audio_processing/ns/nsx_core.h" +#include "modules/audio_processing/ns/nsx_defines.h" + +static const int16_t kIndicatorTable[17] = { + 0, 2017, 3809, 5227, 6258, 6963, 7424, 7718, + 7901, 8014, 8084, 8126, 8152, 8168, 8177, 8183, 8187 +}; + +// Compute speech/noise probability +// speech/noise probability is returned in: probSpeechFinal +//snrLocPrior is the prior SNR for each frequency (in Q11) +//snrLocPost is the post SNR for each frequency (in Q11) +void WebRtcNsx_SpeechNoiseProb(NoiseSuppressionFixedC* inst, + uint16_t* nonSpeechProbFinal, + uint32_t* priorLocSnr, + uint32_t* postLocSnr) { + uint32_t zeros, num, den, tmpU32no1, tmpU32no2, tmpU32no3; + int32_t invLrtFX, indPriorFX, tmp32, tmp32no1, tmp32no2, besselTmpFX32; + int32_t frac32, logTmp; + int32_t logLrtTimeAvgKsumFX; + int16_t indPriorFX16; + int16_t tmp16, tmp16no1, tmp16no2, tmpIndFX, tableIndex, frac, intPart; + size_t i; + int normTmp, normTmp2, nShifts; + + // compute feature based on average LR factor + // this is the average over all frequencies of the smooth log LRT + logLrtTimeAvgKsumFX = 0; + for (i = 0; i < inst->magnLen; i++) { + besselTmpFX32 = (int32_t)postLocSnr[i]; // Q11 + normTmp = WebRtcSpl_NormU32(postLocSnr[i]); + num = postLocSnr[i] << normTmp; // Q(11+normTmp) + if (normTmp > 10) { + den = priorLocSnr[i] << (normTmp - 11); // Q(normTmp) + } else { + den = priorLocSnr[i] >> (11 - normTmp); // Q(normTmp) + } + if (den > 0) { + besselTmpFX32 -= num / den; // Q11 + } else { + besselTmpFX32 = 0; + } + + // inst->logLrtTimeAvg[i] += LRT_TAVG * (besselTmp - log(snrLocPrior) + // - inst->logLrtTimeAvg[i]); + // Here, LRT_TAVG = 0.5 + zeros = WebRtcSpl_NormU32(priorLocSnr[i]); + frac32 = (int32_t)(((priorLocSnr[i] << zeros) & 0x7FFFFFFF) >> 19); + tmp32 = (frac32 * frac32 * -43) >> 19; + tmp32 += ((int16_t)frac32 * 5412) >> 12; + frac32 = tmp32 + 37; + // tmp32 = log2(priorLocSnr[i]) + tmp32 = (int32_t)(((31 - zeros) << 12) + frac32) - (11 << 12); // Q12 + logTmp = (tmp32 * 178) >> 8; // log2(priorLocSnr[i])*log(2) + // tmp32no1 = LRT_TAVG * (log(snrLocPrior) + inst->logLrtTimeAvg[i]) in Q12. 
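+    // With LRT_TAVG = 0.5 the multiply reduces to a division by two. Note
+    // that |besselTmpFX32| is Q11 while the other terms are Q12, so adding
+    // it unshifted below applies the remaining 0.5 factor to that term.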
+ tmp32no1 = (logTmp + inst->logLrtTimeAvgW32[i]) / 2; + inst->logLrtTimeAvgW32[i] += (besselTmpFX32 - tmp32no1); // Q12 + + logLrtTimeAvgKsumFX += inst->logLrtTimeAvgW32[i]; // Q12 + } + inst->featureLogLrt = (logLrtTimeAvgKsumFX * BIN_SIZE_LRT) >> + (inst->stages + 11); + + // done with computation of LR factor + + // + //compute the indicator functions + // + + // average LRT feature + // FLOAT code + // indicator0 = 0.5 * (tanh(widthPrior * + // (logLrtTimeAvgKsum - threshPrior0)) + 1.0); + tmpIndFX = 16384; // Q14(1.0) + tmp32no1 = logLrtTimeAvgKsumFX - inst->thresholdLogLrt; // Q12 + nShifts = 7 - inst->stages; // WIDTH_PR_MAP_SHIFT - inst->stages + 5; + //use larger width in tanh map for pause regions + if (tmp32no1 < 0) { + tmpIndFX = 0; + tmp32no1 = -tmp32no1; + //widthPrior = widthPrior * 2.0; + nShifts++; + } + tmp32no1 = WEBRTC_SPL_SHIFT_W32(tmp32no1, nShifts); // Q14 + // compute indicator function: sigmoid map + if (tmp32no1 < (16 << 14) && tmp32no1 >= 0) { + tableIndex = (int16_t)(tmp32no1 >> 14); + tmp16no2 = kIndicatorTable[tableIndex]; + tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex]; + frac = (int16_t)(tmp32no1 & 0x00003fff); // Q14 + tmp16no2 += (int16_t)((tmp16no1 * frac) >> 14); + if (tmpIndFX == 0) { + tmpIndFX = 8192 - tmp16no2; // Q14 + } else { + tmpIndFX = 8192 + tmp16no2; // Q14 + } + } + indPriorFX = inst->weightLogLrt * tmpIndFX; // 6*Q14 + + //spectral flatness feature + if (inst->weightSpecFlat) { + tmpU32no1 = WEBRTC_SPL_UMUL(inst->featureSpecFlat, 400); // Q10 + tmpIndFX = 16384; // Q14(1.0) + //use larger width in tanh map for pause regions + tmpU32no2 = inst->thresholdSpecFlat - tmpU32no1; //Q10 + nShifts = 4; + if (inst->thresholdSpecFlat < tmpU32no1) { + tmpIndFX = 0; + tmpU32no2 = tmpU32no1 - inst->thresholdSpecFlat; + //widthPrior = widthPrior * 2.0; + nShifts++; + } + tmpU32no1 = WebRtcSpl_DivU32U16(tmpU32no2 << nShifts, 25); // Q14 + // compute indicator function: sigmoid map + // FLOAT code + // indicator1 = 0.5 * (tanh(sgnMap * widthPrior * + // (threshPrior1 - tmpFloat1)) + 1.0); + if (tmpU32no1 < (16 << 14)) { + tableIndex = (int16_t)(tmpU32no1 >> 14); + tmp16no2 = kIndicatorTable[tableIndex]; + tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex]; + frac = (int16_t)(tmpU32no1 & 0x00003fff); // Q14 + tmp16no2 += (int16_t)((tmp16no1 * frac) >> 14); + if (tmpIndFX) { + tmpIndFX = 8192 + tmp16no2; // Q14 + } else { + tmpIndFX = 8192 - tmp16no2; // Q14 + } + } + indPriorFX += inst->weightSpecFlat * tmpIndFX; // 6*Q14 + } + + //for template spectral-difference + if (inst->weightSpecDiff) { + tmpU32no1 = 0; + if (inst->featureSpecDiff) { + normTmp = WEBRTC_SPL_MIN(20 - inst->stages, + WebRtcSpl_NormU32(inst->featureSpecDiff)); + RTC_DCHECK_GE(normTmp, 0); + tmpU32no1 = inst->featureSpecDiff << normTmp; // Q(normTmp-2*stages) + tmpU32no2 = inst->timeAvgMagnEnergy >> (20 - inst->stages - normTmp); + if (tmpU32no2 > 0) { + // Q(20 - inst->stages) + tmpU32no1 /= tmpU32no2; + } else { + tmpU32no1 = (uint32_t)(0x7fffffff); + } + } + tmpU32no3 = (inst->thresholdSpecDiff << 17) / 25; + tmpU32no2 = tmpU32no1 - tmpU32no3; + nShifts = 1; + tmpIndFX = 16384; // Q14(1.0) + //use larger width in tanh map for pause regions + if (tmpU32no2 & 0x80000000) { + tmpIndFX = 0; + tmpU32no2 = tmpU32no3 - tmpU32no1; + //widthPrior = widthPrior * 2.0; + nShifts--; + } + tmpU32no1 = tmpU32no2 >> nShifts; + // compute indicator function: sigmoid map + /* FLOAT code + indicator2 = 0.5 * (tanh(widthPrior * (tmpFloat1 - threshPrior2)) 
+ 1.0); + */ + if (tmpU32no1 < (16 << 14)) { + tableIndex = (int16_t)(tmpU32no1 >> 14); + tmp16no2 = kIndicatorTable[tableIndex]; + tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex]; + frac = (int16_t)(tmpU32no1 & 0x00003fff); // Q14 + tmp16no2 += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( + tmp16no1, frac, 14); + if (tmpIndFX) { + tmpIndFX = 8192 + tmp16no2; + } else { + tmpIndFX = 8192 - tmp16no2; + } + } + indPriorFX += inst->weightSpecDiff * tmpIndFX; // 6*Q14 + } + + //combine the indicator function with the feature weights + // FLOAT code + // indPrior = 1 - (weightIndPrior0 * indicator0 + weightIndPrior1 * + // indicator1 + weightIndPrior2 * indicator2); + indPriorFX16 = WebRtcSpl_DivW32W16ResW16(98307 - indPriorFX, 6); // Q14 + // done with computing indicator function + + //compute the prior probability + // FLOAT code + // inst->priorNonSpeechProb += PRIOR_UPDATE * + // (indPriorNonSpeech - inst->priorNonSpeechProb); + tmp16 = indPriorFX16 - inst->priorNonSpeechProb; // Q14 + inst->priorNonSpeechProb += (int16_t)((PRIOR_UPDATE_Q14 * tmp16) >> 14); + + //final speech probability: combine prior model with LR factor: + + memset(nonSpeechProbFinal, 0, sizeof(uint16_t) * inst->magnLen); + + if (inst->priorNonSpeechProb > 0) { + for (i = 0; i < inst->magnLen; i++) { + // FLOAT code + // invLrt = exp(inst->logLrtTimeAvg[i]); + // invLrt = inst->priorSpeechProb * invLrt; + // nonSpeechProbFinal[i] = (1.0 - inst->priorSpeechProb) / + // (1.0 - inst->priorSpeechProb + invLrt); + // invLrt = (1.0 - inst->priorNonSpeechProb) * invLrt; + // nonSpeechProbFinal[i] = inst->priorNonSpeechProb / + // (inst->priorNonSpeechProb + invLrt); + if (inst->logLrtTimeAvgW32[i] < 65300) { + tmp32no1 = (inst->logLrtTimeAvgW32[i] * 23637) >> 14; // Q12 + intPart = (int16_t)(tmp32no1 >> 12); + if (intPart < -8) { + intPart = -8; + } + frac = (int16_t)(tmp32no1 & 0x00000fff); // Q12 + + // Quadratic approximation of 2^frac + tmp32no2 = (frac * frac * 44) >> 19; // Q12. + tmp32no2 += (frac * 84) >> 7; // Q12 + invLrtFX = (1 << (8 + intPart)) + + WEBRTC_SPL_SHIFT_W32(tmp32no2, intPart - 4); // Q8 + + normTmp = WebRtcSpl_NormW32(invLrtFX); + normTmp2 = WebRtcSpl_NormW16((16384 - inst->priorNonSpeechProb)); + if (normTmp + normTmp2 >= 7) { + if (normTmp + normTmp2 < 15) { + invLrtFX >>= 15 - normTmp2 - normTmp; + // Q(normTmp+normTmp2-7) + tmp32no1 = invLrtFX * (16384 - inst->priorNonSpeechProb); + // Q(normTmp+normTmp2+7) + invLrtFX = WEBRTC_SPL_SHIFT_W32(tmp32no1, 7 - normTmp - normTmp2); + // Q14 + } else { + tmp32no1 = invLrtFX * (16384 - inst->priorNonSpeechProb); + // Q22 + invLrtFX = tmp32no1 >> 8; // Q14. + } + + tmp32no1 = (int32_t)inst->priorNonSpeechProb << 8; // Q22 + + nonSpeechProbFinal[i] = tmp32no1 / + (inst->priorNonSpeechProb + invLrtFX); // Q8 + } + } + } + } +} + diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/ns/nsx_core_mips.c b/third_party/libwebrtc/webrtc/modules/audio_processing/ns/nsx_core_mips.c new file mode 100644 index 0000000000..d58a9b2347 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/ns/nsx_core_mips.c @@ -0,0 +1,1002 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <string.h> + +#include "rtc_base/checks.h" +#include "modules/audio_processing/ns/noise_suppression_x.h" +#include "modules/audio_processing/ns/nsx_core.h" + +static const int16_t kIndicatorTable[17] = { + 0, 2017, 3809, 5227, 6258, 6963, 7424, 7718, + 7901, 8014, 8084, 8126, 8152, 8168, 8177, 8183, 8187 +}; + +// Compute speech/noise probability +// speech/noise probability is returned in: probSpeechFinal +//snrLocPrior is the prior SNR for each frequency (in Q11) +//snrLocPost is the post SNR for each frequency (in Q11) +void WebRtcNsx_SpeechNoiseProb(NoiseSuppressionFixedC* inst, + uint16_t* nonSpeechProbFinal, + uint32_t* priorLocSnr, + uint32_t* postLocSnr) { + uint32_t tmpU32no1, tmpU32no2, tmpU32no3; + int32_t indPriorFX, tmp32no1; + int32_t logLrtTimeAvgKsumFX; + int16_t indPriorFX16; + int16_t tmp16, tmp16no1, tmp16no2, tmpIndFX, tableIndex, frac; + size_t i; + int normTmp, nShifts; + + int32_t r0, r1, r2, r3, r4, r5, r6, r7, r8, r9; + int32_t const_max = 0x7fffffff; + int32_t const_neg43 = -43; + int32_t const_5412 = 5412; + int32_t const_11rsh12 = (11 << 12); + int32_t const_178 = 178; + + + // compute feature based on average LR factor + // this is the average over all frequencies of the smooth log LRT + logLrtTimeAvgKsumFX = 0; + for (i = 0; i < inst->magnLen; i++) { + r0 = postLocSnr[i]; // Q11 + r1 = priorLocSnr[i]; + r2 = inst->logLrtTimeAvgW32[i]; + + __asm __volatile( + ".set push \n\t" + ".set noreorder \n\t" + "clz %[r3], %[r0] \n\t" + "clz %[r5], %[r1] \n\t" + "slti %[r4], %[r3], 32 \n\t" + "slti %[r6], %[r5], 32 \n\t" + "movz %[r3], $0, %[r4] \n\t" + "movz %[r5], $0, %[r6] \n\t" + "slti %[r4], %[r3], 11 \n\t" + "addiu %[r6], %[r3], -11 \n\t" + "neg %[r7], %[r6] \n\t" + "sllv %[r6], %[r1], %[r6] \n\t" + "srav %[r7], %[r1], %[r7] \n\t" + "movn %[r6], %[r7], %[r4] \n\t" + "sllv %[r1], %[r1], %[r5] \n\t" + "and %[r1], %[r1], %[const_max] \n\t" + "sra %[r1], %[r1], 19 \n\t" + "mul %[r7], %[r1], %[r1] \n\t" + "sllv %[r3], %[r0], %[r3] \n\t" + "divu %[r8], %[r3], %[r6] \n\t" + "slti %[r6], %[r6], 1 \n\t" + "mul %[r7], %[r7], %[const_neg43] \n\t" + "sra %[r7], %[r7], 19 \n\t" + "movz %[r3], %[r8], %[r6] \n\t" + "subu %[r0], %[r0], %[r3] \n\t" + "movn %[r0], $0, %[r6] \n\t" + "mul %[r1], %[r1], %[const_5412] \n\t" + "sra %[r1], %[r1], 12 \n\t" + "addu %[r7], %[r7], %[r1] \n\t" + "addiu %[r1], %[r7], 37 \n\t" + "addiu %[r5], %[r5], -31 \n\t" + "neg %[r5], %[r5] \n\t" + "sll %[r5], %[r5], 12 \n\t" + "addu %[r5], %[r5], %[r1] \n\t" + "subu %[r7], %[r5], %[const_11rsh12] \n\t" + "mul %[r7], %[r7], %[const_178] \n\t" + "sra %[r7], %[r7], 8 \n\t" + "addu %[r7], %[r7], %[r2] \n\t" + "sra %[r7], %[r7], 1 \n\t" + "subu %[r2], %[r2], %[r7] \n\t" + "addu %[r2], %[r2], %[r0] \n\t" + ".set pop \n\t" + : [r0] "+r" (r0), [r1] "+r" (r1), [r2] "+r" (r2), + [r3] "=&r" (r3), [r4] "=&r" (r4), [r5] "=&r" (r5), + [r6] "=&r" (r6), [r7] "=&r" (r7), [r8] "=&r" (r8) + : [const_max] "r" (const_max), [const_neg43] "r" (const_neg43), + [const_5412] "r" (const_5412), [const_11rsh12] "r" (const_11rsh12), + [const_178] "r" (const_178) + : "hi", "lo" + ); + inst->logLrtTimeAvgW32[i] = r2; + logLrtTimeAvgKsumFX += r2; + } + + inst->featureLogLrt = (logLrtTimeAvgKsumFX * BIN_SIZE_LRT) >> + (inst->stages + 11); + + // done with computation of LR factor + + // + // compute the indicator functions + // + + // average LRT feature + // FLOAT code + // indicator0 = 0.5 * (tanh(widthPrior * + // 
(logLrtTimeAvgKsum - threshPrior0)) + 1.0); + tmpIndFX = 16384; // Q14(1.0) + tmp32no1 = logLrtTimeAvgKsumFX - inst->thresholdLogLrt; // Q12 + nShifts = 7 - inst->stages; // WIDTH_PR_MAP_SHIFT - inst->stages + 5; + //use larger width in tanh map for pause regions + if (tmp32no1 < 0) { + tmpIndFX = 0; + tmp32no1 = -tmp32no1; + //widthPrior = widthPrior * 2.0; + nShifts++; + } + tmp32no1 = WEBRTC_SPL_SHIFT_W32(tmp32no1, nShifts); // Q14 + // compute indicator function: sigmoid map + if (tmp32no1 < (16 << 14) && tmp32no1 >= 0) { + tableIndex = (int16_t)(tmp32no1 >> 14); + tmp16no2 = kIndicatorTable[tableIndex]; + tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex]; + frac = (int16_t)(tmp32no1 & 0x00003fff); // Q14 + tmp16no2 += (int16_t)((tmp16no1 * frac) >> 14); + if (tmpIndFX == 0) { + tmpIndFX = 8192 - tmp16no2; // Q14 + } else { + tmpIndFX = 8192 + tmp16no2; // Q14 + } + } + indPriorFX = inst->weightLogLrt * tmpIndFX; // 6*Q14 + + //spectral flatness feature + if (inst->weightSpecFlat) { + tmpU32no1 = WEBRTC_SPL_UMUL(inst->featureSpecFlat, 400); // Q10 + tmpIndFX = 16384; // Q14(1.0) + //use larger width in tanh map for pause regions + tmpU32no2 = inst->thresholdSpecFlat - tmpU32no1; //Q10 + nShifts = 4; + if (inst->thresholdSpecFlat < tmpU32no1) { + tmpIndFX = 0; + tmpU32no2 = tmpU32no1 - inst->thresholdSpecFlat; + //widthPrior = widthPrior * 2.0; + nShifts++; + } + tmpU32no1 = WebRtcSpl_DivU32U16(tmpU32no2 << nShifts, 25); //Q14 + // compute indicator function: sigmoid map + // FLOAT code + // indicator1 = 0.5 * (tanh(sgnMap * widthPrior * + // (threshPrior1 - tmpFloat1)) + 1.0); + if (tmpU32no1 < (16 << 14)) { + tableIndex = (int16_t)(tmpU32no1 >> 14); + tmp16no2 = kIndicatorTable[tableIndex]; + tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex]; + frac = (int16_t)(tmpU32no1 & 0x00003fff); // Q14 + tmp16no2 += (int16_t)((tmp16no1 * frac) >> 14); + if (tmpIndFX) { + tmpIndFX = 8192 + tmp16no2; // Q14 + } else { + tmpIndFX = 8192 - tmp16no2; // Q14 + } + } + indPriorFX += inst->weightSpecFlat * tmpIndFX; // 6*Q14 + } + + //for template spectral-difference + if (inst->weightSpecDiff) { + tmpU32no1 = 0; + if (inst->featureSpecDiff) { + normTmp = WEBRTC_SPL_MIN(20 - inst->stages, + WebRtcSpl_NormU32(inst->featureSpecDiff)); + RTC_DCHECK_GE(normTmp, 0); + tmpU32no1 = inst->featureSpecDiff << normTmp; // Q(normTmp-2*stages) + tmpU32no2 = inst->timeAvgMagnEnergy >> (20 - inst->stages - normTmp); + if (tmpU32no2 > 0) { + // Q(20 - inst->stages) + tmpU32no1 /= tmpU32no2; + } else { + tmpU32no1 = (uint32_t)(0x7fffffff); + } + } + tmpU32no3 = (inst->thresholdSpecDiff << 17) / 25; + tmpU32no2 = tmpU32no1 - tmpU32no3; + nShifts = 1; + tmpIndFX = 16384; // Q14(1.0) + //use larger width in tanh map for pause regions + if (tmpU32no2 & 0x80000000) { + tmpIndFX = 0; + tmpU32no2 = tmpU32no3 - tmpU32no1; + //widthPrior = widthPrior * 2.0; + nShifts--; + } + tmpU32no1 = tmpU32no2 >> nShifts; + // compute indicator function: sigmoid map + /* FLOAT code + indicator2 = 0.5 * (tanh(widthPrior * (tmpFloat1 - threshPrior2)) + 1.0); + */ + if (tmpU32no1 < (16 << 14)) { + tableIndex = (int16_t)(tmpU32no1 >> 14); + tmp16no2 = kIndicatorTable[tableIndex]; + tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex]; + frac = (int16_t)(tmpU32no1 & 0x00003fff); // Q14 + tmp16no2 += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( + tmp16no1, frac, 14); + if (tmpIndFX) { + tmpIndFX = 8192 + tmp16no2; + } else { + tmpIndFX = 8192 - tmp16no2; + } + } + indPriorFX += 
inst->weightSpecDiff * tmpIndFX; // 6*Q14 + } + + //combine the indicator function with the feature weights + // FLOAT code + // indPrior = 1 - (weightIndPrior0 * indicator0 + weightIndPrior1 * + // indicator1 + weightIndPrior2 * indicator2); + indPriorFX16 = WebRtcSpl_DivW32W16ResW16(98307 - indPriorFX, 6); // Q14 + // done with computing indicator function + + //compute the prior probability + // FLOAT code + // inst->priorNonSpeechProb += PRIOR_UPDATE * + // (indPriorNonSpeech - inst->priorNonSpeechProb); + tmp16 = indPriorFX16 - inst->priorNonSpeechProb; // Q14 + inst->priorNonSpeechProb += (int16_t)((PRIOR_UPDATE_Q14 * tmp16) >> 14); + + //final speech probability: combine prior model with LR factor: + + memset(nonSpeechProbFinal, 0, sizeof(uint16_t) * inst->magnLen); + + if (inst->priorNonSpeechProb > 0) { + r0 = inst->priorNonSpeechProb; + r1 = 16384 - r0; + int32_t const_23637 = 23637; + int32_t const_44 = 44; + int32_t const_84 = 84; + int32_t const_1 = 1; + int32_t const_neg8 = -8; + for (i = 0; i < inst->magnLen; i++) { + r2 = inst->logLrtTimeAvgW32[i]; + if (r2 < 65300) { + __asm __volatile( + ".set push \n\t" + ".set noreorder \n\t" + "mul %[r2], %[r2], %[const_23637] \n\t" + "sll %[r6], %[r1], 16 \n\t" + "clz %[r7], %[r6] \n\t" + "clo %[r8], %[r6] \n\t" + "slt %[r9], %[r6], $0 \n\t" + "movn %[r7], %[r8], %[r9] \n\t" + "sra %[r2], %[r2], 14 \n\t" + "andi %[r3], %[r2], 0xfff \n\t" + "mul %[r4], %[r3], %[r3] \n\t" + "mul %[r3], %[r3], %[const_84] \n\t" + "sra %[r2], %[r2], 12 \n\t" + "slt %[r5], %[r2], %[const_neg8] \n\t" + "movn %[r2], %[const_neg8], %[r5] \n\t" + "mul %[r4], %[r4], %[const_44] \n\t" + "sra %[r3], %[r3], 7 \n\t" + "addiu %[r7], %[r7], -1 \n\t" + "slti %[r9], %[r7], 31 \n\t" + "movz %[r7], $0, %[r9] \n\t" + "sra %[r4], %[r4], 19 \n\t" + "addu %[r4], %[r4], %[r3] \n\t" + "addiu %[r3], %[r2], 8 \n\t" + "addiu %[r2], %[r2], -4 \n\t" + "neg %[r5], %[r2] \n\t" + "sllv %[r6], %[r4], %[r2] \n\t" + "srav %[r5], %[r4], %[r5] \n\t" + "slt %[r2], %[r2], $0 \n\t" + "movn %[r6], %[r5], %[r2] \n\t" + "sllv %[r3], %[const_1], %[r3] \n\t" + "addu %[r2], %[r3], %[r6] \n\t" + "clz %[r4], %[r2] \n\t" + "clo %[r5], %[r2] \n\t" + "slt %[r8], %[r2], $0 \n\t" + "movn %[r4], %[r5], %[r8] \n\t" + "addiu %[r4], %[r4], -1 \n\t" + "slt %[r5], $0, %[r2] \n\t" + "or %[r5], %[r5], %[r7] \n\t" + "movz %[r4], $0, %[r5] \n\t" + "addiu %[r6], %[r7], -7 \n\t" + "addu %[r6], %[r6], %[r4] \n\t" + "bltz %[r6], 1f \n\t" + " nop \n\t" + "addiu %[r4], %[r6], -8 \n\t" + "neg %[r3], %[r4] \n\t" + "srav %[r5], %[r2], %[r3] \n\t" + "mul %[r5], %[r5], %[r1] \n\t" + "mul %[r2], %[r2], %[r1] \n\t" + "slt %[r4], %[r4], $0 \n\t" + "srav %[r5], %[r5], %[r6] \n\t" + "sra %[r2], %[r2], 8 \n\t" + "movn %[r2], %[r5], %[r4] \n\t" + "sll %[r3], %[r0], 8 \n\t" + "addu %[r2], %[r0], %[r2] \n\t" + "divu %[r3], %[r3], %[r2] \n\t" + "1: \n\t" + ".set pop \n\t" + : [r2] "+r" (r2), [r3] "=&r" (r3), [r4] "=&r" (r4), + [r5] "=&r" (r5), [r6] "=&r" (r6), [r7] "=&r" (r7), + [r8] "=&r" (r8), [r9] "=&r" (r9) + : [r0] "r" (r0), [r1] "r" (r1), [const_23637] "r" (const_23637), + [const_neg8] "r" (const_neg8), [const_84] "r" (const_84), + [const_1] "r" (const_1), [const_44] "r" (const_44) + : "hi", "lo" + ); + nonSpeechProbFinal[i] = r3; + } + } + } +} + +// Update analysis buffer for lower band, and window data before FFT. 
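+// The window is in Q14, so each output sample is formed as
+//   out[i] = (window[i] * analysisBuffer[i] + (1 << 13)) >> 14
+// (with rounding); the assembly below unrolls this four samples per
+// iteration (eight with MIPS_DSP_R1_LE).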
+void WebRtcNsx_AnalysisUpdate_mips(NoiseSuppressionFixedC* inst, + int16_t* out, + int16_t* new_speech) { + int iters, after; + int anaLen = (int)inst->anaLen; + int *window = (int*)inst->window; + int *anaBuf = (int*)inst->analysisBuffer; + int *outBuf = (int*)out; + int r0, r1, r2, r3, r4, r5, r6, r7; +#if defined(MIPS_DSP_R1_LE) + int r8; +#endif + + // For lower band update analysis buffer. + memcpy(inst->analysisBuffer, inst->analysisBuffer + inst->blockLen10ms, + (inst->anaLen - inst->blockLen10ms) * sizeof(*inst->analysisBuffer)); + memcpy(inst->analysisBuffer + inst->anaLen - inst->blockLen10ms, new_speech, + inst->blockLen10ms * sizeof(*inst->analysisBuffer)); + + // Window data before FFT. +#if defined(MIPS_DSP_R1_LE) + __asm __volatile( + ".set push \n\t" + ".set noreorder \n\t" + "sra %[iters], %[anaLen], 3 \n\t" + "1: \n\t" + "blez %[iters], 2f \n\t" + " nop \n\t" + "lw %[r0], 0(%[window]) \n\t" + "lw %[r1], 0(%[anaBuf]) \n\t" + "lw %[r2], 4(%[window]) \n\t" + "lw %[r3], 4(%[anaBuf]) \n\t" + "lw %[r4], 8(%[window]) \n\t" + "lw %[r5], 8(%[anaBuf]) \n\t" + "lw %[r6], 12(%[window]) \n\t" + "lw %[r7], 12(%[anaBuf]) \n\t" + "muleq_s.w.phl %[r8], %[r0], %[r1] \n\t" + "muleq_s.w.phr %[r0], %[r0], %[r1] \n\t" + "muleq_s.w.phl %[r1], %[r2], %[r3] \n\t" + "muleq_s.w.phr %[r2], %[r2], %[r3] \n\t" + "muleq_s.w.phl %[r3], %[r4], %[r5] \n\t" + "muleq_s.w.phr %[r4], %[r4], %[r5] \n\t" + "muleq_s.w.phl %[r5], %[r6], %[r7] \n\t" + "muleq_s.w.phr %[r6], %[r6], %[r7] \n\t" +#if defined(MIPS_DSP_R2_LE) + "precr_sra_r.ph.w %[r8], %[r0], 15 \n\t" + "precr_sra_r.ph.w %[r1], %[r2], 15 \n\t" + "precr_sra_r.ph.w %[r3], %[r4], 15 \n\t" + "precr_sra_r.ph.w %[r5], %[r6], 15 \n\t" + "sw %[r8], 0(%[outBuf]) \n\t" + "sw %[r1], 4(%[outBuf]) \n\t" + "sw %[r3], 8(%[outBuf]) \n\t" + "sw %[r5], 12(%[outBuf]) \n\t" +#else + "shra_r.w %[r8], %[r8], 15 \n\t" + "shra_r.w %[r0], %[r0], 15 \n\t" + "shra_r.w %[r1], %[r1], 15 \n\t" + "shra_r.w %[r2], %[r2], 15 \n\t" + "shra_r.w %[r3], %[r3], 15 \n\t" + "shra_r.w %[r4], %[r4], 15 \n\t" + "shra_r.w %[r5], %[r5], 15 \n\t" + "shra_r.w %[r6], %[r6], 15 \n\t" + "sll %[r0], %[r0], 16 \n\t" + "sll %[r2], %[r2], 16 \n\t" + "sll %[r4], %[r4], 16 \n\t" + "sll %[r6], %[r6], 16 \n\t" + "packrl.ph %[r0], %[r8], %[r0] \n\t" + "packrl.ph %[r2], %[r1], %[r2] \n\t" + "packrl.ph %[r4], %[r3], %[r4] \n\t" + "packrl.ph %[r6], %[r5], %[r6] \n\t" + "sw %[r0], 0(%[outBuf]) \n\t" + "sw %[r2], 4(%[outBuf]) \n\t" + "sw %[r4], 8(%[outBuf]) \n\t" + "sw %[r6], 12(%[outBuf]) \n\t" +#endif + "addiu %[window], %[window], 16 \n\t" + "addiu %[anaBuf], %[anaBuf], 16 \n\t" + "addiu %[outBuf], %[outBuf], 16 \n\t" + "b 1b \n\t" + " addiu %[iters], %[iters], -1 \n\t" + "2: \n\t" + "andi %[after], %[anaLen], 7 \n\t" + "3: \n\t" + "blez %[after], 4f \n\t" + " nop \n\t" + "lh %[r0], 0(%[window]) \n\t" + "lh %[r1], 0(%[anaBuf]) \n\t" + "mul %[r0], %[r0], %[r1] \n\t" + "addiu %[window], %[window], 2 \n\t" + "addiu %[anaBuf], %[anaBuf], 2 \n\t" + "addiu %[outBuf], %[outBuf], 2 \n\t" + "shra_r.w %[r0], %[r0], 14 \n\t" + "sh %[r0], -2(%[outBuf]) \n\t" + "b 3b \n\t" + " addiu %[after], %[after], -1 \n\t" + "4: \n\t" + ".set pop \n\t" + : [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2), + [r3] "=&r" (r3), [r4] "=&r" (r4), [r5] "=&r" (r5), + [r6] "=&r" (r6), [r7] "=&r" (r7), [r8] "=&r" (r8), + [iters] "=&r" (iters), [after] "=&r" (after), + [window] "+r" (window),[anaBuf] "+r" (anaBuf), + [outBuf] "+r" (outBuf) + : [anaLen] "r" (anaLen) + : "memory", "hi", "lo" + ); +#else + __asm __volatile( + ".set push \n\t" + 
".set noreorder \n\t" + "sra %[iters], %[anaLen], 2 \n\t" + "1: \n\t" + "blez %[iters], 2f \n\t" + " nop \n\t" + "lh %[r0], 0(%[window]) \n\t" + "lh %[r1], 0(%[anaBuf]) \n\t" + "lh %[r2], 2(%[window]) \n\t" + "lh %[r3], 2(%[anaBuf]) \n\t" + "lh %[r4], 4(%[window]) \n\t" + "lh %[r5], 4(%[anaBuf]) \n\t" + "lh %[r6], 6(%[window]) \n\t" + "lh %[r7], 6(%[anaBuf]) \n\t" + "mul %[r0], %[r0], %[r1] \n\t" + "mul %[r2], %[r2], %[r3] \n\t" + "mul %[r4], %[r4], %[r5] \n\t" + "mul %[r6], %[r6], %[r7] \n\t" + "addiu %[window], %[window], 8 \n\t" + "addiu %[anaBuf], %[anaBuf], 8 \n\t" + "addiu %[r0], %[r0], 0x2000 \n\t" + "addiu %[r2], %[r2], 0x2000 \n\t" + "addiu %[r4], %[r4], 0x2000 \n\t" + "addiu %[r6], %[r6], 0x2000 \n\t" + "sra %[r0], %[r0], 14 \n\t" + "sra %[r2], %[r2], 14 \n\t" + "sra %[r4], %[r4], 14 \n\t" + "sra %[r6], %[r6], 14 \n\t" + "sh %[r0], 0(%[outBuf]) \n\t" + "sh %[r2], 2(%[outBuf]) \n\t" + "sh %[r4], 4(%[outBuf]) \n\t" + "sh %[r6], 6(%[outBuf]) \n\t" + "addiu %[outBuf], %[outBuf], 8 \n\t" + "b 1b \n\t" + " addiu %[iters], %[iters], -1 \n\t" + "2: \n\t" + "andi %[after], %[anaLen], 3 \n\t" + "3: \n\t" + "blez %[after], 4f \n\t" + " nop \n\t" + "lh %[r0], 0(%[window]) \n\t" + "lh %[r1], 0(%[anaBuf]) \n\t" + "mul %[r0], %[r0], %[r1] \n\t" + "addiu %[window], %[window], 2 \n\t" + "addiu %[anaBuf], %[anaBuf], 2 \n\t" + "addiu %[outBuf], %[outBuf], 2 \n\t" + "addiu %[r0], %[r0], 0x2000 \n\t" + "sra %[r0], %[r0], 14 \n\t" + "sh %[r0], -2(%[outBuf]) \n\t" + "b 3b \n\t" + " addiu %[after], %[after], -1 \n\t" + "4: \n\t" + ".set pop \n\t" + : [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2), + [r3] "=&r" (r3), [r4] "=&r" (r4), [r5] "=&r" (r5), + [r6] "=&r" (r6), [r7] "=&r" (r7), [iters] "=&r" (iters), + [after] "=&r" (after), [window] "+r" (window), + [anaBuf] "+r" (anaBuf), [outBuf] "+r" (outBuf) + : [anaLen] "r" (anaLen) + : "memory", "hi", "lo" + ); +#endif +} + +// For the noise supression process, synthesis, read out fully processed +// segment, and update synthesis buffer. 
+void WebRtcNsx_SynthesisUpdate_mips(NoiseSuppressionFixedC* inst, + int16_t* out_frame, + int16_t gain_factor) { + int iters = (int)inst->blockLen10ms >> 2; + int after = inst->blockLen10ms & 3; + int r0, r1, r2, r3, r4, r5, r6, r7; + int16_t *window = (int16_t*)inst->window; + int16_t *real = inst->real; + int16_t *synthBuf = inst->synthesisBuffer; + int16_t *out = out_frame; + int sat_pos = 0x7fff; + int sat_neg = 0xffff8000; + int block10 = (int)inst->blockLen10ms; + int anaLen = (int)inst->anaLen; + + __asm __volatile( + ".set push \n\t" + ".set noreorder \n\t" + "1: \n\t" + "blez %[iters], 2f \n\t" + " nop \n\t" + "lh %[r0], 0(%[window]) \n\t" + "lh %[r1], 0(%[real]) \n\t" + "lh %[r2], 2(%[window]) \n\t" + "lh %[r3], 2(%[real]) \n\t" + "lh %[r4], 4(%[window]) \n\t" + "lh %[r5], 4(%[real]) \n\t" + "lh %[r6], 6(%[window]) \n\t" + "lh %[r7], 6(%[real]) \n\t" + "mul %[r0], %[r0], %[r1] \n\t" + "mul %[r2], %[r2], %[r3] \n\t" + "mul %[r4], %[r4], %[r5] \n\t" + "mul %[r6], %[r6], %[r7] \n\t" + "addiu %[r0], %[r0], 0x2000 \n\t" + "addiu %[r2], %[r2], 0x2000 \n\t" + "addiu %[r4], %[r4], 0x2000 \n\t" + "addiu %[r6], %[r6], 0x2000 \n\t" + "sra %[r0], %[r0], 14 \n\t" + "sra %[r2], %[r2], 14 \n\t" + "sra %[r4], %[r4], 14 \n\t" + "sra %[r6], %[r6], 14 \n\t" + "mul %[r0], %[r0], %[gain_factor] \n\t" + "mul %[r2], %[r2], %[gain_factor] \n\t" + "mul %[r4], %[r4], %[gain_factor] \n\t" + "mul %[r6], %[r6], %[gain_factor] \n\t" + "addiu %[r0], %[r0], 0x1000 \n\t" + "addiu %[r2], %[r2], 0x1000 \n\t" + "addiu %[r4], %[r4], 0x1000 \n\t" + "addiu %[r6], %[r6], 0x1000 \n\t" + "sra %[r0], %[r0], 13 \n\t" + "sra %[r2], %[r2], 13 \n\t" + "sra %[r4], %[r4], 13 \n\t" + "sra %[r6], %[r6], 13 \n\t" + "slt %[r1], %[r0], %[sat_pos] \n\t" + "slt %[r3], %[r2], %[sat_pos] \n\t" + "slt %[r5], %[r4], %[sat_pos] \n\t" + "slt %[r7], %[r6], %[sat_pos] \n\t" + "movz %[r0], %[sat_pos], %[r1] \n\t" + "movz %[r2], %[sat_pos], %[r3] \n\t" + "movz %[r4], %[sat_pos], %[r5] \n\t" + "movz %[r6], %[sat_pos], %[r7] \n\t" + "lh %[r1], 0(%[synthBuf]) \n\t" + "lh %[r3], 2(%[synthBuf]) \n\t" + "lh %[r5], 4(%[synthBuf]) \n\t" + "lh %[r7], 6(%[synthBuf]) \n\t" + "addu %[r0], %[r0], %[r1] \n\t" + "addu %[r2], %[r2], %[r3] \n\t" + "addu %[r4], %[r4], %[r5] \n\t" + "addu %[r6], %[r6], %[r7] \n\t" + "slt %[r1], %[r0], %[sat_pos] \n\t" + "slt %[r3], %[r2], %[sat_pos] \n\t" + "slt %[r5], %[r4], %[sat_pos] \n\t" + "slt %[r7], %[r6], %[sat_pos] \n\t" + "movz %[r0], %[sat_pos], %[r1] \n\t" + "movz %[r2], %[sat_pos], %[r3] \n\t" + "movz %[r4], %[sat_pos], %[r5] \n\t" + "movz %[r6], %[sat_pos], %[r7] \n\t" + "slt %[r1], %[r0], %[sat_neg] \n\t" + "slt %[r3], %[r2], %[sat_neg] \n\t" + "slt %[r5], %[r4], %[sat_neg] \n\t" + "slt %[r7], %[r6], %[sat_neg] \n\t" + "movn %[r0], %[sat_neg], %[r1] \n\t" + "movn %[r2], %[sat_neg], %[r3] \n\t" + "movn %[r4], %[sat_neg], %[r5] \n\t" + "movn %[r6], %[sat_neg], %[r7] \n\t" + "sh %[r0], 0(%[synthBuf]) \n\t" + "sh %[r2], 2(%[synthBuf]) \n\t" + "sh %[r4], 4(%[synthBuf]) \n\t" + "sh %[r6], 6(%[synthBuf]) \n\t" + "sh %[r0], 0(%[out]) \n\t" + "sh %[r2], 2(%[out]) \n\t" + "sh %[r4], 4(%[out]) \n\t" + "sh %[r6], 6(%[out]) \n\t" + "addiu %[window], %[window], 8 \n\t" + "addiu %[real], %[real], 8 \n\t" + "addiu %[synthBuf],%[synthBuf], 8 \n\t" + "addiu %[out], %[out], 8 \n\t" + "b 1b \n\t" + " addiu %[iters], %[iters], -1 \n\t" + "2: \n\t" + "blez %[after], 3f \n\t" + " subu %[block10], %[anaLen], %[block10] \n\t" + "lh %[r0], 0(%[window]) \n\t" + "lh %[r1], 0(%[real]) \n\t" + "mul %[r0], %[r0], %[r1] \n\t" + "addiu %[window], 
%[window], 2 \n\t" + "addiu %[real], %[real], 2 \n\t" + "addiu %[r0], %[r0], 0x2000 \n\t" + "sra %[r0], %[r0], 14 \n\t" + "mul %[r0], %[r0], %[gain_factor] \n\t" + "addiu %[r0], %[r0], 0x1000 \n\t" + "sra %[r0], %[r0], 13 \n\t" + "slt %[r1], %[r0], %[sat_pos] \n\t" + "movz %[r0], %[sat_pos], %[r1] \n\t" + "lh %[r1], 0(%[synthBuf]) \n\t" + "addu %[r0], %[r0], %[r1] \n\t" + "slt %[r1], %[r0], %[sat_pos] \n\t" + "movz %[r0], %[sat_pos], %[r1] \n\t" + "slt %[r1], %[r0], %[sat_neg] \n\t" + "movn %[r0], %[sat_neg], %[r1] \n\t" + "sh %[r0], 0(%[synthBuf]) \n\t" + "sh %[r0], 0(%[out]) \n\t" + "addiu %[synthBuf],%[synthBuf], 2 \n\t" + "addiu %[out], %[out], 2 \n\t" + "b 2b \n\t" + " addiu %[after], %[after], -1 \n\t" + "3: \n\t" + "sra %[iters], %[block10], 2 \n\t" + "4: \n\t" + "blez %[iters], 5f \n\t" + " andi %[after], %[block10], 3 \n\t" + "lh %[r0], 0(%[window]) \n\t" + "lh %[r1], 0(%[real]) \n\t" + "lh %[r2], 2(%[window]) \n\t" + "lh %[r3], 2(%[real]) \n\t" + "lh %[r4], 4(%[window]) \n\t" + "lh %[r5], 4(%[real]) \n\t" + "lh %[r6], 6(%[window]) \n\t" + "lh %[r7], 6(%[real]) \n\t" + "mul %[r0], %[r0], %[r1] \n\t" + "mul %[r2], %[r2], %[r3] \n\t" + "mul %[r4], %[r4], %[r5] \n\t" + "mul %[r6], %[r6], %[r7] \n\t" + "addiu %[r0], %[r0], 0x2000 \n\t" + "addiu %[r2], %[r2], 0x2000 \n\t" + "addiu %[r4], %[r4], 0x2000 \n\t" + "addiu %[r6], %[r6], 0x2000 \n\t" + "sra %[r0], %[r0], 14 \n\t" + "sra %[r2], %[r2], 14 \n\t" + "sra %[r4], %[r4], 14 \n\t" + "sra %[r6], %[r6], 14 \n\t" + "mul %[r0], %[r0], %[gain_factor] \n\t" + "mul %[r2], %[r2], %[gain_factor] \n\t" + "mul %[r4], %[r4], %[gain_factor] \n\t" + "mul %[r6], %[r6], %[gain_factor] \n\t" + "addiu %[r0], %[r0], 0x1000 \n\t" + "addiu %[r2], %[r2], 0x1000 \n\t" + "addiu %[r4], %[r4], 0x1000 \n\t" + "addiu %[r6], %[r6], 0x1000 \n\t" + "sra %[r0], %[r0], 13 \n\t" + "sra %[r2], %[r2], 13 \n\t" + "sra %[r4], %[r4], 13 \n\t" + "sra %[r6], %[r6], 13 \n\t" + "slt %[r1], %[r0], %[sat_pos] \n\t" + "slt %[r3], %[r2], %[sat_pos] \n\t" + "slt %[r5], %[r4], %[sat_pos] \n\t" + "slt %[r7], %[r6], %[sat_pos] \n\t" + "movz %[r0], %[sat_pos], %[r1] \n\t" + "movz %[r2], %[sat_pos], %[r3] \n\t" + "movz %[r4], %[sat_pos], %[r5] \n\t" + "movz %[r6], %[sat_pos], %[r7] \n\t" + "lh %[r1], 0(%[synthBuf]) \n\t" + "lh %[r3], 2(%[synthBuf]) \n\t" + "lh %[r5], 4(%[synthBuf]) \n\t" + "lh %[r7], 6(%[synthBuf]) \n\t" + "addu %[r0], %[r0], %[r1] \n\t" + "addu %[r2], %[r2], %[r3] \n\t" + "addu %[r4], %[r4], %[r5] \n\t" + "addu %[r6], %[r6], %[r7] \n\t" + "slt %[r1], %[r0], %[sat_pos] \n\t" + "slt %[r3], %[r2], %[sat_pos] \n\t" + "slt %[r5], %[r4], %[sat_pos] \n\t" + "slt %[r7], %[r6], %[sat_pos] \n\t" + "movz %[r0], %[sat_pos], %[r1] \n\t" + "movz %[r2], %[sat_pos], %[r3] \n\t" + "movz %[r4], %[sat_pos], %[r5] \n\t" + "movz %[r6], %[sat_pos], %[r7] \n\t" + "slt %[r1], %[r0], %[sat_neg] \n\t" + "slt %[r3], %[r2], %[sat_neg] \n\t" + "slt %[r5], %[r4], %[sat_neg] \n\t" + "slt %[r7], %[r6], %[sat_neg] \n\t" + "movn %[r0], %[sat_neg], %[r1] \n\t" + "movn %[r2], %[sat_neg], %[r3] \n\t" + "movn %[r4], %[sat_neg], %[r5] \n\t" + "movn %[r6], %[sat_neg], %[r7] \n\t" + "sh %[r0], 0(%[synthBuf]) \n\t" + "sh %[r2], 2(%[synthBuf]) \n\t" + "sh %[r4], 4(%[synthBuf]) \n\t" + "sh %[r6], 6(%[synthBuf]) \n\t" + "addiu %[window], %[window], 8 \n\t" + "addiu %[real], %[real], 8 \n\t" + "addiu %[synthBuf],%[synthBuf], 8 \n\t" + "b 4b \n\t" + " addiu %[iters], %[iters], -1 \n\t" + "5: \n\t" + "blez %[after], 6f \n\t" + " nop \n\t" + "lh %[r0], 0(%[window]) \n\t" + "lh %[r1], 0(%[real]) \n\t" + "mul %[r0], 
%[r0], %[r1] \n\t" + "addiu %[window], %[window], 2 \n\t" + "addiu %[real], %[real], 2 \n\t" + "addiu %[r0], %[r0], 0x2000 \n\t" + "sra %[r0], %[r0], 14 \n\t" + "mul %[r0], %[r0], %[gain_factor] \n\t" + "addiu %[r0], %[r0], 0x1000 \n\t" + "sra %[r0], %[r0], 13 \n\t" + "slt %[r1], %[r0], %[sat_pos] \n\t" + "movz %[r0], %[sat_pos], %[r1] \n\t" + "lh %[r1], 0(%[synthBuf]) \n\t" + "addu %[r0], %[r0], %[r1] \n\t" + "slt %[r1], %[r0], %[sat_pos] \n\t" + "movz %[r0], %[sat_pos], %[r1] \n\t" + "slt %[r1], %[r0], %[sat_neg] \n\t" + "movn %[r0], %[sat_neg], %[r1] \n\t" + "sh %[r0], 0(%[synthBuf]) \n\t" + "addiu %[synthBuf],%[synthBuf], 2 \n\t" + "b 2b \n\t" + " addiu %[after], %[after], -1 \n\t" + "6: \n\t" + ".set pop \n\t" + : [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2), + [r3] "=&r" (r3), [r4] "=&r" (r4), [r5] "=&r" (r5), + [r6] "=&r" (r6), [r7] "=&r" (r7), [iters] "+r" (iters), + [after] "+r" (after), [block10] "+r" (block10), + [window] "+r" (window), [real] "+r" (real), + [synthBuf] "+r" (synthBuf), [out] "+r" (out) + : [gain_factor] "r" (gain_factor), [sat_pos] "r" (sat_pos), + [sat_neg] "r" (sat_neg), [anaLen] "r" (anaLen) + : "memory", "hi", "lo" + ); + + // update synthesis buffer + memcpy(inst->synthesisBuffer, inst->synthesisBuffer + inst->blockLen10ms, + (inst->anaLen - inst->blockLen10ms) * sizeof(*inst->synthesisBuffer)); + WebRtcSpl_ZerosArrayW16(inst->synthesisBuffer + + inst->anaLen - inst->blockLen10ms, inst->blockLen10ms); +} + +// Filter the data in the frequency domain, and create spectrum. +void WebRtcNsx_PrepareSpectrum_mips(NoiseSuppressionFixedC* inst, + int16_t* freq_buf) { + uint16_t *noiseSupFilter = inst->noiseSupFilter; + int16_t *real = inst->real; + int16_t *imag = inst->imag; + int32_t loop_count = 2; + int16_t tmp_1, tmp_2, tmp_3, tmp_4, tmp_5, tmp_6; + int16_t tmp16 = (int16_t)(inst->anaLen << 1) - 4; + int16_t* freq_buf_f = freq_buf; + int16_t* freq_buf_s = &freq_buf[tmp16]; + + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + //first sample + "lh %[tmp_1], 0(%[noiseSupFilter]) \n\t" + "lh %[tmp_2], 0(%[real]) \n\t" + "lh %[tmp_3], 0(%[imag]) \n\t" + "mul %[tmp_2], %[tmp_2], %[tmp_1] \n\t" + "mul %[tmp_3], %[tmp_3], %[tmp_1] \n\t" + "sra %[tmp_2], %[tmp_2], 14 \n\t" + "sra %[tmp_3], %[tmp_3], 14 \n\t" + "sh %[tmp_2], 0(%[real]) \n\t" + "sh %[tmp_3], 0(%[imag]) \n\t" + "negu %[tmp_3], %[tmp_3] \n\t" + "sh %[tmp_2], 0(%[freq_buf_f]) \n\t" + "sh %[tmp_3], 2(%[freq_buf_f]) \n\t" + "addiu %[real], %[real], 2 \n\t" + "addiu %[imag], %[imag], 2 \n\t" + "addiu %[noiseSupFilter], %[noiseSupFilter], 2 \n\t" + "addiu %[freq_buf_f], %[freq_buf_f], 4 \n\t" + "1: \n\t" + "lh %[tmp_1], 0(%[noiseSupFilter]) \n\t" + "lh %[tmp_2], 0(%[real]) \n\t" + "lh %[tmp_3], 0(%[imag]) \n\t" + "lh %[tmp_4], 2(%[noiseSupFilter]) \n\t" + "lh %[tmp_5], 2(%[real]) \n\t" + "lh %[tmp_6], 2(%[imag]) \n\t" + "mul %[tmp_2], %[tmp_2], %[tmp_1] \n\t" + "mul %[tmp_3], %[tmp_3], %[tmp_1] \n\t" + "mul %[tmp_5], %[tmp_5], %[tmp_4] \n\t" + "mul %[tmp_6], %[tmp_6], %[tmp_4] \n\t" + "addiu %[loop_count], %[loop_count], 2 \n\t" + "sra %[tmp_2], %[tmp_2], 14 \n\t" + "sra %[tmp_3], %[tmp_3], 14 \n\t" + "sra %[tmp_5], %[tmp_5], 14 \n\t" + "sra %[tmp_6], %[tmp_6], 14 \n\t" + "addiu %[noiseSupFilter], %[noiseSupFilter], 4 \n\t" + "sh %[tmp_2], 0(%[real]) \n\t" + "sh %[tmp_2], 4(%[freq_buf_s]) \n\t" + "sh %[tmp_3], 0(%[imag]) \n\t" + "sh %[tmp_3], 6(%[freq_buf_s]) \n\t" + "negu %[tmp_3], %[tmp_3] \n\t" + "sh %[tmp_5], 2(%[real]) \n\t" + "sh %[tmp_5], 0(%[freq_buf_s]) \n\t" + "sh %[tmp_6], 
2(%[imag]) \n\t" + "sh %[tmp_6], 2(%[freq_buf_s]) \n\t" + "negu %[tmp_6], %[tmp_6] \n\t" + "addiu %[freq_buf_s], %[freq_buf_s], -8 \n\t" + "addiu %[real], %[real], 4 \n\t" + "addiu %[imag], %[imag], 4 \n\t" + "sh %[tmp_2], 0(%[freq_buf_f]) \n\t" + "sh %[tmp_3], 2(%[freq_buf_f]) \n\t" + "sh %[tmp_5], 4(%[freq_buf_f]) \n\t" + "sh %[tmp_6], 6(%[freq_buf_f]) \n\t" + "blt %[loop_count], %[loop_size], 1b \n\t" + " addiu %[freq_buf_f], %[freq_buf_f], 8 \n\t" + //last two samples: + "lh %[tmp_1], 0(%[noiseSupFilter]) \n\t" + "lh %[tmp_2], 0(%[real]) \n\t" + "lh %[tmp_3], 0(%[imag]) \n\t" + "lh %[tmp_4], 2(%[noiseSupFilter]) \n\t" + "lh %[tmp_5], 2(%[real]) \n\t" + "lh %[tmp_6], 2(%[imag]) \n\t" + "mul %[tmp_2], %[tmp_2], %[tmp_1] \n\t" + "mul %[tmp_3], %[tmp_3], %[tmp_1] \n\t" + "mul %[tmp_5], %[tmp_5], %[tmp_4] \n\t" + "mul %[tmp_6], %[tmp_6], %[tmp_4] \n\t" + "sra %[tmp_2], %[tmp_2], 14 \n\t" + "sra %[tmp_3], %[tmp_3], 14 \n\t" + "sra %[tmp_5], %[tmp_5], 14 \n\t" + "sra %[tmp_6], %[tmp_6], 14 \n\t" + "sh %[tmp_2], 0(%[real]) \n\t" + "sh %[tmp_2], 4(%[freq_buf_s]) \n\t" + "sh %[tmp_3], 0(%[imag]) \n\t" + "sh %[tmp_3], 6(%[freq_buf_s]) \n\t" + "negu %[tmp_3], %[tmp_3] \n\t" + "sh %[tmp_2], 0(%[freq_buf_f]) \n\t" + "sh %[tmp_3], 2(%[freq_buf_f]) \n\t" + "sh %[tmp_5], 4(%[freq_buf_f]) \n\t" + "sh %[tmp_6], 6(%[freq_buf_f]) \n\t" + "sh %[tmp_5], 2(%[real]) \n\t" + "sh %[tmp_6], 2(%[imag]) \n\t" + ".set pop \n\t" + : [real] "+r" (real), [imag] "+r" (imag), + [freq_buf_f] "+r" (freq_buf_f), [freq_buf_s] "+r" (freq_buf_s), + [loop_count] "+r" (loop_count), [noiseSupFilter] "+r" (noiseSupFilter), + [tmp_1] "=&r" (tmp_1), [tmp_2] "=&r" (tmp_2), [tmp_3] "=&r" (tmp_3), + [tmp_4] "=&r" (tmp_4), [tmp_5] "=&r" (tmp_5), [tmp_6] "=&r" (tmp_6) + : [loop_size] "r" (inst->anaLen2) + : "memory", "hi", "lo" + ); +} + +#if defined(MIPS_DSP_R1_LE) +// Denormalize the real-valued signal |in|, the output from inverse FFT. 
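+// The shift to undo is |factor| - |inst->normData|: a non-negative difference
+// is applied with the saturating DSP-R1 left shift (shllv_s.ph), a negative
+// one is negated and applied as an arithmetic right shift (srav).
+// Equivalent scalar C for reference (a sketch, not part of upstream):
+//   int shift = factor - inst->normData;
+//   for (size_t k = 0; k < inst->anaLen; k++) {
+//     inst->real[k] = (shift >= 0)
+//         ? WebRtcSpl_SatW32ToW16((int32_t)in[k] << shift)
+//         : (int16_t)(in[k] >> -shift);
+//   }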
+void WebRtcNsx_Denormalize_mips(NoiseSuppressionFixedC* inst, + int16_t* in, + int factor) { + int32_t r0, r1, r2, r3, t0; + int len = (int)inst->anaLen; + int16_t *out = &inst->real[0]; + int shift = factor - inst->normData; + + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "beqz %[len], 8f \n\t" + " nop \n\t" + "bltz %[shift], 4f \n\t" + " sra %[t0], %[len], 2 \n\t" + "beqz %[t0], 2f \n\t" + " andi %[len], %[len], 3 \n\t" + "1: \n\t" + "lh %[r0], 0(%[in]) \n\t" + "lh %[r1], 2(%[in]) \n\t" + "lh %[r2], 4(%[in]) \n\t" + "lh %[r3], 6(%[in]) \n\t" + "shllv_s.ph %[r0], %[r0], %[shift] \n\t" + "shllv_s.ph %[r1], %[r1], %[shift] \n\t" + "shllv_s.ph %[r2], %[r2], %[shift] \n\t" + "shllv_s.ph %[r3], %[r3], %[shift] \n\t" + "addiu %[in], %[in], 8 \n\t" + "addiu %[t0], %[t0], -1 \n\t" + "sh %[r0], 0(%[out]) \n\t" + "sh %[r1], 2(%[out]) \n\t" + "sh %[r2], 4(%[out]) \n\t" + "sh %[r3], 6(%[out]) \n\t" + "bgtz %[t0], 1b \n\t" + " addiu %[out], %[out], 8 \n\t" + "2: \n\t" + "beqz %[len], 8f \n\t" + " nop \n\t" + "3: \n\t" + "lh %[r0], 0(%[in]) \n\t" + "addiu %[in], %[in], 2 \n\t" + "addiu %[len], %[len], -1 \n\t" + "shllv_s.ph %[r0], %[r0], %[shift] \n\t" + "addiu %[out], %[out], 2 \n\t" + "bgtz %[len], 3b \n\t" + " sh %[r0], -2(%[out]) \n\t" + "b 8f \n\t" + "4: \n\t" + "negu %[shift], %[shift] \n\t" + "beqz %[t0], 6f \n\t" + " andi %[len], %[len], 3 \n\t" + "5: \n\t" + "lh %[r0], 0(%[in]) \n\t" + "lh %[r1], 2(%[in]) \n\t" + "lh %[r2], 4(%[in]) \n\t" + "lh %[r3], 6(%[in]) \n\t" + "srav %[r0], %[r0], %[shift] \n\t" + "srav %[r1], %[r1], %[shift] \n\t" + "srav %[r2], %[r2], %[shift] \n\t" + "srav %[r3], %[r3], %[shift] \n\t" + "addiu %[in], %[in], 8 \n\t" + "addiu %[t0], %[t0], -1 \n\t" + "sh %[r0], 0(%[out]) \n\t" + "sh %[r1], 2(%[out]) \n\t" + "sh %[r2], 4(%[out]) \n\t" + "sh %[r3], 6(%[out]) \n\t" + "bgtz %[t0], 5b \n\t" + " addiu %[out], %[out], 8 \n\t" + "6: \n\t" + "beqz %[len], 8f \n\t" + " nop \n\t" + "7: \n\t" + "lh %[r0], 0(%[in]) \n\t" + "addiu %[in], %[in], 2 \n\t" + "addiu %[len], %[len], -1 \n\t" + "srav %[r0], %[r0], %[shift] \n\t" + "addiu %[out], %[out], 2 \n\t" + "bgtz %[len], 7b \n\t" + " sh %[r0], -2(%[out]) \n\t" + "8: \n\t" + ".set pop \n\t" + : [t0] "=&r" (t0), [r0] "=&r" (r0), [r1] "=&r" (r1), + [r2] "=&r" (r2), [r3] "=&r" (r3) + : [len] "r" (len), [shift] "r" (shift), [in] "r" (in), + [out] "r" (out) + : "memory" + ); +} +#endif + +// Normalize the real-valued signal |in|, the input to forward FFT. 
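+// Each sample is shifted left by |inst->normData| bits (sllv, four samples per
+// iteration plus a scalar tail) so the block carries as many significant bits
+// as possible into the fixed-point forward FFT; WebRtcNsx_Denormalize_mips
+// above applies the inverse scaling after the inverse FFT.
+// Equivalent scalar C for reference (a sketch, not part of upstream):
+//   for (size_t k = 0; k < inst->anaLen; k++)
+//     out[k] = (int16_t)(in[k] << inst->normData);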
+void WebRtcNsx_NormalizeRealBuffer_mips(NoiseSuppressionFixedC* inst, + const int16_t* in, + int16_t* out) { + int32_t r0, r1, r2, r3, t0; + int len = (int)inst->anaLen; + int shift = inst->normData; + + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "beqz %[len], 4f \n\t" + " sra %[t0], %[len], 2 \n\t" + "beqz %[t0], 2f \n\t" + " andi %[len], %[len], 3 \n\t" + "1: \n\t" + "lh %[r0], 0(%[in]) \n\t" + "lh %[r1], 2(%[in]) \n\t" + "lh %[r2], 4(%[in]) \n\t" + "lh %[r3], 6(%[in]) \n\t" + "sllv %[r0], %[r0], %[shift] \n\t" + "sllv %[r1], %[r1], %[shift] \n\t" + "sllv %[r2], %[r2], %[shift] \n\t" + "sllv %[r3], %[r3], %[shift] \n\t" + "addiu %[in], %[in], 8 \n\t" + "addiu %[t0], %[t0], -1 \n\t" + "sh %[r0], 0(%[out]) \n\t" + "sh %[r1], 2(%[out]) \n\t" + "sh %[r2], 4(%[out]) \n\t" + "sh %[r3], 6(%[out]) \n\t" + "bgtz %[t0], 1b \n\t" + " addiu %[out], %[out], 8 \n\t" + "2: \n\t" + "beqz %[len], 4f \n\t" + " nop \n\t" + "3: \n\t" + "lh %[r0], 0(%[in]) \n\t" + "addiu %[in], %[in], 2 \n\t" + "addiu %[len], %[len], -1 \n\t" + "sllv %[r0], %[r0], %[shift] \n\t" + "addiu %[out], %[out], 2 \n\t" + "bgtz %[len], 3b \n\t" + " sh %[r0], -2(%[out]) \n\t" + "4: \n\t" + ".set pop \n\t" + : [t0] "=&r" (t0), [r0] "=&r" (r0), [r1] "=&r" (r1), + [r2] "=&r" (r2), [r3] "=&r" (r3) + : [len] "r" (len), [shift] "r" (shift), [in] "r" (in), + [out] "r" (out) + : "memory" + ); +} + diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/ns/nsx_core_neon.c b/third_party/libwebrtc/webrtc/modules/audio_processing/ns/nsx_core_neon.c new file mode 100644 index 0000000000..64ce99c32c --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/ns/nsx_core_neon.c @@ -0,0 +1,606 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/ns/nsx_core.h" + +#include <arm_neon.h> + +#include "rtc_base/checks.h" + +// Constants to compensate for shifting signal log(2^shifts). 
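+// As a reading aid, derived from the values themselves rather than from any
+// upstream documentation: WebRtcNsx_kLogTable[k] is k*log(2) in Q8, e.g.
+// round(256 * 2 * 0.693147) = 355; WebRtcNsx_kCounterDiv[k] is 1/(k+1) in
+// Q15, e.g. round(32768 / 3) = 10923; and WebRtcNsx_kLogTableFrac[f]
+// approximates 256 * log2(1 + f/256), e.g. round(256 * log2(260/256)) = 6.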
+const int16_t WebRtcNsx_kLogTable[9] = { + 0, 177, 355, 532, 710, 887, 1065, 1242, 1420 +}; + +const int16_t WebRtcNsx_kCounterDiv[201] = { + 32767, 16384, 10923, 8192, 6554, 5461, 4681, 4096, 3641, 3277, 2979, 2731, + 2521, 2341, 2185, 2048, 1928, 1820, 1725, 1638, 1560, 1489, 1425, 1365, 1311, + 1260, 1214, 1170, 1130, 1092, 1057, 1024, 993, 964, 936, 910, 886, 862, 840, + 819, 799, 780, 762, 745, 728, 712, 697, 683, 669, 655, 643, 630, 618, 607, + 596, 585, 575, 565, 555, 546, 537, 529, 520, 512, 504, 496, 489, 482, 475, + 468, 462, 455, 449, 443, 437, 431, 426, 420, 415, 410, 405, 400, 395, 390, + 386, 381, 377, 372, 368, 364, 360, 356, 352, 349, 345, 341, 338, 334, 331, + 328, 324, 321, 318, 315, 312, 309, 306, 303, 301, 298, 295, 293, 290, 287, + 285, 282, 280, 278, 275, 273, 271, 269, 266, 264, 262, 260, 258, 256, 254, + 252, 250, 248, 246, 245, 243, 241, 239, 237, 236, 234, 232, 231, 229, 228, + 226, 224, 223, 221, 220, 218, 217, 216, 214, 213, 211, 210, 209, 207, 206, + 205, 204, 202, 201, 200, 199, 197, 196, 195, 194, 193, 192, 191, 189, 188, + 187, 186, 185, 184, 183, 182, 181, 180, 179, 178, 177, 176, 175, 174, 173, + 172, 172, 171, 170, 169, 168, 167, 166, 165, 165, 164, 163 +}; + +const int16_t WebRtcNsx_kLogTableFrac[256] = { + 0, 1, 3, 4, 6, 7, 9, 10, 11, 13, 14, 16, 17, 18, 20, 21, + 22, 24, 25, 26, 28, 29, 30, 32, 33, 34, 36, 37, 38, 40, 41, 42, + 44, 45, 46, 47, 49, 50, 51, 52, 54, 55, 56, 57, 59, 60, 61, 62, + 63, 65, 66, 67, 68, 69, 71, 72, 73, 74, 75, 77, 78, 79, 80, 81, + 82, 84, 85, 86, 87, 88, 89, 90, 92, 93, 94, 95, 96, 97, 98, 99, + 100, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 116, + 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, + 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, + 147, 148, 149, 150, 151, 152, 153, 154, 155, 155, 156, 157, 158, 159, 160, + 161, 162, 163, 164, 165, 166, 167, 168, 169, 169, 170, 171, 172, 173, 174, + 175, 176, 177, 178, 178, 179, 180, 181, 182, 183, 184, 185, 185, 186, 187, + 188, 189, 190, 191, 192, 192, 193, 194, 195, 196, 197, 198, 198, 199, 200, + 201, 202, 203, 203, 204, 205, 206, 207, 208, 208, 209, 210, 211, 212, 212, + 213, 214, 215, 216, 216, 217, 218, 219, 220, 220, 221, 222, 223, 224, 224, + 225, 226, 227, 228, 228, 229, 230, 231, 231, 232, 233, 234, 234, 235, 236, + 237, 238, 238, 239, 240, 241, 241, 242, 243, 244, 244, 245, 246, 247, 247, + 248, 249, 249, 250, 251, 252, 252, 253, 254, 255, 255 +}; + +// Update the noise estimation information. 
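+// The log-domain quantile is mapped back to linear via exp(x) = 2^(x*log2(e)):
+// kExp2Const = 11819 is log2(e) in Q13, i.e. round(8192 / ln(2)). Multiplying
+// it by the Q8 log quantile yields a Q21 value whose bits above 21 give the
+// integer exponent and whose low bits, OR-ed with 0x200000, give a 2^21-based
+// mantissa that is then shifted into the shared Q(qNoise) domain.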
+static void UpdateNoiseEstimateNeon(NoiseSuppressionFixedC* inst, int offset) { + const int16_t kExp2Const = 11819; // Q13 + int16_t* ptr_noiseEstLogQuantile = NULL; + int16_t* ptr_noiseEstQuantile = NULL; + int16x4_t kExp2Const16x4 = vdup_n_s16(kExp2Const); + int32x4_t twentyOne32x4 = vdupq_n_s32(21); + int32x4_t constA32x4 = vdupq_n_s32(0x1fffff); + int32x4_t constB32x4 = vdupq_n_s32(0x200000); + + int16_t tmp16 = WebRtcSpl_MaxValueW16(inst->noiseEstLogQuantile + offset, + inst->magnLen); + + // Guarantee a Q-domain as high as possible and still fit in int16 + inst->qNoise = 14 - (int) WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(kExp2Const, + tmp16, + 21); + + int32x4_t qNoise32x4 = vdupq_n_s32(inst->qNoise); + + for (ptr_noiseEstLogQuantile = &inst->noiseEstLogQuantile[offset], + ptr_noiseEstQuantile = &inst->noiseEstQuantile[0]; + ptr_noiseEstQuantile < &inst->noiseEstQuantile[inst->magnLen - 3]; + ptr_noiseEstQuantile += 4, ptr_noiseEstLogQuantile += 4) { + + // tmp32no2 = kExp2Const * inst->noiseEstLogQuantile[offset + i]; + int16x4_t v16x4 = vld1_s16(ptr_noiseEstLogQuantile); + int32x4_t v32x4B = vmull_s16(v16x4, kExp2Const16x4); + + // tmp32no1 = (0x00200000 | (tmp32no2 & 0x001FFFFF)); // 2^21 + frac + int32x4_t v32x4A = vandq_s32(v32x4B, constA32x4); + v32x4A = vorrq_s32(v32x4A, constB32x4); + + // tmp16 = (int16_t)(tmp32no2 >> 21); + v32x4B = vshrq_n_s32(v32x4B, 21); + + // tmp16 -= 21;// shift 21 to get result in Q0 + v32x4B = vsubq_s32(v32x4B, twentyOne32x4); + + // tmp16 += (int16_t) inst->qNoise; + // shift to get result in Q(qNoise) + v32x4B = vaddq_s32(v32x4B, qNoise32x4); + + // if (tmp16 < 0) { + // tmp32no1 >>= -tmp16; + // } else { + // tmp32no1 <<= tmp16; + // } + v32x4B = vshlq_s32(v32x4A, v32x4B); + + // tmp16 = WebRtcSpl_SatW32ToW16(tmp32no1); + v16x4 = vqmovn_s32(v32x4B); + + //inst->noiseEstQuantile[i] = tmp16; + vst1_s16(ptr_noiseEstQuantile, v16x4); + } + + // Last iteration: + + // inst->quantile[i]=exp(inst->lquantile[offset+i]); + // in Q21 + int32_t tmp32no2 = kExp2Const * *ptr_noiseEstLogQuantile; + int32_t tmp32no1 = (0x00200000 | (tmp32no2 & 0x001FFFFF)); // 2^21 + frac + + tmp16 = (int16_t)(tmp32no2 >> 21); + tmp16 -= 21;// shift 21 to get result in Q0 + tmp16 += (int16_t) inst->qNoise; //shift to get result in Q(qNoise) + if (tmp16 < 0) { + tmp32no1 >>= -tmp16; + } else { + tmp32no1 <<= tmp16; + } + *ptr_noiseEstQuantile = WebRtcSpl_SatW32ToW16(tmp32no1); +} + +// Noise Estimation +void WebRtcNsx_NoiseEstimationNeon(NoiseSuppressionFixedC* inst, + uint16_t* magn, + uint32_t* noise, + int16_t* q_noise) { + int16_t lmagn[HALF_ANAL_BLOCKL], counter, countDiv; + int16_t countProd, delta, zeros, frac; + int16_t log2, tabind, logval, tmp16, tmp16no1, tmp16no2; + const int16_t log2_const = 22713; + const int16_t width_factor = 21845; + + size_t i, s, offset; + + tabind = inst->stages - inst->normData; + RTC_DCHECK_LT(tabind, 9); + RTC_DCHECK_GT(tabind, -9); + if (tabind < 0) { + logval = -WebRtcNsx_kLogTable[-tabind]; + } else { + logval = WebRtcNsx_kLogTable[tabind]; + } + + int16x8_t logval_16x8 = vdupq_n_s16(logval); + + // lmagn(i)=log(magn(i))=log(2)*log2(magn(i)) + // magn is in Q(-stages), and the real lmagn values are: + // real_lmagn(i)=log(magn(i)*2^stages)=log(magn(i))+log(2^stages) + // lmagn in Q8 + for (i = 0; i < inst->magnLen; i++) { + if (magn[i]) { + zeros = WebRtcSpl_NormU32((uint32_t)magn[i]); + frac = (int16_t)((((uint32_t)magn[i] << zeros) + & 0x7FFFFFFF) >> 23); + RTC_DCHECK_LT(frac, 256); + // log2(magn(i)) + log2 = (int16_t)(((31 - zeros) << 
8)
+                       + WebRtcNsx_kLogTableFrac[frac]);
+      // log2(magn(i))*log(2)
+      lmagn[i] = (int16_t)((log2 * log2_const) >> 15);
+      // + log(2^stages)
+      lmagn[i] += logval;
+    } else {
+      lmagn[i] = logval;
+    }
+  }
+
+  int16x4_t Q3_16x4 = vdup_n_s16(3);
+  int16x8_t WIDTHQ8_16x8 = vdupq_n_s16(WIDTH_Q8);
+  int16x8_t WIDTHFACTOR_16x8 = vdupq_n_s16(width_factor);
+
+  int16_t factor = FACTOR_Q7;
+  if (inst->blockIndex < END_STARTUP_LONG)
+    factor = FACTOR_Q7_STARTUP;
+
+  // Loop over simultaneous estimates
+  for (s = 0; s < SIMULT; s++) {
+    offset = s * inst->magnLen;
+
+    // Get counter values from state
+    counter = inst->noiseEstCounter[s];
+    RTC_DCHECK_LT(counter, 201);
+    countDiv = WebRtcNsx_kCounterDiv[counter];
+    countProd = (int16_t)(counter * countDiv);
+
+    // quant_est(...)
+    int16_t deltaBuff[8];
+    int16x4_t tmp16x4_0;
+    int16x4_t tmp16x4_1;
+    int16x4_t countDiv_16x4 = vdup_n_s16(countDiv);
+    int16x8_t countProd_16x8 = vdupq_n_s16(countProd);
+    int16x8_t tmp16x8_0 = vdupq_n_s16(countDiv);
+    int16x8_t prod16x8 = vqrdmulhq_s16(WIDTHFACTOR_16x8, tmp16x8_0);
+    int16x8_t tmp16x8_1;
+    int16x8_t tmp16x8_2;
+    int16x8_t tmp16x8_3;
+    uint16x8_t tmp16x8_4;
+    int32x4_t tmp32x4;
+
+    for (i = 0; i + 7 < inst->magnLen; i += 8) {
+      // Compute delta.
+      // Smaller step size during startup. This prevents using unrealistic
+      // values that would cause overflow.
+      tmp16x8_0 = vdupq_n_s16(factor);
+      vst1q_s16(deltaBuff, tmp16x8_0);
+
+      int j;
+      for (j = 0; j < 8; j++) {
+        if (inst->noiseEstDensity[offset + i + j] > 512) {
+          // Get values for deltaBuff by shifting instead of dividing.
+          int factor = WebRtcSpl_NormW16(inst->noiseEstDensity[offset + i + j]);
+          deltaBuff[j] = (int16_t)(FACTOR_Q16 >> (14 - factor));
+        }
+      }
+
+      // Update log quantile estimate
+
+      // tmp16 = (int16_t)((delta * countDiv) >> 14);
+      tmp32x4 = vmull_s16(vld1_s16(&deltaBuff[0]), countDiv_16x4);
+      tmp16x4_1 = vshrn_n_s32(tmp32x4, 14);
+      tmp32x4 = vmull_s16(vld1_s16(&deltaBuff[4]), countDiv_16x4);
+      tmp16x4_0 = vshrn_n_s32(tmp32x4, 14);
+      tmp16x8_0 = vcombine_s16(tmp16x4_1, tmp16x4_0);  // Keep for several lines.
+
+      // Prepare for the "if" branch
+      // tmp16 += 2;
+      // tmp16_1 = (Word16)(tmp16>>2);
+      tmp16x8_1 = vrshrq_n_s16(tmp16x8_0, 2);
+
+      // inst->noiseEstLogQuantile[offset+i] + tmp16_1;
+      tmp16x8_2 = vld1q_s16(&inst->noiseEstLogQuantile[offset + i]);  // Keep
+      tmp16x8_1 = vaddq_s16(tmp16x8_2, tmp16x8_1);  // Keep for several lines
+
+      // Prepare for the "else" branch
+      // tmp16 += 1;
+      // tmp16_1 = (Word16)(tmp16>>1);
+      tmp16x8_0 = vrshrq_n_s16(tmp16x8_0, 1);
+
+      // tmp16_2 = (int16_t)((tmp16_1 * 3) >> 1);
+      tmp32x4 = vmull_s16(vget_low_s16(tmp16x8_0), Q3_16x4);
+      tmp16x4_1 = vshrn_n_s32(tmp32x4, 1);
+
+      // tmp16_2 = (int16_t)((tmp16_1 * 3) >> 1);
+      tmp32x4 = vmull_s16(vget_high_s16(tmp16x8_0), Q3_16x4);
+      tmp16x4_0 = vshrn_n_s32(tmp32x4, 1);
+
+      // inst->noiseEstLogQuantile[offset + i] - tmp16_2;
+      tmp16x8_0 = vcombine_s16(tmp16x4_1, tmp16x4_0);  // Keep
+      tmp16x8_0 = vsubq_s16(tmp16x8_2, tmp16x8_0);
+
+      // logval is the smallest fixed point representation we can have. Values
+      // below that will correspond to values in the interval [0, 1], which
+      // can't possibly occur.
+      tmp16x8_0 = vmaxq_s16(tmp16x8_0, logval_16x8);
+
+      // Do the if-else branches:
+      tmp16x8_3 = vld1q_s16(&lmagn[i]);  // Keep for several lines
+      tmp16x8_4 = vcgtq_s16(tmp16x8_3, tmp16x8_2);
+      tmp16x8_2 = vbslq_s16(tmp16x8_4, tmp16x8_1, tmp16x8_0);
+      vst1q_s16(&inst->noiseEstLogQuantile[offset + i], tmp16x8_2);
+
+      // Update density estimate
+      // tmp16_1 + tmp16_2
+      tmp16x8_1 = vld1q_s16(&inst->noiseEstDensity[offset + i]);
+      tmp16x8_0 = vqrdmulhq_s16(tmp16x8_1, countProd_16x8);
+      tmp16x8_0 = vaddq_s16(tmp16x8_0, prod16x8);
+
+      // lmagn[i] - inst->noiseEstLogQuantile[offset + i]
+      tmp16x8_3 = vsubq_s16(tmp16x8_3, tmp16x8_2);
+      tmp16x8_3 = vabsq_s16(tmp16x8_3);
+      tmp16x8_4 = vcgtq_s16(WIDTHQ8_16x8, tmp16x8_3);
+      tmp16x8_1 = vbslq_s16(tmp16x8_4, tmp16x8_0, tmp16x8_1);
+      vst1q_s16(&inst->noiseEstDensity[offset + i], tmp16x8_1);
+    }  // End loop over magnitude spectrum
+
+    // Last iteration over magnitude spectrum:
+    // Compute delta.
+    if (inst->noiseEstDensity[offset + i] > 512) {
+      // Get the value for delta by shifting instead of dividing.
+      int factor = WebRtcSpl_NormW16(inst->noiseEstDensity[offset + i]);
+      delta = (int16_t)(FACTOR_Q16 >> (14 - factor));
+    } else {
+      delta = FACTOR_Q7;
+      if (inst->blockIndex < END_STARTUP_LONG) {
+        // Smaller step size during startup. This prevents using unrealistic
+        // values that would cause overflow.
+        delta = FACTOR_Q7_STARTUP;
+      }
+    }
+    // Update log quantile estimate
+    tmp16 = (int16_t)((delta * countDiv) >> 14);
+    if (lmagn[i] > inst->noiseEstLogQuantile[offset + i]) {
+      // +=QUANTILE*delta/(inst->counter[s]+1) QUANTILE=0.25, =1 in Q2
+      // CounterDiv=1/(inst->counter[s]+1) in Q15
+      tmp16 += 2;
+      inst->noiseEstLogQuantile[offset + i] += tmp16 / 4;
+    } else {
+      tmp16 += 1;
+      // *(1-QUANTILE), in Q2 QUANTILE=0.25, 1-0.25=0.75=3 in Q2
+      // TODO(bjornv): investigate why we need to truncate twice.
+      tmp16no2 = (int16_t)((tmp16 / 2) * 3 / 2);
+      inst->noiseEstLogQuantile[offset + i] -= tmp16no2;
+      if (inst->noiseEstLogQuantile[offset + i] < logval) {
+        // logval is the smallest fixed point representation we can have.
+        // Values below that will correspond to values in the interval
+        // [0, 1], which can't possibly occur.
+        inst->noiseEstLogQuantile[offset + i] = logval;
+      }
+    }
+
+    // Update density estimate
+    if (WEBRTC_SPL_ABS_W16(lmagn[i] - inst->noiseEstLogQuantile[offset + i])
+        < WIDTH_Q8) {
+      tmp16no1 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
+          inst->noiseEstDensity[offset + i], countProd, 15);
+      tmp16no2 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
+          width_factor, countDiv, 15);
+      inst->noiseEstDensity[offset + i] = tmp16no1 + tmp16no2;
+    }
+
+    if (counter >= END_STARTUP_LONG) {
+      inst->noiseEstCounter[s] = 0;
+      if (inst->blockIndex >= END_STARTUP_LONG) {
+        UpdateNoiseEstimateNeon(inst, offset);
+      }
+    }
+    inst->noiseEstCounter[s]++;
+  }  // End loop over simultaneous estimates
+
+  // Sequentially update the noise during startup
+  if (inst->blockIndex < END_STARTUP_LONG) {
+    UpdateNoiseEstimateNeon(inst, offset);
+  }
+
+  for (i = 0; i < inst->magnLen; i++) {
+    noise[i] = (uint32_t)(inst->noiseEstQuantile[i]);  // Q(qNoise)
+  }
+  (*q_noise) = (int16_t)inst->qNoise;
+}
+
+// Filter the data in the frequency domain, and create spectrum.
+void WebRtcNsx_PrepareSpectrumNeon(NoiseSuppressionFixedC* inst,
+                                   int16_t* freq_buf) {
+  RTC_DCHECK_EQ(1, inst->magnLen % 8);
+  RTC_DCHECK_EQ(0, inst->anaLen2 % 16);
+
+  // (1) Filtering.
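+  // |noiseSupFilter| holds per-bin suppression gains in Q14, so the >> 14 in
+  // the products below returns real/imag to their Q(normData-stages) domain.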
+
+  // Fixed point C code for the next block is as follows:
+  // for (i = 0; i < inst->magnLen; i++) {
+  //   inst->real[i] = (int16_t)((inst->real[i] *
+  //      (int16_t)(inst->noiseSupFilter[i])) >> 14);  // Q(normData-stages)
+  //   inst->imag[i] = (int16_t)((inst->imag[i] *
+  //      (int16_t)(inst->noiseSupFilter[i])) >> 14);  // Q(normData-stages)
+  // }
+
+  int16_t* preal = &inst->real[0];
+  int16_t* pimag = &inst->imag[0];
+  int16_t* pns_filter = (int16_t*)&inst->noiseSupFilter[0];
+  int16_t* pimag_end = pimag + inst->magnLen - 4;
+
+  while (pimag < pimag_end) {
+    int16x8_t real = vld1q_s16(preal);
+    int16x8_t imag = vld1q_s16(pimag);
+    int16x8_t ns_filter = vld1q_s16(pns_filter);
+
+    int32x4_t tmp_r_0 = vmull_s16(vget_low_s16(real), vget_low_s16(ns_filter));
+    int32x4_t tmp_i_0 = vmull_s16(vget_low_s16(imag), vget_low_s16(ns_filter));
+    int32x4_t tmp_r_1 = vmull_s16(vget_high_s16(real),
+                                  vget_high_s16(ns_filter));
+    int32x4_t tmp_i_1 = vmull_s16(vget_high_s16(imag),
+                                  vget_high_s16(ns_filter));
+
+    int16x4_t result_r_0 = vshrn_n_s32(tmp_r_0, 14);
+    int16x4_t result_i_0 = vshrn_n_s32(tmp_i_0, 14);
+    int16x4_t result_r_1 = vshrn_n_s32(tmp_r_1, 14);
+    int16x4_t result_i_1 = vshrn_n_s32(tmp_i_1, 14);
+
+    vst1q_s16(preal, vcombine_s16(result_r_0, result_r_1));
+    vst1q_s16(pimag, vcombine_s16(result_i_0, result_i_1));
+    preal += 8;
+    pimag += 8;
+    pns_filter += 8;
+  }
+
+  // Filter the last element
+  *preal = (int16_t)((*preal * *pns_filter) >> 14);
+  *pimag = (int16_t)((*pimag * *pns_filter) >> 14);
+
+  // (2) Create spectrum.
+
+  // Fixed point C code for the rest of the function is as follows:
+  // freq_buf[0] = inst->real[0];
+  // freq_buf[1] = -inst->imag[0];
+  // for (i = 1, j = 2; i < inst->anaLen2; i += 1, j += 2) {
+  //   freq_buf[j] = inst->real[i];
+  //   freq_buf[j + 1] = -inst->imag[i];
+  // }
+  // freq_buf[inst->anaLen] = inst->real[inst->anaLen2];
+  // freq_buf[inst->anaLen + 1] = -inst->imag[inst->anaLen2];
+
+  preal = &inst->real[0];
+  pimag = &inst->imag[0];
+  pimag_end = pimag + inst->anaLen2;
+  int16_t* freq_buf_start = freq_buf;
+  while (pimag < pimag_end) {
+    // Loop unroll.
+    int16x8x2_t real_imag_0;
+    int16x8x2_t real_imag_1;
+    real_imag_0.val[1] = vld1q_s16(pimag);
+    real_imag_0.val[0] = vld1q_s16(preal);
+    preal += 8;
+    pimag += 8;
+    real_imag_1.val[1] = vld1q_s16(pimag);
+    real_imag_1.val[0] = vld1q_s16(preal);
+    preal += 8;
+    pimag += 8;
+
+    real_imag_0.val[1] = vnegq_s16(real_imag_0.val[1]);
+    real_imag_1.val[1] = vnegq_s16(real_imag_1.val[1]);
+    vst2q_s16(freq_buf_start, real_imag_0);
+    freq_buf_start += 16;
+    vst2q_s16(freq_buf_start, real_imag_1);
+    freq_buf_start += 16;
+  }
+  freq_buf[inst->anaLen] = inst->real[inst->anaLen2];
+  freq_buf[inst->anaLen + 1] = -inst->imag[inst->anaLen2];
+}
+
+// For the noise suppression process: synthesis, read out the fully processed
+// segment, and update the synthesis buffer.
+void WebRtcNsx_SynthesisUpdateNeon(NoiseSuppressionFixedC* inst,
+                                   int16_t* out_frame,
+                                   int16_t gain_factor) {
+  RTC_DCHECK_EQ(0, inst->anaLen % 16);
+  RTC_DCHECK_EQ(0, inst->blockLen10ms % 16);
+
+  int16_t* preal_start = inst->real;
+  const int16_t* pwindow = inst->window;
+  int16_t* preal_end = preal_start + inst->anaLen;
+  int16_t* psynthesis_buffer = inst->synthesisBuffer;
+
+  while (preal_start < preal_end) {
+    // Loop unroll.
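+    // Per lane: the Q14 window scales the sample (vqrshrn by 14), the Q13
+    // gain_factor rescales it (vqrshrn by 13), and vqaddq_s16 saturates the
+    // overlap-add into the synthesis buffer.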
+ int16x8_t window_0 = vld1q_s16(pwindow); + int16x8_t real_0 = vld1q_s16(preal_start); + int16x8_t synthesis_buffer_0 = vld1q_s16(psynthesis_buffer); + + int16x8_t window_1 = vld1q_s16(pwindow + 8); + int16x8_t real_1 = vld1q_s16(preal_start + 8); + int16x8_t synthesis_buffer_1 = vld1q_s16(psynthesis_buffer + 8); + + int32x4_t tmp32a_0_low = vmull_s16(vget_low_s16(real_0), + vget_low_s16(window_0)); + int32x4_t tmp32a_0_high = vmull_s16(vget_high_s16(real_0), + vget_high_s16(window_0)); + + int32x4_t tmp32a_1_low = vmull_s16(vget_low_s16(real_1), + vget_low_s16(window_1)); + int32x4_t tmp32a_1_high = vmull_s16(vget_high_s16(real_1), + vget_high_s16(window_1)); + + int16x4_t tmp16a_0_low = vqrshrn_n_s32(tmp32a_0_low, 14); + int16x4_t tmp16a_0_high = vqrshrn_n_s32(tmp32a_0_high, 14); + + int16x4_t tmp16a_1_low = vqrshrn_n_s32(tmp32a_1_low, 14); + int16x4_t tmp16a_1_high = vqrshrn_n_s32(tmp32a_1_high, 14); + + int32x4_t tmp32b_0_low = vmull_n_s16(tmp16a_0_low, gain_factor); + int32x4_t tmp32b_0_high = vmull_n_s16(tmp16a_0_high, gain_factor); + + int32x4_t tmp32b_1_low = vmull_n_s16(tmp16a_1_low, gain_factor); + int32x4_t tmp32b_1_high = vmull_n_s16(tmp16a_1_high, gain_factor); + + int16x4_t tmp16b_0_low = vqrshrn_n_s32(tmp32b_0_low, 13); + int16x4_t tmp16b_0_high = vqrshrn_n_s32(tmp32b_0_high, 13); + + int16x4_t tmp16b_1_low = vqrshrn_n_s32(tmp32b_1_low, 13); + int16x4_t tmp16b_1_high = vqrshrn_n_s32(tmp32b_1_high, 13); + + synthesis_buffer_0 = vqaddq_s16(vcombine_s16(tmp16b_0_low, tmp16b_0_high), + synthesis_buffer_0); + synthesis_buffer_1 = vqaddq_s16(vcombine_s16(tmp16b_1_low, tmp16b_1_high), + synthesis_buffer_1); + vst1q_s16(psynthesis_buffer, synthesis_buffer_0); + vst1q_s16(psynthesis_buffer + 8, synthesis_buffer_1); + + pwindow += 16; + preal_start += 16; + psynthesis_buffer += 16; + } + + // Read out fully processed segment. + int16_t * p_start = inst->synthesisBuffer; + int16_t * p_end = inst->synthesisBuffer + inst->blockLen10ms; + int16_t * p_frame = out_frame; + while (p_start < p_end) { + int16x8_t frame_0 = vld1q_s16(p_start); + vst1q_s16(p_frame, frame_0); + p_start += 8; + p_frame += 8; + } + + // Update synthesis buffer. + int16_t* p_start_src = inst->synthesisBuffer + inst->blockLen10ms; + int16_t* p_end_src = inst->synthesisBuffer + inst->anaLen; + int16_t* p_start_dst = inst->synthesisBuffer; + while (p_start_src < p_end_src) { + int16x8_t frame = vld1q_s16(p_start_src); + vst1q_s16(p_start_dst, frame); + p_start_src += 8; + p_start_dst += 8; + } + + p_start = inst->synthesisBuffer + inst->anaLen - inst->blockLen10ms; + p_end = p_start + inst->blockLen10ms; + int16x8_t zero = vdupq_n_s16(0); + for (;p_start < p_end; p_start += 8) { + vst1q_s16(p_start, zero); + } +} + +// Update analysis buffer for lower band, and window data before FFT. +void WebRtcNsx_AnalysisUpdateNeon(NoiseSuppressionFixedC* inst, + int16_t* out, + int16_t* new_speech) { + RTC_DCHECK_EQ(0, inst->blockLen10ms % 16); + RTC_DCHECK_EQ(0, inst->anaLen % 16); + + // For lower band update analysis buffer. 
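+  // The reference memcpy calls quoted in the comments below are realized as
+  // plain 8-sample NEON loads/stores: shift the analysis buffer left by
+  // blockLen10ms samples, then append the new 10 ms block at its end.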
+ // memcpy(inst->analysisBuffer, inst->analysisBuffer + inst->blockLen10ms, + // (inst->anaLen - inst->blockLen10ms) * sizeof(*inst->analysisBuffer)); + int16_t* p_start_src = inst->analysisBuffer + inst->blockLen10ms; + int16_t* p_end_src = inst->analysisBuffer + inst->anaLen; + int16_t* p_start_dst = inst->analysisBuffer; + while (p_start_src < p_end_src) { + int16x8_t frame = vld1q_s16(p_start_src); + vst1q_s16(p_start_dst, frame); + + p_start_src += 8; + p_start_dst += 8; + } + + // memcpy(inst->analysisBuffer + inst->anaLen - inst->blockLen10ms, + // new_speech, inst->blockLen10ms * sizeof(*inst->analysisBuffer)); + p_start_src = new_speech; + p_end_src = new_speech + inst->blockLen10ms; + p_start_dst = inst->analysisBuffer + inst->anaLen - inst->blockLen10ms; + while (p_start_src < p_end_src) { + int16x8_t frame = vld1q_s16(p_start_src); + vst1q_s16(p_start_dst, frame); + + p_start_src += 8; + p_start_dst += 8; + } + + // Window data before FFT. + int16_t* p_start_window = (int16_t*) inst->window; + int16_t* p_start_buffer = inst->analysisBuffer; + int16_t* p_end_buffer = inst->analysisBuffer + inst->anaLen; + int16_t* p_start_out = out; + + // Load the first element to reduce pipeline bubble. + int16x8_t window = vld1q_s16(p_start_window); + int16x8_t buffer = vld1q_s16(p_start_buffer); + p_start_window += 8; + p_start_buffer += 8; + + while (p_start_buffer < p_end_buffer) { + // Unroll loop. + int32x4_t tmp32_low = vmull_s16(vget_low_s16(window), vget_low_s16(buffer)); + int32x4_t tmp32_high = vmull_s16(vget_high_s16(window), + vget_high_s16(buffer)); + window = vld1q_s16(p_start_window); + buffer = vld1q_s16(p_start_buffer); + + int16x4_t result_low = vrshrn_n_s32(tmp32_low, 14); + int16x4_t result_high = vrshrn_n_s32(tmp32_high, 14); + vst1q_s16(p_start_out, vcombine_s16(result_low, result_high)); + + p_start_buffer += 8; + p_start_window += 8; + p_start_out += 8; + } + int32x4_t tmp32_low = vmull_s16(vget_low_s16(window), vget_low_s16(buffer)); + int32x4_t tmp32_high = vmull_s16(vget_high_s16(window), + vget_high_s16(buffer)); + + int16x4_t result_low = vrshrn_n_s32(tmp32_low, 14); + int16x4_t result_high = vrshrn_n_s32(tmp32_high, 14); + vst1q_s16(p_start_out, vcombine_s16(result_low, result_high)); +} diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/ns/nsx_defines.h b/third_party/libwebrtc/webrtc/modules/audio_processing/ns/nsx_defines.h new file mode 100644 index 0000000000..12869b3cf5 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/ns/nsx_defines.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NSX_DEFINES_H_
+#define MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NSX_DEFINES_H_
+
+#define ANAL_BLOCKL_MAX         256  /* Max analysis block length */
+#define HALF_ANAL_BLOCKL        129  /* Half max analysis block length + 1 */
+#define NUM_HIGH_BANDS_MAX      2    /* Max number of high bands */
+#define SIMULT                  3
+#define END_STARTUP_LONG        200
+#define END_STARTUP_SHORT       50
+#define FACTOR_Q16              2621440  /* 40 in Q16 */
+#define FACTOR_Q7               5120     /* 40 in Q7 */
+#define FACTOR_Q7_STARTUP       1024     /* 8 in Q7 */
+#define WIDTH_Q8                3        /* 0.01 in Q8 (or 25 ) */
+
+/* PARAMETERS FOR NEW METHOD */
+#define DD_PR_SNR_Q11           2007  /* ~= Q11(0.98) DD update of prior SNR */
+#define ONE_MINUS_DD_PR_SNR_Q11 41    /* DD update of prior SNR */
+#define SPECT_FLAT_TAVG_Q14     4915  /* (0.30) tavg parameter for spectral flatness measure */
+#define SPECT_DIFF_TAVG_Q8      77    /* (0.30) tavg parameter for spectral difference measure */
+#define PRIOR_UPDATE_Q14        1638  /* Q14(0.1) Update parameter of prior model */
+#define NOISE_UPDATE_Q8         26    /* 26 ~= Q8(0.1) Update parameter for noise */
+
+/* Probability threshold for noise state in speech/noise likelihood. */
+#define ONE_MINUS_PROB_RANGE_Q8 205   /* 205 ~= Q8(0.8) */
+#define HIST_PAR_EST            1000  /* Histogram size for estimation of parameters */
+
+/* FEATURE EXTRACTION CONFIG */
+/* Bin size of histogram */
+#define BIN_SIZE_LRT            10
+/* Scale parameters: multiply dominant peaks of the histograms by a scale */
+/* factor to obtain thresholds for the prior model. */
+#define FACTOR_1_LRT_DIFF       6     /* For LRT and spectral difference (5 times bigger) */
+/* For spectral_flatness: used when noise is flatter than speech (10 times bigger). */
+#define FACTOR_2_FLAT_Q10       922
+/* Peak limit for spectral flatness (varies between 0 and 1) */
+#define THRES_PEAK_FLAT         24    /* * 2 * BIN_SIZE_FLAT_FX */
+/* Limit on spacing of two highest peaks in histogram: spacing determined by bin size. */
+#define LIM_PEAK_SPACE_FLAT_DIFF 4    /* * 2 * BIN_SIZE_DIFF_FX */
+/* Limit on relevance of second peak */
+#define LIM_PEAK_WEIGHT_FLAT_DIFF 2
+#define THRES_FLUCT_LRT         10240 /* = 20 * inst->modelUpdate; fluctuation limit of LRT feat. */
+/* Limits on the max and min values for the feature thresholds */
+#define MAX_FLAT_Q10            38912 /* * 2 * BIN_SIZE_FLAT_FX */
+#define MIN_FLAT_Q10            4096  /* * 2 * BIN_SIZE_FLAT_FX */
+#define MAX_DIFF                100   /* * 2 * BIN_SIZE_DIFF_FX */
+#define MIN_DIFF                16    /* * 2 * BIN_SIZE_DIFF_FX */
+/* Criterion for the weight of a histogram peak when accepting/rejecting a feature */
+#define THRES_WEIGHT_FLAT_DIFF  154   /* (int)(0.3*(inst->modelUpdate)) for flatness and difference */
+
+#define STAT_UPDATES            9     /* Update every 512 = 1 << 9 blocks */
+#define ONE_MINUS_GAMMA_PAUSE_Q8 13   /* ~= Q8(0.05) Update for conservative noise estimate */
+#define GAMMA_NOISE_TRANS_AND_SPEECH_Q8 3 /* ~= Q8(0.01) Update for transition and noise region */
+
+#endif /* MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NSX_DEFINES_H_ */
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/ns/windows_private.h b/third_party/libwebrtc/webrtc/modules/audio_processing/ns/windows_private.h
new file mode 100644
index 0000000000..2ffd693d8d
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/ns/windows_private.h
@@ -0,0 +1,574 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree.
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_WINDOWS_PRIVATE_H_ +#define MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_WINDOWS_PRIVATE_H_ + +// Hanning window for 4ms 16kHz +static const float kHanning64w128[128] = { + 0.00000000000000f, 0.02454122852291f, 0.04906767432742f, + 0.07356456359967f, 0.09801714032956f, 0.12241067519922f, + 0.14673047445536f, 0.17096188876030f, 0.19509032201613f, + 0.21910124015687f, 0.24298017990326f, 0.26671275747490f, + 0.29028467725446f, 0.31368174039889f, 0.33688985339222f, + 0.35989503653499f, 0.38268343236509f, 0.40524131400499f, + 0.42755509343028f, 0.44961132965461f, 0.47139673682600f, + 0.49289819222978f, 0.51410274419322f, 0.53499761988710f, + 0.55557023301960f, 0.57580819141785f, 0.59569930449243f, + 0.61523159058063f, 0.63439328416365f, 0.65317284295378f, + 0.67155895484702f, 0.68954054473707f, 0.70710678118655f, + 0.72424708295147f, 0.74095112535496f, 0.75720884650648f, + 0.77301045336274f, 0.78834642762661f, 0.80320753148064f, + 0.81758481315158f, 0.83146961230255f, 0.84485356524971f, + 0.85772861000027f, 0.87008699110871f, 0.88192126434835f, + 0.89322430119552f, 0.90398929312344f, 0.91420975570353f, + 0.92387953251129f, 0.93299279883474f, 0.94154406518302f, + 0.94952818059304f, 0.95694033573221f, 0.96377606579544f, + 0.97003125319454f, 0.97570213003853f, 0.98078528040323f, + 0.98527764238894f, 0.98917650996478f, 0.99247953459871f, + 0.99518472667220f, 0.99729045667869f, 0.99879545620517f, + 0.99969881869620f, 1.00000000000000f, + 0.99969881869620f, 0.99879545620517f, 0.99729045667869f, + 0.99518472667220f, 0.99247953459871f, 0.98917650996478f, + 0.98527764238894f, 0.98078528040323f, 0.97570213003853f, + 0.97003125319454f, 0.96377606579544f, 0.95694033573221f, + 0.94952818059304f, 0.94154406518302f, 0.93299279883474f, + 0.92387953251129f, 0.91420975570353f, 0.90398929312344f, + 0.89322430119552f, 0.88192126434835f, 0.87008699110871f, + 0.85772861000027f, 0.84485356524971f, 0.83146961230255f, + 0.81758481315158f, 0.80320753148064f, 0.78834642762661f, + 0.77301045336274f, 0.75720884650648f, 0.74095112535496f, + 0.72424708295147f, 0.70710678118655f, 0.68954054473707f, + 0.67155895484702f, 0.65317284295378f, 0.63439328416365f, + 0.61523159058063f, 0.59569930449243f, 0.57580819141785f, + 0.55557023301960f, 0.53499761988710f, 0.51410274419322f, + 0.49289819222978f, 0.47139673682600f, 0.44961132965461f, + 0.42755509343028f, 0.40524131400499f, 0.38268343236509f, + 0.35989503653499f, 0.33688985339222f, 0.31368174039889f, + 0.29028467725446f, 0.26671275747490f, 0.24298017990326f, + 0.21910124015687f, 0.19509032201613f, 0.17096188876030f, + 0.14673047445536f, 0.12241067519922f, 0.09801714032956f, + 0.07356456359967f, 0.04906767432742f, 0.02454122852291f +}; + + + +// hybrib Hanning & flat window +static const float kBlocks80w128[128] = { + (float)0.00000000, (float)0.03271908, (float)0.06540313, (float)0.09801714, (float)0.13052619, + (float)0.16289547, (float)0.19509032, (float)0.22707626, (float)0.25881905, (float)0.29028468, + (float)0.32143947, (float)0.35225005, (float)0.38268343, (float)0.41270703, (float)0.44228869, + (float)0.47139674, (float)0.50000000, (float)0.52806785, (float)0.55557023, (float)0.58247770, + (float)0.60876143, (float)0.63439328, (float)0.65934582, (float)0.68359230, (float)0.70710678, + (float)0.72986407, (float)0.75183981, 
(float)0.77301045, (float)0.79335334, (float)0.81284668, + (float)0.83146961, (float)0.84920218, (float)0.86602540, (float)0.88192126, (float)0.89687274, + (float)0.91086382, (float)0.92387953, (float)0.93590593, (float)0.94693013, (float)0.95694034, + (float)0.96592583, (float)0.97387698, (float)0.98078528, (float)0.98664333, (float)0.99144486, + (float)0.99518473, (float)0.99785892, (float)0.99946459, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)0.99946459, (float)0.99785892, (float)0.99518473, (float)0.99144486, + (float)0.98664333, (float)0.98078528, (float)0.97387698, (float)0.96592583, (float)0.95694034, + (float)0.94693013, (float)0.93590593, (float)0.92387953, (float)0.91086382, (float)0.89687274, + (float)0.88192126, (float)0.86602540, (float)0.84920218, (float)0.83146961, (float)0.81284668, + (float)0.79335334, (float)0.77301045, (float)0.75183981, (float)0.72986407, (float)0.70710678, + (float)0.68359230, (float)0.65934582, (float)0.63439328, (float)0.60876143, (float)0.58247770, + (float)0.55557023, (float)0.52806785, (float)0.50000000, (float)0.47139674, (float)0.44228869, + (float)0.41270703, (float)0.38268343, (float)0.35225005, (float)0.32143947, (float)0.29028468, + (float)0.25881905, (float)0.22707626, (float)0.19509032, (float)0.16289547, (float)0.13052619, + (float)0.09801714, (float)0.06540313, (float)0.03271908 +}; + +// hybrib Hanning & flat window +static const float kBlocks160w256[256] = { + (float)0.00000000, (float)0.01636173, (float)0.03271908, (float)0.04906767, (float)0.06540313, + (float)0.08172107, (float)0.09801714, (float)0.11428696, (float)0.13052619, (float)0.14673047, + (float)0.16289547, (float)0.17901686, (float)0.19509032, (float)0.21111155, (float)0.22707626, + (float)0.24298018, (float)0.25881905, (float)0.27458862, (float)0.29028468, (float)0.30590302, + (float)0.32143947, (float)0.33688985, (float)0.35225005, (float)0.36751594, (float)0.38268343, + (float)0.39774847, (float)0.41270703, (float)0.42755509, (float)0.44228869, (float)0.45690388, + (float)0.47139674, (float)0.48576339, (float)0.50000000, (float)0.51410274, (float)0.52806785, + (float)0.54189158, (float)0.55557023, (float)0.56910015, (float)0.58247770, (float)0.59569930, + (float)0.60876143, (float)0.62166057, (float)0.63439328, (float)0.64695615, (float)0.65934582, + (float)0.67155895, (float)0.68359230, (float)0.69544264, (float)0.70710678, (float)0.71858162, + (float)0.72986407, (float)0.74095113, (float)0.75183981, (float)0.76252720, (float)0.77301045, + (float)0.78328675, (float)0.79335334, (float)0.80320753, (float)0.81284668, (float)0.82226822, + (float)0.83146961, (float)0.84044840, (float)0.84920218, (float)0.85772861, (float)0.86602540, + (float)0.87409034, (float)0.88192126, (float)0.88951608, (float)0.89687274, (float)0.90398929, + (float)0.91086382, (float)0.91749450, (float)0.92387953, (float)0.93001722, (float)0.93590593, + (float)0.94154407, (float)0.94693013, (float)0.95206268, 
(float)0.95694034, (float)0.96156180, + (float)0.96592583, (float)0.97003125, (float)0.97387698, (float)0.97746197, (float)0.98078528, + (float)0.98384601, (float)0.98664333, (float)0.98917651, (float)0.99144486, (float)0.99344778, + (float)0.99518473, (float)0.99665524, (float)0.99785892, (float)0.99879546, (float)0.99946459, + (float)0.99986614, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)0.99986614, (float)0.99946459, (float)0.99879546, (float)0.99785892, + (float)0.99665524, (float)0.99518473, (float)0.99344778, (float)0.99144486, (float)0.98917651, + (float)0.98664333, (float)0.98384601, (float)0.98078528, (float)0.97746197, (float)0.97387698, + (float)0.97003125, (float)0.96592583, (float)0.96156180, (float)0.95694034, (float)0.95206268, + (float)0.94693013, (float)0.94154407, (float)0.93590593, (float)0.93001722, (float)0.92387953, + (float)0.91749450, (float)0.91086382, (float)0.90398929, (float)0.89687274, (float)0.88951608, + (float)0.88192126, (float)0.87409034, (float)0.86602540, (float)0.85772861, (float)0.84920218, + (float)0.84044840, (float)0.83146961, (float)0.82226822, (float)0.81284668, (float)0.80320753, + (float)0.79335334, (float)0.78328675, (float)0.77301045, (float)0.76252720, (float)0.75183981, + (float)0.74095113, (float)0.72986407, (float)0.71858162, (float)0.70710678, (float)0.69544264, + (float)0.68359230, (float)0.67155895, (float)0.65934582, (float)0.64695615, (float)0.63439328, + (float)0.62166057, (float)0.60876143, (float)0.59569930, (float)0.58247770, (float)0.56910015, + (float)0.55557023, (float)0.54189158, (float)0.52806785, (float)0.51410274, (float)0.50000000, + (float)0.48576339, (float)0.47139674, (float)0.45690388, (float)0.44228869, (float)0.42755509, + (float)0.41270703, (float)0.39774847, (float)0.38268343, (float)0.36751594, (float)0.35225005, + (float)0.33688985, (float)0.32143947, (float)0.30590302, (float)0.29028468, (float)0.27458862, + (float)0.25881905, (float)0.24298018, (float)0.22707626, (float)0.21111155, (float)0.19509032, + (float)0.17901686, (float)0.16289547, (float)0.14673047, (float)0.13052619, (float)0.11428696, + (float)0.09801714, (float)0.08172107, (float)0.06540313, (float)0.04906767, (float)0.03271908, + (float)0.01636173 +}; + +// hybrib Hanning & flat window: for 20ms +static const float kBlocks320w512[512] = { + 
(float)0.00000000, (float)0.00818114, (float)0.01636173, (float)0.02454123, (float)0.03271908, + (float)0.04089475, (float)0.04906767, (float)0.05723732, (float)0.06540313, (float)0.07356456, + (float)0.08172107, (float)0.08987211, (float)0.09801714, (float)0.10615561, (float)0.11428696, + (float)0.12241068, (float)0.13052619, (float)0.13863297, (float)0.14673047, (float)0.15481816, + (float)0.16289547, (float)0.17096189, (float)0.17901686, (float)0.18705985, (float)0.19509032, + (float)0.20310773, (float)0.21111155, (float)0.21910124, (float)0.22707626, (float)0.23503609, + (float)0.24298018, (float)0.25090801, (float)0.25881905, (float)0.26671276, (float)0.27458862, + (float)0.28244610, (float)0.29028468, (float)0.29810383, (float)0.30590302, (float)0.31368174, + (float)0.32143947, (float)0.32917568, (float)0.33688985, (float)0.34458148, (float)0.35225005, + (float)0.35989504, (float)0.36751594, (float)0.37511224, (float)0.38268343, (float)0.39022901, + (float)0.39774847, (float)0.40524131, (float)0.41270703, (float)0.42014512, (float)0.42755509, + (float)0.43493645, (float)0.44228869, (float)0.44961133, (float)0.45690388, (float)0.46416584, + (float)0.47139674, (float)0.47859608, (float)0.48576339, (float)0.49289819, (float)0.50000000, + (float)0.50706834, (float)0.51410274, (float)0.52110274, (float)0.52806785, (float)0.53499762, + (float)0.54189158, (float)0.54874927, (float)0.55557023, (float)0.56235401, (float)0.56910015, + (float)0.57580819, (float)0.58247770, (float)0.58910822, (float)0.59569930, (float)0.60225052, + (float)0.60876143, (float)0.61523159, (float)0.62166057, (float)0.62804795, (float)0.63439328, + (float)0.64069616, (float)0.64695615, (float)0.65317284, (float)0.65934582, (float)0.66547466, + (float)0.67155895, (float)0.67759830, (float)0.68359230, (float)0.68954054, (float)0.69544264, + (float)0.70129818, (float)0.70710678, (float)0.71286806, (float)0.71858162, (float)0.72424708, + (float)0.72986407, (float)0.73543221, (float)0.74095113, (float)0.74642045, (float)0.75183981, + (float)0.75720885, (float)0.76252720, (float)0.76779452, (float)0.77301045, (float)0.77817464, + (float)0.78328675, (float)0.78834643, (float)0.79335334, (float)0.79830715, (float)0.80320753, + (float)0.80805415, (float)0.81284668, (float)0.81758481, (float)0.82226822, (float)0.82689659, + (float)0.83146961, (float)0.83598698, (float)0.84044840, (float)0.84485357, (float)0.84920218, + (float)0.85349396, (float)0.85772861, (float)0.86190585, (float)0.86602540, (float)0.87008699, + (float)0.87409034, (float)0.87803519, (float)0.88192126, (float)0.88574831, (float)0.88951608, + (float)0.89322430, (float)0.89687274, (float)0.90046115, (float)0.90398929, (float)0.90745693, + (float)0.91086382, (float)0.91420976, (float)0.91749450, (float)0.92071783, (float)0.92387953, + (float)0.92697940, (float)0.93001722, (float)0.93299280, (float)0.93590593, (float)0.93875641, + (float)0.94154407, (float)0.94426870, (float)0.94693013, (float)0.94952818, (float)0.95206268, + (float)0.95453345, (float)0.95694034, (float)0.95928317, (float)0.96156180, (float)0.96377607, + (float)0.96592583, (float)0.96801094, (float)0.97003125, (float)0.97198664, (float)0.97387698, + (float)0.97570213, (float)0.97746197, (float)0.97915640, (float)0.98078528, (float)0.98234852, + (float)0.98384601, (float)0.98527764, (float)0.98664333, (float)0.98794298, (float)0.98917651, + (float)0.99034383, (float)0.99144486, (float)0.99247953, (float)0.99344778, (float)0.99434953, + (float)0.99518473, (float)0.99595331, (float)0.99665524, 
(float)0.99729046, (float)0.99785892, + (float)0.99836060, (float)0.99879546, (float)0.99916346, (float)0.99946459, (float)0.99969882, + (float)0.99986614, (float)0.99996653, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, + (float)1.00000000, (float)0.99996653, (float)0.99986614, (float)0.99969882, (float)0.99946459, + (float)0.99916346, (float)0.99879546, (float)0.99836060, (float)0.99785892, (float)0.99729046, + (float)0.99665524, (float)0.99595331, (float)0.99518473, (float)0.99434953, (float)0.99344778, + (float)0.99247953, (float)0.99144486, (float)0.99034383, (float)0.98917651, (float)0.98794298, + (float)0.98664333, (float)0.98527764, (float)0.98384601, (float)0.98234852, (float)0.98078528, + (float)0.97915640, (float)0.97746197, (float)0.97570213, (float)0.97387698, (float)0.97198664, + (float)0.97003125, (float)0.96801094, (float)0.96592583, (float)0.96377607, (float)0.96156180, + (float)0.95928317, (float)0.95694034, (float)0.95453345, (float)0.95206268, (float)0.94952818, + (float)0.94693013, (float)0.94426870, (float)0.94154407, (float)0.93875641, (float)0.93590593, + (float)0.93299280, 
(float)0.93001722, (float)0.92697940, (float)0.92387953, (float)0.92071783, + (float)0.91749450, (float)0.91420976, (float)0.91086382, (float)0.90745693, (float)0.90398929, + (float)0.90046115, (float)0.89687274, (float)0.89322430, (float)0.88951608, (float)0.88574831, + (float)0.88192126, (float)0.87803519, (float)0.87409034, (float)0.87008699, (float)0.86602540, + (float)0.86190585, (float)0.85772861, (float)0.85349396, (float)0.84920218, (float)0.84485357, + (float)0.84044840, (float)0.83598698, (float)0.83146961, (float)0.82689659, (float)0.82226822, + (float)0.81758481, (float)0.81284668, (float)0.80805415, (float)0.80320753, (float)0.79830715, + (float)0.79335334, (float)0.78834643, (float)0.78328675, (float)0.77817464, (float)0.77301045, + (float)0.76779452, (float)0.76252720, (float)0.75720885, (float)0.75183981, (float)0.74642045, + (float)0.74095113, (float)0.73543221, (float)0.72986407, (float)0.72424708, (float)0.71858162, + (float)0.71286806, (float)0.70710678, (float)0.70129818, (float)0.69544264, (float)0.68954054, + (float)0.68359230, (float)0.67759830, (float)0.67155895, (float)0.66547466, (float)0.65934582, + (float)0.65317284, (float)0.64695615, (float)0.64069616, (float)0.63439328, (float)0.62804795, + (float)0.62166057, (float)0.61523159, (float)0.60876143, (float)0.60225052, (float)0.59569930, + (float)0.58910822, (float)0.58247770, (float)0.57580819, (float)0.56910015, (float)0.56235401, + (float)0.55557023, (float)0.54874927, (float)0.54189158, (float)0.53499762, (float)0.52806785, + (float)0.52110274, (float)0.51410274, (float)0.50706834, (float)0.50000000, (float)0.49289819, + (float)0.48576339, (float)0.47859608, (float)0.47139674, (float)0.46416584, (float)0.45690388, + (float)0.44961133, (float)0.44228869, (float)0.43493645, (float)0.42755509, (float)0.42014512, + (float)0.41270703, (float)0.40524131, (float)0.39774847, (float)0.39022901, (float)0.38268343, + (float)0.37511224, (float)0.36751594, (float)0.35989504, (float)0.35225005, (float)0.34458148, + (float)0.33688985, (float)0.32917568, (float)0.32143947, (float)0.31368174, (float)0.30590302, + (float)0.29810383, (float)0.29028468, (float)0.28244610, (float)0.27458862, (float)0.26671276, + (float)0.25881905, (float)0.25090801, (float)0.24298018, (float)0.23503609, (float)0.22707626, + (float)0.21910124, (float)0.21111155, (float)0.20310773, (float)0.19509032, (float)0.18705985, + (float)0.17901686, (float)0.17096189, (float)0.16289547, (float)0.15481816, (float)0.14673047, + (float)0.13863297, (float)0.13052619, (float)0.12241068, (float)0.11428696, (float)0.10615561, + (float)0.09801714, (float)0.08987211, (float)0.08172107, (float)0.07356456, (float)0.06540313, + (float)0.05723732, (float)0.04906767, (float)0.04089475, (float)0.03271908, (float)0.02454123, + (float)0.01636173, (float)0.00818114 +}; + + +// Hanning window: for 15ms at 16kHz with symmetric zeros +static const float kBlocks240w512[512] = { + (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, + (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, + (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, + (float)0.00000000, (float)0.00000000, (float)0.00654494, (float)0.01308960, (float)0.01963369, + (float)0.02617695, (float)0.03271908, (float)0.03925982, (float)0.04579887, (float)0.05233596, + (float)0.05887080, (float)0.06540313, (float)0.07193266, (float)0.07845910, (float)0.08498218, + (float)0.09150162, 
(float)0.09801714, (float)0.10452846, (float)0.11103531, (float)0.11753740, + (float)0.12403446, (float)0.13052620, (float)0.13701233, (float)0.14349262, (float)0.14996676, + (float)0.15643448, (float)0.16289547, (float)0.16934951, (float)0.17579629, (float)0.18223552, + (float)0.18866697, (float)0.19509032, (float)0.20150533, (float)0.20791170, (float)0.21430916, + (float)0.22069745, (float)0.22707628, (float)0.23344538, (float)0.23980446, (float)0.24615330, + (float)0.25249159, (float)0.25881904, (float)0.26513544, (float)0.27144045, (float)0.27773386, + (float)0.28401536, (float)0.29028466, (float)0.29654160, (float)0.30278578, (float)0.30901700, + (float)0.31523499, (float)0.32143945, (float)0.32763019, (float)0.33380687, (float)0.33996925, + (float)0.34611708, (float)0.35225007, (float)0.35836795, (float)0.36447051, (float)0.37055743, + (float)0.37662852, (float)0.38268346, (float)0.38872197, (float)0.39474389, (float)0.40074885, + (float)0.40673664, (float)0.41270703, (float)0.41865975, (float)0.42459452, (float)0.43051112, + (float)0.43640924, (float)0.44228873, (float)0.44814920, (float)0.45399052, (float)0.45981237, + (float)0.46561453, (float)0.47139674, (float)0.47715878, (float)0.48290035, (float)0.48862126, + (float)0.49432120, (float)0.50000000, (float)0.50565743, (float)0.51129311, (float)0.51690692, + (float)0.52249855, (float)0.52806789, (float)0.53361452, (float)0.53913832, (float)0.54463905, + (float)0.55011642, (float)0.55557024, (float)0.56100029, (float)0.56640625, (float)0.57178795, + (float)0.57714522, (float)0.58247769, (float)0.58778524, (float)0.59306765, (float)0.59832460, + (float)0.60355598, (float)0.60876143, (float)0.61394083, (float)0.61909395, (float)0.62422055, + (float)0.62932038, (float)0.63439333, (float)0.63943899, (float)0.64445734, (float)0.64944810, + (float)0.65441096, (float)0.65934587, (float)0.66425246, (float)0.66913062, (float)0.67398012, + (float)0.67880076, (float)0.68359232, (float)0.68835455, (float)0.69308740, (float)0.69779050, + (float)0.70246369, (float)0.70710677, (float)0.71171963, (float)0.71630198, (float)0.72085363, + (float)0.72537440, (float)0.72986406, (float)0.73432255, (float)0.73874950, (float)0.74314487, + (float)0.74750835, (float)0.75183982, (float)0.75613910, (float)0.76040596, (float)0.76464027, + (float)0.76884186, (float)0.77301043, (float)0.77714598, (float)0.78124821, (float)0.78531694, + (float)0.78935206, (float)0.79335338, (float)0.79732066, (float)0.80125386, (float)0.80515265, + (float)0.80901700, (float)0.81284672, (float)0.81664157, (float)0.82040149, (float)0.82412618, + (float)0.82781565, (float)0.83146966, (float)0.83508795, (float)0.83867061, (float)0.84221727, + (float)0.84572780, (float)0.84920216, (float)0.85264021, (float)0.85604161, (float)0.85940641, + (float)0.86273444, (float)0.86602545, (float)0.86927933, (float)0.87249607, (float)0.87567532, + (float)0.87881714, (float)0.88192129, (float)0.88498765, (float)0.88801610, (float)0.89100653, + (float)0.89395881, (float)0.89687276, (float)0.89974827, (float)0.90258533, (float)0.90538365, + (float)0.90814316, (float)0.91086388, (float)0.91354549, (float)0.91618794, (float)0.91879123, + (float)0.92135513, (float)0.92387950, (float)0.92636442, (float)0.92880958, (float)0.93121493, + (float)0.93358046, (float)0.93590593, (float)0.93819135, (float)0.94043654, (float)0.94264150, + (float)0.94480604, (float)0.94693011, (float)0.94901365, (float)0.95105654, (float)0.95305866, + (float)0.95501995, (float)0.95694035, (float)0.95881975, (float)0.96065807, 
(float)0.96245527, + (float)0.96421117, (float)0.96592581, (float)0.96759909, (float)0.96923089, (float)0.97082120, + (float)0.97236991, (float)0.97387701, (float)0.97534233, (float)0.97676587, (float)0.97814763, + (float)0.97948742, (float)0.98078531, (float)0.98204112, (float)0.98325491, (float)0.98442656, + (float)0.98555607, (float)0.98664331, (float)0.98768836, (float)0.98869103, (float)0.98965138, + (float)0.99056935, (float)0.99144489, (float)0.99227792, (float)0.99306846, (float)0.99381649, + (float)0.99452192, (float)0.99518472, (float)0.99580491, (float)0.99638247, (float)0.99691731, + (float)0.99740952, (float)0.99785894, (float)0.99826562, (float)0.99862951, (float)0.99895066, + (float)0.99922901, (float)0.99946457, (float)0.99965733, (float)0.99980724, (float)0.99991435, + (float)0.99997860, (float)1.00000000, (float)0.99997860, (float)0.99991435, (float)0.99980724, + (float)0.99965733, (float)0.99946457, (float)0.99922901, (float)0.99895066, (float)0.99862951, + (float)0.99826562, (float)0.99785894, (float)0.99740946, (float)0.99691731, (float)0.99638247, + (float)0.99580491, (float)0.99518472, (float)0.99452192, (float)0.99381644, (float)0.99306846, + (float)0.99227792, (float)0.99144489, (float)0.99056935, (float)0.98965138, (float)0.98869103, + (float)0.98768836, (float)0.98664331, (float)0.98555607, (float)0.98442656, (float)0.98325491, + (float)0.98204112, (float)0.98078525, (float)0.97948742, (float)0.97814757, (float)0.97676587, + (float)0.97534227, (float)0.97387695, (float)0.97236991, (float)0.97082120, (float)0.96923089, + (float)0.96759909, (float)0.96592581, (float)0.96421117, (float)0.96245521, (float)0.96065807, + (float)0.95881969, (float)0.95694029, (float)0.95501995, (float)0.95305860, (float)0.95105648, + (float)0.94901365, (float)0.94693011, (float)0.94480604, (float)0.94264150, (float)0.94043654, + (float)0.93819129, (float)0.93590593, (float)0.93358046, (float)0.93121493, (float)0.92880952, + (float)0.92636436, (float)0.92387950, (float)0.92135507, (float)0.91879123, (float)0.91618794, + (float)0.91354543, (float)0.91086382, (float)0.90814310, (float)0.90538365, (float)0.90258527, + (float)0.89974827, (float)0.89687276, (float)0.89395875, (float)0.89100647, (float)0.88801610, + (float)0.88498759, (float)0.88192123, (float)0.87881714, (float)0.87567532, (float)0.87249595, + (float)0.86927933, (float)0.86602539, (float)0.86273432, (float)0.85940641, (float)0.85604161, + (float)0.85264009, (float)0.84920216, (float)0.84572780, (float)0.84221715, (float)0.83867055, + (float)0.83508795, (float)0.83146954, (float)0.82781565, (float)0.82412612, (float)0.82040137, + (float)0.81664157, (float)0.81284660, (float)0.80901700, (float)0.80515265, (float)0.80125374, + (float)0.79732066, (float)0.79335332, (float)0.78935200, (float)0.78531694, (float)0.78124815, + (float)0.77714586, (float)0.77301049, (float)0.76884180, (float)0.76464021, (float)0.76040596, + (float)0.75613904, (float)0.75183970, (float)0.74750835, (float)0.74314481, (float)0.73874938, + (float)0.73432249, (float)0.72986400, (float)0.72537428, (float)0.72085363, (float)0.71630186, + (float)0.71171951, (float)0.70710677, (float)0.70246363, (float)0.69779032, (float)0.69308734, + (float)0.68835449, (float)0.68359220, (float)0.67880070, (float)0.67398006, (float)0.66913044, + (float)0.66425240, (float)0.65934575, (float)0.65441096, (float)0.64944804, (float)0.64445722, + (float)0.63943905, (float)0.63439327, (float)0.62932026, (float)0.62422055, (float)0.61909389, + (float)0.61394072, (float)0.60876143, 
(float)0.60355592, (float)0.59832448, (float)0.59306765, + (float)0.58778518, (float)0.58247757, (float)0.57714522, (float)0.57178789, (float)0.56640613, + (float)0.56100023, (float)0.55557019, (float)0.55011630, (float)0.54463905, (float)0.53913826, + (float)0.53361434, (float)0.52806783, (float)0.52249849, (float)0.51690674, (float)0.51129305, + (float)0.50565726, (float)0.50000006, (float)0.49432117, (float)0.48862115, (float)0.48290038, + (float)0.47715873, (float)0.47139663, (float)0.46561456, (float)0.45981231, (float)0.45399037, + (float)0.44814920, (float)0.44228864, (float)0.43640912, (float)0.43051112, (float)0.42459446, + (float)0.41865960, (float)0.41270703, (float)0.40673658, (float)0.40074870, (float)0.39474386, + (float)0.38872188, (float)0.38268328, (float)0.37662849, (float)0.37055734, (float)0.36447033, + (float)0.35836792, (float)0.35224995, (float)0.34611690, (float)0.33996922, (float)0.33380675, + (float)0.32763001, (float)0.32143945, (float)0.31523487, (float)0.30901679, (float)0.30278572, + (float)0.29654145, (float)0.29028472, (float)0.28401530, (float)0.27773371, (float)0.27144048, + (float)0.26513538, (float)0.25881892, (float)0.25249159, (float)0.24615324, (float)0.23980433, + (float)0.23344538, (float)0.22707619, (float)0.22069728, (float)0.21430916, (float)0.20791161, + (float)0.20150517, (float)0.19509031, (float)0.18866688, (float)0.18223536, (float)0.17579627, + (float)0.16934940, (float)0.16289529, (float)0.15643445, (float)0.14996666, (float)0.14349243, + (float)0.13701232, (float)0.13052608, (float)0.12403426, (float)0.11753736, (float)0.11103519, + (float)0.10452849, (float)0.09801710, (float)0.09150149, (float)0.08498220, (float)0.07845904, + (float)0.07193252, (float)0.06540315, (float)0.05887074, (float)0.05233581, (float)0.04579888, + (float)0.03925974, (float)0.03271893, (float)0.02617695, (float)0.01963361, (float)0.01308943, + (float)0.00654493, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, + (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, + (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, + (float)0.00000000, (float)0.00000000 +}; + + +// Hanning window: for 30ms with 1024 fft with symmetric zeros at 16kHz +static const float kBlocks480w1024[1024] = { + (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, + (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, + (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, + (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, + (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, + (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, + (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00327249, (float)0.00654494, + (float)0.00981732, (float)0.01308960, (float)0.01636173, (float)0.01963369, (float)0.02290544, + (float)0.02617695, (float)0.02944817, (float)0.03271908, (float)0.03598964, (float)0.03925982, + (float)0.04252957, (float)0.04579887, (float)0.04906768, (float)0.05233596, (float)0.05560368, + (float)0.05887080, (float)0.06213730, (float)0.06540313, (float)0.06866825, (float)0.07193266, + (float)0.07519628, (float)0.07845910, (float)0.08172107, (float)0.08498218, (float)0.08824237, + (float)0.09150162, 
(float)0.09475989, (float)0.09801714, (float)0.10127335, (float)0.10452846, + (float)0.10778246, (float)0.11103531, (float)0.11428697, (float)0.11753740, (float)0.12078657, + (float)0.12403446, (float)0.12728101, (float)0.13052620, (float)0.13376999, (float)0.13701233, + (float)0.14025325, (float)0.14349262, (float)0.14673047, (float)0.14996676, (float)0.15320145, + (float)0.15643448, (float)0.15966582, (float)0.16289547, (float)0.16612339, (float)0.16934951, + (float)0.17257382, (float)0.17579629, (float)0.17901687, (float)0.18223552, (float)0.18545224, + (float)0.18866697, (float)0.19187967, (float)0.19509032, (float)0.19829889, (float)0.20150533, + (float)0.20470962, (float)0.20791170, (float)0.21111156, (float)0.21430916, (float)0.21750447, + (float)0.22069745, (float)0.22388805, (float)0.22707628, (float)0.23026206, (float)0.23344538, + (float)0.23662618, (float)0.23980446, (float)0.24298020, (float)0.24615330, (float)0.24932377, + (float)0.25249159, (float)0.25565669, (float)0.25881904, (float)0.26197866, (float)0.26513544, + (float)0.26828939, (float)0.27144045, (float)0.27458861, (float)0.27773386, (float)0.28087610, + (float)0.28401536, (float)0.28715158, (float)0.29028466, (float)0.29341471, (float)0.29654160, + (float)0.29966527, (float)0.30278578, (float)0.30590302, (float)0.30901700, (float)0.31212768, + (float)0.31523499, (float)0.31833893, (float)0.32143945, (float)0.32453656, (float)0.32763019, + (float)0.33072028, (float)0.33380687, (float)0.33688986, (float)0.33996925, (float)0.34304500, + (float)0.34611708, (float)0.34918544, (float)0.35225007, (float)0.35531089, (float)0.35836795, + (float)0.36142117, (float)0.36447051, (float)0.36751595, (float)0.37055743, (float)0.37359497, + (float)0.37662852, (float)0.37965801, (float)0.38268346, (float)0.38570479, (float)0.38872197, + (float)0.39173502, (float)0.39474389, (float)0.39774847, (float)0.40074885, (float)0.40374491, + (float)0.40673664, (float)0.40972406, (float)0.41270703, (float)0.41568562, (float)0.41865975, + (float)0.42162940, (float)0.42459452, (float)0.42755508, (float)0.43051112, (float)0.43346250, + (float)0.43640924, (float)0.43935132, (float)0.44228873, (float)0.44522133, (float)0.44814920, + (float)0.45107228, (float)0.45399052, (float)0.45690390, (float)0.45981237, (float)0.46271592, + (float)0.46561453, (float)0.46850815, (float)0.47139674, (float)0.47428030, (float)0.47715878, + (float)0.48003215, (float)0.48290035, (float)0.48576337, (float)0.48862126, (float)0.49147385, + (float)0.49432120, (float)0.49716330, (float)0.50000000, (float)0.50283140, (float)0.50565743, + (float)0.50847799, (float)0.51129311, (float)0.51410276, (float)0.51690692, (float)0.51970553, + (float)0.52249855, (float)0.52528602, (float)0.52806789, (float)0.53084403, (float)0.53361452, + (float)0.53637928, (float)0.53913832, (float)0.54189163, (float)0.54463905, (float)0.54738063, + (float)0.55011642, (float)0.55284631, (float)0.55557024, (float)0.55828828, (float)0.56100029, + (float)0.56370628, (float)0.56640625, (float)0.56910014, (float)0.57178795, (float)0.57446963, + (float)0.57714522, (float)0.57981455, (float)0.58247769, (float)0.58513463, (float)0.58778524, + (float)0.59042960, (float)0.59306765, (float)0.59569931, (float)0.59832460, (float)0.60094351, + (float)0.60355598, (float)0.60616195, (float)0.60876143, (float)0.61135441, (float)0.61394083, + (float)0.61652070, (float)0.61909395, (float)0.62166059, (float)0.62422055, (float)0.62677383, + (float)0.62932038, (float)0.63186020, (float)0.63439333, (float)0.63691956, 
(float)0.63943899, + (float)0.64195162, (float)0.64445734, (float)0.64695615, (float)0.64944810, (float)0.65193301, + (float)0.65441096, (float)0.65688187, (float)0.65934587, (float)0.66180271, (float)0.66425246, + (float)0.66669512, (float)0.66913062, (float)0.67155898, (float)0.67398012, (float)0.67639405, + (float)0.67880076, (float)0.68120021, (float)0.68359232, (float)0.68597710, (float)0.68835455, + (float)0.69072467, (float)0.69308740, (float)0.69544262, (float)0.69779050, (float)0.70013082, + (float)0.70246369, (float)0.70478904, (float)0.70710677, (float)0.70941699, (float)0.71171963, + (float)0.71401459, (float)0.71630198, (float)0.71858168, (float)0.72085363, (float)0.72311789, + (float)0.72537440, (float)0.72762316, (float)0.72986406, (float)0.73209721, (float)0.73432255, + (float)0.73653996, (float)0.73874950, (float)0.74095118, (float)0.74314487, (float)0.74533057, + (float)0.74750835, (float)0.74967808, (float)0.75183982, (float)0.75399351, (float)0.75613910, + (float)0.75827658, (float)0.76040596, (float)0.76252723, (float)0.76464027, (float)0.76674515, + (float)0.76884186, (float)0.77093029, (float)0.77301043, (float)0.77508241, (float)0.77714598, + (float)0.77920127, (float)0.78124821, (float)0.78328675, (float)0.78531694, (float)0.78733873, + (float)0.78935206, (float)0.79135692, (float)0.79335338, (float)0.79534125, (float)0.79732066, + (float)0.79929149, (float)0.80125386, (float)0.80320752, (float)0.80515265, (float)0.80708915, + (float)0.80901700, (float)0.81093621, (float)0.81284672, (float)0.81474853, (float)0.81664157, + (float)0.81852591, (float)0.82040149, (float)0.82226825, (float)0.82412618, (float)0.82597536, + (float)0.82781565, (float)0.82964706, (float)0.83146966, (float)0.83328325, (float)0.83508795, + (float)0.83688378, (float)0.83867061, (float)0.84044838, (float)0.84221727, (float)0.84397703, + (float)0.84572780, (float)0.84746957, (float)0.84920216, (float)0.85092574, (float)0.85264021, + (float)0.85434544, (float)0.85604161, (float)0.85772866, (float)0.85940641, (float)0.86107504, + (float)0.86273444, (float)0.86438453, (float)0.86602545, (float)0.86765707, (float)0.86927933, + (float)0.87089235, (float)0.87249607, (float)0.87409031, (float)0.87567532, (float)0.87725097, + (float)0.87881714, (float)0.88037390, (float)0.88192129, (float)0.88345921, (float)0.88498765, + (float)0.88650668, (float)0.88801610, (float)0.88951612, (float)0.89100653, (float)0.89248741, + (float)0.89395881, (float)0.89542055, (float)0.89687276, (float)0.89831537, (float)0.89974827, + (float)0.90117162, (float)0.90258533, (float)0.90398932, (float)0.90538365, (float)0.90676826, + (float)0.90814316, (float)0.90950841, (float)0.91086388, (float)0.91220951, (float)0.91354549, + (float)0.91487163, (float)0.91618794, (float)0.91749454, (float)0.91879123, (float)0.92007810, + (float)0.92135513, (float)0.92262226, (float)0.92387950, (float)0.92512691, (float)0.92636442, + (float)0.92759192, (float)0.92880958, (float)0.93001723, (float)0.93121493, (float)0.93240267, + (float)0.93358046, (float)0.93474817, (float)0.93590593, (float)0.93705362, (float)0.93819135, + (float)0.93931901, (float)0.94043654, (float)0.94154406, (float)0.94264150, (float)0.94372880, + (float)0.94480604, (float)0.94587320, (float)0.94693011, (float)0.94797695, (float)0.94901365, + (float)0.95004016, (float)0.95105654, (float)0.95206273, (float)0.95305866, (float)0.95404440, + (float)0.95501995, (float)0.95598525, (float)0.95694035, (float)0.95788521, (float)0.95881975, + (float)0.95974404, (float)0.96065807, 
(float)0.96156180, (float)0.96245527, (float)0.96333838, + (float)0.96421117, (float)0.96507370, (float)0.96592581, (float)0.96676767, (float)0.96759909, + (float)0.96842021, (float)0.96923089, (float)0.97003126, (float)0.97082120, (float)0.97160077, + (float)0.97236991, (float)0.97312868, (float)0.97387701, (float)0.97461486, (float)0.97534233, + (float)0.97605932, (float)0.97676587, (float)0.97746199, (float)0.97814763, (float)0.97882277, + (float)0.97948742, (float)0.98014158, (float)0.98078531, (float)0.98141843, (float)0.98204112, + (float)0.98265332, (float)0.98325491, (float)0.98384601, (float)0.98442656, (float)0.98499662, + (float)0.98555607, (float)0.98610497, (float)0.98664331, (float)0.98717111, (float)0.98768836, + (float)0.98819500, (float)0.98869103, (float)0.98917651, (float)0.98965138, (float)0.99011570, + (float)0.99056935, (float)0.99101239, (float)0.99144489, (float)0.99186671, (float)0.99227792, + (float)0.99267852, (float)0.99306846, (float)0.99344778, (float)0.99381649, (float)0.99417448, + (float)0.99452192, (float)0.99485862, (float)0.99518472, (float)0.99550015, (float)0.99580491, + (float)0.99609905, (float)0.99638247, (float)0.99665523, (float)0.99691731, (float)0.99716878, + (float)0.99740952, (float)0.99763954, (float)0.99785894, (float)0.99806762, (float)0.99826562, + (float)0.99845290, (float)0.99862951, (float)0.99879545, (float)0.99895066, (float)0.99909520, + (float)0.99922901, (float)0.99935216, (float)0.99946457, (float)0.99956632, (float)0.99965733, + (float)0.99973762, (float)0.99980724, (float)0.99986613, (float)0.99991435, (float)0.99995178, + (float)0.99997860, (float)0.99999464, (float)1.00000000, (float)0.99999464, (float)0.99997860, + (float)0.99995178, (float)0.99991435, (float)0.99986613, (float)0.99980724, (float)0.99973762, + (float)0.99965733, (float)0.99956632, (float)0.99946457, (float)0.99935216, (float)0.99922901, + (float)0.99909520, (float)0.99895066, (float)0.99879545, (float)0.99862951, (float)0.99845290, + (float)0.99826562, (float)0.99806762, (float)0.99785894, (float)0.99763954, (float)0.99740946, + (float)0.99716872, (float)0.99691731, (float)0.99665523, (float)0.99638247, (float)0.99609905, + (float)0.99580491, (float)0.99550015, (float)0.99518472, (float)0.99485862, (float)0.99452192, + (float)0.99417448, (float)0.99381644, (float)0.99344778, (float)0.99306846, (float)0.99267852, + (float)0.99227792, (float)0.99186671, (float)0.99144489, (float)0.99101239, (float)0.99056935, + (float)0.99011564, (float)0.98965138, (float)0.98917651, (float)0.98869103, (float)0.98819494, + (float)0.98768836, (float)0.98717111, (float)0.98664331, (float)0.98610497, (float)0.98555607, + (float)0.98499656, (float)0.98442656, (float)0.98384601, (float)0.98325491, (float)0.98265326, + (float)0.98204112, (float)0.98141843, (float)0.98078525, (float)0.98014158, (float)0.97948742, + (float)0.97882277, (float)0.97814757, (float)0.97746193, (float)0.97676587, (float)0.97605932, + (float)0.97534227, (float)0.97461486, (float)0.97387695, (float)0.97312862, (float)0.97236991, + (float)0.97160077, (float)0.97082120, (float)0.97003126, (float)0.96923089, (float)0.96842015, + (float)0.96759909, (float)0.96676761, (float)0.96592581, (float)0.96507365, (float)0.96421117, + (float)0.96333838, (float)0.96245521, (float)0.96156180, (float)0.96065807, (float)0.95974404, + (float)0.95881969, (float)0.95788515, (float)0.95694029, (float)0.95598525, (float)0.95501995, + (float)0.95404440, (float)0.95305860, (float)0.95206267, (float)0.95105648, (float)0.95004016, + 
(float)0.94901365, (float)0.94797695, (float)0.94693011, (float)0.94587314, (float)0.94480604, + (float)0.94372880, (float)0.94264150, (float)0.94154406, (float)0.94043654, (float)0.93931895, + (float)0.93819129, (float)0.93705362, (float)0.93590593, (float)0.93474817, (float)0.93358046, + (float)0.93240267, (float)0.93121493, (float)0.93001723, (float)0.92880952, (float)0.92759192, + (float)0.92636436, (float)0.92512691, (float)0.92387950, (float)0.92262226, (float)0.92135507, + (float)0.92007804, (float)0.91879123, (float)0.91749448, (float)0.91618794, (float)0.91487157, + (float)0.91354543, (float)0.91220951, (float)0.91086382, (float)0.90950835, (float)0.90814310, + (float)0.90676820, (float)0.90538365, (float)0.90398932, (float)0.90258527, (float)0.90117157, + (float)0.89974827, (float)0.89831525, (float)0.89687276, (float)0.89542055, (float)0.89395875, + (float)0.89248741, (float)0.89100647, (float)0.88951600, (float)0.88801610, (float)0.88650662, + (float)0.88498759, (float)0.88345915, (float)0.88192123, (float)0.88037384, (float)0.87881714, + (float)0.87725091, (float)0.87567532, (float)0.87409031, (float)0.87249595, (float)0.87089223, + (float)0.86927933, (float)0.86765701, (float)0.86602539, (float)0.86438447, (float)0.86273432, + (float)0.86107504, (float)0.85940641, (float)0.85772860, (float)0.85604161, (float)0.85434544, + (float)0.85264009, (float)0.85092574, (float)0.84920216, (float)0.84746951, (float)0.84572780, + (float)0.84397697, (float)0.84221715, (float)0.84044844, (float)0.83867055, (float)0.83688372, + (float)0.83508795, (float)0.83328319, (float)0.83146954, (float)0.82964706, (float)0.82781565, + (float)0.82597530, (float)0.82412612, (float)0.82226813, (float)0.82040137, (float)0.81852591, + (float)0.81664157, (float)0.81474847, (float)0.81284660, (float)0.81093609, (float)0.80901700, + (float)0.80708915, (float)0.80515265, (float)0.80320752, (float)0.80125374, (float)0.79929143, + (float)0.79732066, (float)0.79534125, (float)0.79335332, (float)0.79135686, (float)0.78935200, + (float)0.78733861, (float)0.78531694, (float)0.78328675, (float)0.78124815, (float)0.77920121, + (float)0.77714586, (float)0.77508223, (float)0.77301049, (float)0.77093029, (float)0.76884180, + (float)0.76674509, (float)0.76464021, (float)0.76252711, (float)0.76040596, (float)0.75827658, + (float)0.75613904, (float)0.75399339, (float)0.75183970, (float)0.74967796, (float)0.74750835, + (float)0.74533057, (float)0.74314481, (float)0.74095106, (float)0.73874938, (float)0.73653996, + (float)0.73432249, (float)0.73209721, (float)0.72986400, (float)0.72762305, (float)0.72537428, + (float)0.72311789, (float)0.72085363, (float)0.71858162, (float)0.71630186, (float)0.71401453, + (float)0.71171951, (float)0.70941705, (float)0.70710677, (float)0.70478898, (float)0.70246363, + (float)0.70013070, (float)0.69779032, (float)0.69544268, (float)0.69308734, (float)0.69072461, + (float)0.68835449, (float)0.68597704, (float)0.68359220, (float)0.68120021, (float)0.67880070, + (float)0.67639399, (float)0.67398006, (float)0.67155886, (float)0.66913044, (float)0.66669512, + (float)0.66425240, (float)0.66180259, (float)0.65934575, (float)0.65688181, (float)0.65441096, + (float)0.65193301, (float)0.64944804, (float)0.64695609, (float)0.64445722, (float)0.64195150, + (float)0.63943905, (float)0.63691956, (float)0.63439327, (float)0.63186014, (float)0.62932026, + (float)0.62677372, (float)0.62422055, (float)0.62166059, (float)0.61909389, (float)0.61652064, + (float)0.61394072, (float)0.61135429, (float)0.60876143, 
(float)0.60616189, (float)0.60355592, + (float)0.60094339, (float)0.59832448, (float)0.59569913, (float)0.59306765, (float)0.59042960, + (float)0.58778518, (float)0.58513451, (float)0.58247757, (float)0.57981461, (float)0.57714522, + (float)0.57446963, (float)0.57178789, (float)0.56910002, (float)0.56640613, (float)0.56370628, + (float)0.56100023, (float)0.55828822, (float)0.55557019, (float)0.55284619, (float)0.55011630, + (float)0.54738069, (float)0.54463905, (float)0.54189152, (float)0.53913826, (float)0.53637916, + (float)0.53361434, (float)0.53084403, (float)0.52806783, (float)0.52528596, (float)0.52249849, + (float)0.51970541, (float)0.51690674, (float)0.51410276, (float)0.51129305, (float)0.50847787, + (float)0.50565726, (float)0.50283122, (float)0.50000006, (float)0.49716327, (float)0.49432117, + (float)0.49147379, (float)0.48862115, (float)0.48576325, (float)0.48290038, (float)0.48003212, + (float)0.47715873, (float)0.47428021, (float)0.47139663, (float)0.46850798, (float)0.46561456, + (float)0.46271589, (float)0.45981231, (float)0.45690379, (float)0.45399037, (float)0.45107210, + (float)0.44814920, (float)0.44522130, (float)0.44228864, (float)0.43935123, (float)0.43640912, + (float)0.43346232, (float)0.43051112, (float)0.42755505, (float)0.42459446, (float)0.42162928, + (float)0.41865960, (float)0.41568545, (float)0.41270703, (float)0.40972400, (float)0.40673658, + (float)0.40374479, (float)0.40074870, (float)0.39774850, (float)0.39474386, (float)0.39173496, + (float)0.38872188, (float)0.38570464, (float)0.38268328, (float)0.37965804, (float)0.37662849, + (float)0.37359491, (float)0.37055734, (float)0.36751580, (float)0.36447033, (float)0.36142117, + (float)0.35836792, (float)0.35531086, (float)0.35224995, (float)0.34918529, (float)0.34611690, + (float)0.34304500, (float)0.33996922, (float)0.33688980, (float)0.33380675, (float)0.33072016, + (float)0.32763001, (float)0.32453656, (float)0.32143945, (float)0.31833887, (float)0.31523487, + (float)0.31212750, (float)0.30901679, (float)0.30590302, (float)0.30278572, (float)0.29966521, + (float)0.29654145, (float)0.29341453, (float)0.29028472, (float)0.28715155, (float)0.28401530, + (float)0.28087601, (float)0.27773371, (float)0.27458847, (float)0.27144048, (float)0.26828936, + (float)0.26513538, (float)0.26197854, (float)0.25881892, (float)0.25565651, (float)0.25249159, + (float)0.24932374, (float)0.24615324, (float)0.24298008, (float)0.23980433, (float)0.23662600, + (float)0.23344538, (float)0.23026201, (float)0.22707619, (float)0.22388794, (float)0.22069728, + (float)0.21750426, (float)0.21430916, (float)0.21111152, (float)0.20791161, (float)0.20470949, + (float)0.20150517, (float)0.19829892, (float)0.19509031, (float)0.19187963, (float)0.18866688, + (float)0.18545210, (float)0.18223536, (float)0.17901689, (float)0.17579627, (float)0.17257376, + (float)0.16934940, (float)0.16612324, (float)0.16289529, (float)0.15966584, (float)0.15643445, + (float)0.15320137, (float)0.14996666, (float)0.14673033, (float)0.14349243, (float)0.14025325, + (float)0.13701232, (float)0.13376991, (float)0.13052608, (float)0.12728085, (float)0.12403426, + (float)0.12078657, (float)0.11753736, (float)0.11428688, (float)0.11103519, (float)0.10778230, + (float)0.10452849, (float)0.10127334, (float)0.09801710, (float)0.09475980, (float)0.09150149, + (float)0.08824220, (float)0.08498220, (float)0.08172106, (float)0.07845904, (float)0.07519618, + (float)0.07193252, (float)0.06866808, (float)0.06540315, (float)0.06213728, (float)0.05887074, + (float)0.05560357, 
(float)0.05233581, (float)0.04906749, (float)0.04579888, (float)0.04252954, + (float)0.03925974, (float)0.03598953, (float)0.03271893, (float)0.02944798, (float)0.02617695, + (float)0.02290541, (float)0.01963361, (float)0.01636161, (float)0.01308943, (float)0.00981712, + (float)0.00654493, (float)0.00327244, (float)0.00000000, (float)0.00000000, (float)0.00000000, + (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, + (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, + (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, + (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, + (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, + (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000 +}; + +#endif // MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_WINDOWS_PRIVATE_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/render_queue_item_verifier.h b/third_party/libwebrtc/webrtc/modules/audio_processing/render_queue_item_verifier.h new file mode 100644 index 0000000000..b8aff4a107 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/render_queue_item_verifier.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_RENDER_QUEUE_ITEM_VERIFIER_H_ +#define MODULES_AUDIO_PROCESSING_RENDER_QUEUE_ITEM_VERIFIER_H_ + +#include <vector> + +namespace webrtc { + +// Functor to use when supplying a verifier function for the queue item +// verification. +template <typename T> +class RenderQueueItemVerifier { + public: + explicit RenderQueueItemVerifier(size_t minimum_capacity) + : minimum_capacity_(minimum_capacity) {} + + bool operator()(const std::vector<T>& v) const { + return v.capacity() >= minimum_capacity_; + } + + private: + size_t minimum_capacity_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_RENDER_QUEUE_ITEM_VERIFIER_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/residual_echo_detector.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/residual_echo_detector.cc new file mode 100644 index 0000000000..b35c1558c7 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/residual_echo_detector.cc @@ -0,0 +1,204 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree.
+ */ + +#include "modules/audio_processing/residual_echo_detector.h" + +#include <algorithm> +#include <numeric> + +#include "modules/audio_processing/audio_buffer.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/atomicops.h" +#include "rtc_base/logging.h" +#include "system_wrappers/include/metrics.h" + +namespace { + +float Power(rtc::ArrayView<const float> input) { + if (input.empty()) { + return 0.f; + } + return std::inner_product(input.begin(), input.end(), input.begin(), 0.f) / + input.size(); +} + +constexpr size_t kLookbackFrames = 650; +// TODO(ivoc): Verify the size of this buffer. +constexpr size_t kRenderBufferSize = 30; +constexpr float kAlpha = 0.001f; +// 10 seconds of data, updated every 10 ms. +constexpr size_t kAggregationBufferSize = 10 * 100; + +} // namespace + +namespace webrtc { + +int ResidualEchoDetector::instance_count_ = 0; + +ResidualEchoDetector::ResidualEchoDetector() + : data_dumper_( + new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))), + render_buffer_(kRenderBufferSize), + render_power_(kLookbackFrames), + render_power_mean_(kLookbackFrames), + render_power_std_dev_(kLookbackFrames), + covariances_(kLookbackFrames), + recent_likelihood_max_(kAggregationBufferSize) {} + +ResidualEchoDetector::~ResidualEchoDetector() = default; + +void ResidualEchoDetector::AnalyzeRenderAudio( + rtc::ArrayView<const float> render_audio) { + // Dump debug data assuming 48 kHz sample rate (if this assumption is not + // valid the dumped audio will need to be converted offline accordingly). + data_dumper_->DumpWav("ed_render", render_audio.size(), render_audio.data(), + 48000, 1); + + if (render_buffer_.Size() == 0) { + frames_since_zero_buffer_size_ = 0; + } else if (frames_since_zero_buffer_size_ >= kRenderBufferSize) { + // This can happen in a few cases: at the start of a call, due to a glitch + // or due to clock drift. The excess render value will be ignored. + // TODO(ivoc): Include how often this happens in APM stats. + render_buffer_.Pop(); + frames_since_zero_buffer_size_ = 0; + } + ++frames_since_zero_buffer_size_; + float power = Power(render_audio); + render_buffer_.Push(power); +} + +void ResidualEchoDetector::AnalyzeCaptureAudio( + rtc::ArrayView<const float> capture_audio) { + // Dump debug data assuming 48 kHz sample rate (if this assumption is not + // valid the dumped audio will need to be converted offline accordingly). + data_dumper_->DumpWav("ed_capture", capture_audio.size(), + capture_audio.data(), 48000, 1); + + if (first_process_call_) { + // On the first process call (so the start of a call), we must flush the + // render buffer, otherwise the render data will be delayed. + render_buffer_.Clear(); + first_process_call_ = false; + } + + // Get the next render value. + const rtc::Optional<float> buffered_render_power = render_buffer_.Pop(); + if (!buffered_render_power) { + // This can happen in a few cases: at the start of a call, due to a glitch + // or due to clock drift. The excess capture value will be ignored. + // TODO(ivoc): Include how often this happens in APM stats. + return; + } + // Update the render statistics, and store the statistics in circular buffers.
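+ // A note on the bookkeeping below: |render_power_|, |render_power_mean_| and
+ // |render_power_std_dev_| are ring buffers that share |next_insertion_index_|.
+ // Each new entry overwrites the one written kLookbackFrames (650) calls
+ // earlier, i.e. roughly 6.5 seconds ago at one power value per 10 ms frame.
+ // The per-delay loop further down walks this ring backwards, so that
+ // covariances_[delay] always compares the current capture frame against the
+ // render frame |delay| frames in the past.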
+ render_statistics_.Update(*buffered_render_power); + RTC_DCHECK_LT(next_insertion_index_, kLookbackFrames); + render_power_[next_insertion_index_] = *buffered_render_power; + render_power_mean_[next_insertion_index_] = render_statistics_.mean(); + render_power_std_dev_[next_insertion_index_] = + render_statistics_.std_deviation(); + + // Get the next capture value, update capture statistics and add the relevant + // values to the buffers. + const float capture_power = Power(capture_audio); + capture_statistics_.Update(capture_power); + const float capture_mean = capture_statistics_.mean(); + const float capture_std_deviation = capture_statistics_.std_deviation(); + + // Update the covariance values and determine the new echo likelihood. + echo_likelihood_ = 0.f; + size_t read_index = next_insertion_index_; + + int best_delay = -1; + for (size_t delay = 0; delay < covariances_.size(); ++delay) { + RTC_DCHECK_LT(read_index, render_power_.size()); + covariances_[delay].Update(capture_power, capture_mean, + capture_std_deviation, render_power_[read_index], + render_power_mean_[read_index], + render_power_std_dev_[read_index]); + read_index = read_index > 0 ? read_index - 1 : kLookbackFrames - 1; + + if (covariances_[delay].normalized_cross_correlation() > echo_likelihood_) { + echo_likelihood_ = covariances_[delay].normalized_cross_correlation(); + best_delay = static_cast<int>(delay); + } + } + // This is a temporary log message to help find the underlying cause for echo + // likelihoods > 1.0. + // TODO(ivoc): Remove once the issue is resolved. + if (echo_likelihood_ > 1.1f) { + // Make sure we don't spam the log. + if (log_counter_ < 5 && best_delay != -1) { + size_t read_index = kLookbackFrames + next_insertion_index_ - best_delay; + if (read_index >= kLookbackFrames) { + read_index -= kLookbackFrames; + } + RTC_DCHECK_LT(read_index, render_power_.size()); + RTC_LOG_F(LS_ERROR) << "Echo detector internal state: {" + << "Echo likelihood: " << echo_likelihood_ + << ", Best Delay: " << best_delay << ", Covariance: " + << covariances_[best_delay].covariance() + << ", Last capture power: " << capture_power + << ", Capture mean: " << capture_mean + << ", Capture standard deviation: " + << capture_std_deviation << ", Last render power: " + << render_power_[read_index] + << ", Render mean: " << render_power_mean_[read_index] + << ", Render standard deviation: " + << render_power_std_dev_[read_index] + << ", Reliability: " << reliability_ << "}"; + log_counter_++; + } + } + RTC_DCHECK_LT(echo_likelihood_, 1.1f); + + reliability_ = (1.0f - kAlpha) * reliability_ + kAlpha * 1.0f; + echo_likelihood_ *= reliability_; + // This is a temporary fix to prevent echo likelihood values > 1.0. + // TODO(ivoc): Find the root cause of this issue and fix it. + echo_likelihood_ = std::min(echo_likelihood_, 1.0f); + int echo_percentage = static_cast<int>(echo_likelihood_ * 100); + RTC_HISTOGRAM_COUNTS("WebRTC.Audio.ResidualEchoDetector.EchoLikelihood", + echo_percentage, 0, 100, 100 /* number of bins */); + + // Update the buffer of recent likelihood values. + recent_likelihood_max_.Update(echo_likelihood_); + + // Update the next insertion index. + next_insertion_index_ = next_insertion_index_ < (kLookbackFrames - 1) + ?
next_insertion_index_ + 1 + : 0; +} + +void ResidualEchoDetector::Initialize() { + render_buffer_.Clear(); + std::fill(render_power_.begin(), render_power_.end(), 0.f); + std::fill(render_power_mean_.begin(), render_power_mean_.end(), 0.f); + std::fill(render_power_std_dev_.begin(), render_power_std_dev_.end(), 0.f); + render_statistics_.Clear(); + capture_statistics_.Clear(); + recent_likelihood_max_.Clear(); + for (auto& cov : covariances_) { + cov.Clear(); + } + echo_likelihood_ = 0.f; + next_insertion_index_ = 0; + reliability_ = 0.f; +} + +void ResidualEchoDetector::PackRenderAudioBuffer( + AudioBuffer* audio, + std::vector<float>* packed_buffer) { + packed_buffer->clear(); + packed_buffer->insert(packed_buffer->end(), audio->channels_f()[0], + audio->channels_f()[0] + audio->num_frames()); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/residual_echo_detector.h b/third_party/libwebrtc/webrtc/modules/audio_processing/residual_echo_detector.h new file mode 100644 index 0000000000..de1b989110 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/residual_echo_detector.h @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_RESIDUAL_ECHO_DETECTOR_H_ +#define MODULES_AUDIO_PROCESSING_RESIDUAL_ECHO_DETECTOR_H_ + +#include <vector> + +#include "api/array_view.h" +#include "modules/audio_processing/echo_detector/circular_buffer.h" +#include "modules/audio_processing/echo_detector/mean_variance_estimator.h" +#include "modules/audio_processing/echo_detector/moving_max.h" +#include "modules/audio_processing/echo_detector/normalized_covariance_estimator.h" + +namespace webrtc { + +class ApmDataDumper; +class AudioBuffer; +class EchoDetector; + +class ResidualEchoDetector { + public: + ResidualEchoDetector(); + ~ResidualEchoDetector(); + + // This function should be called while holding the render lock. + void AnalyzeRenderAudio(rtc::ArrayView<const float> render_audio); + + // This function should be called while holding the capture lock. + void AnalyzeCaptureAudio(rtc::ArrayView<const float> capture_audio); + + // This function should be called while holding the capture lock. + void Initialize(); + + // This function is for testing purposes only. + void SetReliabilityForTest(float value) { reliability_ = value; } + + static void PackRenderAudioBuffer(AudioBuffer* audio, + std::vector<float>* packed_buffer); + + // This function should be called while holding the capture lock. + float echo_likelihood() const { return echo_likelihood_; } + + float echo_likelihood_recent_max() const { + return recent_likelihood_max_.max(); + } + + private: + static int instance_count_; + std::unique_ptr<ApmDataDumper> data_dumper_; + // Keep track if the |Process| function has been previously called. + bool first_process_call_ = true; + // Buffer for storing the power of incoming farend buffers. This is needed for + // cases where calls to BufferFarend and Process are jittery. + CircularBuffer render_buffer_; + // Count how long ago it was that the size of |render_buffer_| was zero. 
This + // value is also reset to zero when clock drift is detected and a value from + // the renderbuffer is discarded, even though the buffer is not actually zero + // at that point. This is done to avoid repeatedly removing elements in this + // situation. + size_t frames_since_zero_buffer_size_ = 0; + + // Circular buffers containing delayed versions of the power, mean and + // standard deviation, for calculating the delayed covariance values. + std::vector<float> render_power_; + std::vector<float> render_power_mean_; + std::vector<float> render_power_std_dev_; + // Covariance estimates for different delay values. + std::vector<NormalizedCovarianceEstimator> covariances_; + // Index where next element should be inserted in all of the above circular + // buffers. + size_t next_insertion_index_ = 0; + + MeanVarianceEstimator render_statistics_; + MeanVarianceEstimator capture_statistics_; + // Current echo likelihood. + float echo_likelihood_ = 0.f; + // Reliability of the current likelihood. + float reliability_ = 0.f; + MovingMax recent_likelihood_max_; + + int log_counter_ = 0; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_RESIDUAL_ECHO_DETECTOR_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/residual_echo_detector_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/residual_echo_detector_unittest.cc new file mode 100644 index 0000000000..baf83ba4aa --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/residual_echo_detector_unittest.cc @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <vector> + +#include "modules/audio_processing/residual_echo_detector.h" +#include "test/gtest.h" + +namespace webrtc { + +TEST(ResidualEchoDetectorTests, Echo) { + ResidualEchoDetector echo_detector; + echo_detector.SetReliabilityForTest(1.0f); + std::vector<float> ones(160, 1.f); + std::vector<float> zeros(160, 0.f); + + // In this test the capture signal has a delay of 10 frames w.r.t. the render + // signal, but is otherwise identical. Both signals are periodic with a 20 + // frame interval. + for (int i = 0; i < 1000; i++) { + if (i % 20 == 0) { + echo_detector.AnalyzeRenderAudio(ones); + echo_detector.AnalyzeCaptureAudio(zeros); + } else if (i % 20 == 10) { + echo_detector.AnalyzeRenderAudio(zeros); + echo_detector.AnalyzeCaptureAudio(ones); + } else { + echo_detector.AnalyzeRenderAudio(zeros); + echo_detector.AnalyzeCaptureAudio(zeros); + } + } + // We expect to detect echo with near certain likelihood. + EXPECT_NEAR(1.f, echo_detector.echo_likelihood(), 0.01f); +} + +TEST(ResidualEchoDetectorTests, NoEcho) { + ResidualEchoDetector echo_detector; + echo_detector.SetReliabilityForTest(1.0f); + std::vector<float> ones(160, 1.f); + std::vector<float> zeros(160, 0.f); + + // In this test the capture signal is always zero, so no echo should be + // detected. + for (int i = 0; i < 1000; i++) { + if (i % 20 == 0) { + echo_detector.AnalyzeRenderAudio(ones); + } else { + echo_detector.AnalyzeRenderAudio(zeros); + } + echo_detector.AnalyzeCaptureAudio(zeros); + } + // We expect to not detect any echo. 
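+ // An all-zero capture signal has zero mean and zero variance, so none of the
+ // per-delay normalized cross-correlations should ever rise above the initial
+ // likelihood of zero.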
+ EXPECT_NEAR(0.f, echo_detector.echo_likelihood(), 0.01f); +} + +TEST(ResidualEchoDetectorTests, EchoWithRenderClockDrift) { + ResidualEchoDetector echo_detector; + echo_detector.SetReliabilityForTest(1.0f); + std::vector<float> ones(160, 1.f); + std::vector<float> zeros(160, 0.f); + + // In this test the capture signal has a delay of 10 frames w.r.t. the render + // signal, but is otherwise identical. Both signals are periodic with a 20 + // frame interval. There is a simulated clock drift of 1% in this test, with + // the render side producing data slightly faster. + for (int i = 0; i < 1000; i++) { + if (i % 20 == 0) { + echo_detector.AnalyzeRenderAudio(ones); + echo_detector.AnalyzeCaptureAudio(zeros); + } else if (i % 20 == 10) { + echo_detector.AnalyzeRenderAudio(zeros); + echo_detector.AnalyzeCaptureAudio(ones); + } else { + echo_detector.AnalyzeRenderAudio(zeros); + echo_detector.AnalyzeCaptureAudio(zeros); + } + if (i % 100 == 0) { + // This is causing the simulated clock drift. + echo_detector.AnalyzeRenderAudio(zeros); + } + } + // We expect to detect echo with high likelihood. Clock drift is harder to + // correct on the render side than on the capture side. This is due to the + // render buffer: clock drift can only be discovered after a certain delay. + // A growing buffer can be caused by jitter or clock drift, and it's not + // possible to make this decision right away. For this reason we only expect + // an echo likelihood above 75% in this test. + EXPECT_GT(echo_detector.echo_likelihood(), 0.75f); +} + +TEST(ResidualEchoDetectorTests, EchoWithCaptureClockDrift) { + ResidualEchoDetector echo_detector; + echo_detector.SetReliabilityForTest(1.0f); + std::vector<float> ones(160, 1.f); + std::vector<float> zeros(160, 0.f); + + // In this test the capture signal has a delay of 10 frames w.r.t. the render + // signal, but is otherwise identical. Both signals are periodic with a 20 + // frame interval. There is a simulated clock drift of 1% in this test, with + // the capture side producing data slightly faster. + for (int i = 0; i < 1000; i++) { + if (i % 20 == 0) { + echo_detector.AnalyzeRenderAudio(ones); + echo_detector.AnalyzeCaptureAudio(zeros); + } else if (i % 20 == 10) { + echo_detector.AnalyzeRenderAudio(zeros); + echo_detector.AnalyzeCaptureAudio(ones); + } else { + echo_detector.AnalyzeRenderAudio(zeros); + echo_detector.AnalyzeCaptureAudio(zeros); + } + if (i % 100 == 0) { + // This is causing the simulated clock drift. + echo_detector.AnalyzeCaptureAudio(zeros); + } + } + // We expect to detect echo with near certain likelihood. + EXPECT_NEAR(1.f, echo_detector.echo_likelihood(), 0.01f); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/rms_level.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/rms_level.cc new file mode 100644 index 0000000000..55db226af2 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/rms_level.cc @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree.
+ */ + +#include "modules/audio_processing/rms_level.h" + +#include <math.h> +#include <algorithm> +#include <numeric> + +#include "rtc_base/checks.h" + +namespace webrtc { +namespace { +static constexpr float kMaxSquaredLevel = 32768 * 32768; +// kMinLevel is the level corresponding to kMinLevelDb, that is 10^(-127/10). +static constexpr float kMinLevel = 1.995262314968883e-13f; + +// Calculates the normalized RMS value from a mean square value. The input +// should be the sum of squared samples divided by the number of samples. The +// value will be normalized to full range before computing the RMS, which is +// returned as a negated dBFS value. That is, 0 is full amplitude while 127 is +// very faint. +int ComputeRms(float mean_square) { + if (mean_square <= kMinLevel * kMaxSquaredLevel) { + // Very faint; simply return the minimum value. + return RmsLevel::kMinLevelDb; + } + // Normalize by the max level. + const float mean_square_norm = mean_square / kMaxSquaredLevel; + RTC_DCHECK_GT(mean_square_norm, kMinLevel); + // 20log_10(x^0.5) = 10log_10(x) + const float rms = 10.f * log10(mean_square_norm); + RTC_DCHECK_LE(rms, 0.f); + RTC_DCHECK_GT(rms, -RmsLevel::kMinLevelDb); + // Return the negated value. + return static_cast<int>(-rms + 0.5f); +} +} // namespace + +RmsLevel::RmsLevel() { + Reset(); +} + +RmsLevel::~RmsLevel() = default; + +void RmsLevel::Reset() { + sum_square_ = 0.f; + sample_count_ = 0; + max_sum_square_ = 0.f; + block_size_ = rtc::nullopt; +} + +void RmsLevel::Analyze(rtc::ArrayView<const int16_t> data) { + if (data.empty()) { + return; + } + + CheckBlockSize(data.size()); + + const float sum_square = + std::accumulate(data.begin(), data.end(), 0.f, + [](float a, int16_t b) { return a + b * b; }); + RTC_DCHECK_GE(sum_square, 0.f); + sum_square_ += sum_square; + sample_count_ += data.size(); + + max_sum_square_ = std::max(max_sum_square_, sum_square); +} + +void RmsLevel::AnalyzeMuted(size_t length) { + CheckBlockSize(length); + sample_count_ += length; +} + +int RmsLevel::Average() { + int rms = (sample_count_ == 0) ? RmsLevel::kMinLevelDb + : ComputeRms(sum_square_ / sample_count_); + Reset(); + return rms; +} + +RmsLevel::Levels RmsLevel::AverageAndPeak() { + // Note that block_size_ should by design always be non-empty when + // sample_count_ != 0. Also, the * operator of rtc::Optional enforces this + // with a DCHECK. + Levels levels = (sample_count_ == 0) + ? Levels{RmsLevel::kMinLevelDb, RmsLevel::kMinLevelDb} + : Levels{ComputeRms(sum_square_ / sample_count_), + ComputeRms(max_sum_square_ / *block_size_)}; + Reset(); + return levels; +} + +void RmsLevel::CheckBlockSize(size_t block_size) { + if (block_size_ != block_size) { + Reset(); + block_size_ = block_size; + } +} +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/rms_level.h b/third_party/libwebrtc/webrtc/modules/audio_processing/rms_level.h new file mode 100644 index 0000000000..6fe22fd120 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/rms_level.h @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree.
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_RMS_LEVEL_H_ +#define MODULES_AUDIO_PROCESSING_RMS_LEVEL_H_ + +#include "api/array_view.h" +#include "api/optional.h" +#include "typedefs.h" // NOLINT(build/include) + +namespace webrtc { + +// Computes the root mean square (RMS) level in dBFS (decibels from digital +// full-scale) of audio data. The computation follows RFC 6465: +// https://tools.ietf.org/html/rfc6465 +// with the intent that it can provide the RTP audio level indication. +// +// The expected approach is to provide constant-sized chunks of audio to +// Analyze(). When enough chunks have been accumulated to form a packet, call +// Average() to get the audio level indicator for the RTP header. +class RmsLevel { + public: + struct Levels { + int average; + int peak; + }; + + static constexpr int kMinLevelDb = 127; + + RmsLevel(); + ~RmsLevel(); + + // Can be called to reset internal states, but is not required during normal + // operation. + void Reset(); + + // Pass each chunk of audio to Analyze() to accumulate the level. + void Analyze(rtc::ArrayView<const int16_t> data); + + // If all samples with the given |length| have a magnitude of zero, this is + // a shortcut to avoid some computation. + void AnalyzeMuted(size_t length); + + // Computes the RMS level over all data passed to Analyze() since the last + // call to Average(). The returned value is positive but should be interpreted + // as negative as per the RFC. It is constrained to [0, 127]. Resets the + // internal state to start a new measurement period. + int Average(); + + // Like Average() above, but also returns the RMS peak value. Resets the + // internal state to start a new measurement period. + Levels AverageAndPeak(); + + private: + // Compares |block_size| with |block_size_|. If they are different, calls + // Reset() and stores the new size. + void CheckBlockSize(size_t block_size); + + float sum_square_; + size_t sample_count_; + float max_sum_square_; + rtc::Optional<size_t> block_size_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_RMS_LEVEL_H_ + diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/rms_level_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/rms_level_unittest.cc new file mode 100644 index 0000000000..cf7683d1c8 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/rms_level_unittest.cc @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree.
+ */
+#include <cmath>
+#include <memory>
+#include <vector>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/rms_level.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/numerics/mathutils.h"
+#include "rtc_base/numerics/safe_conversions.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace {
+constexpr int kSampleRateHz = 48000;
+constexpr size_t kBlockSizeSamples = kSampleRateHz / 100;
+
+std::unique_ptr<RmsLevel> RunTest(rtc::ArrayView<const int16_t> input) {
+  std::unique_ptr<RmsLevel> level(new RmsLevel);
+  for (size_t n = 0; n + kBlockSizeSamples <= input.size();
+       n += kBlockSizeSamples) {
+    level->Analyze(input.subview(n, kBlockSizeSamples));
+  }
+  return level;
+}
+
+std::vector<int16_t> CreateSinusoid(int frequency_hz,
+                                    int amplitude,
+                                    size_t num_samples) {
+  std::vector<int16_t> x(num_samples);
+  for (size_t n = 0; n < num_samples; ++n) {
+    x[n] = rtc::saturated_cast<int16_t>(
+        amplitude * std::sin(2 * M_PI * n * frequency_hz / kSampleRateHz));
+  }
+  return x;
+}
+}  // namespace
+
+TEST(RmsLevelTest, Run1000HzFullScale) {
+  auto x = CreateSinusoid(1000, INT16_MAX, kSampleRateHz);
+  auto level = RunTest(x);
+  EXPECT_EQ(3, level->Average());  // -3 dBFS
+}
+
+TEST(RmsLevelTest, Run1000HzFullScaleAverageAndPeak) {
+  auto x = CreateSinusoid(1000, INT16_MAX, kSampleRateHz);
+  auto level = RunTest(x);
+  auto stats = level->AverageAndPeak();
+  EXPECT_EQ(3, stats.average);  // -3 dBFS
+  EXPECT_EQ(3, stats.peak);
+}
+
+TEST(RmsLevelTest, Run1000HzHalfScale) {
+  auto x = CreateSinusoid(1000, INT16_MAX / 2, kSampleRateHz);
+  auto level = RunTest(x);
+  EXPECT_EQ(9, level->Average());  // -9 dBFS
+}
+
+TEST(RmsLevelTest, RunZeros) {
+  std::vector<int16_t> x(kSampleRateHz, 0);  // 1 second of pure silence.
+  auto level = RunTest(x);
+  EXPECT_EQ(127, level->Average());
+}
+
+TEST(RmsLevelTest, RunZerosAverageAndPeak) {
+  std::vector<int16_t> x(kSampleRateHz, 0);  // 1 second of pure silence.
+  auto level = RunTest(x);
+  auto stats = level->AverageAndPeak();
+  EXPECT_EQ(127, stats.average);
+  EXPECT_EQ(127, stats.peak);
+}
+
+TEST(RmsLevelTest, NoSamples) {
+  RmsLevel level;
+  EXPECT_EQ(127, level.Average());  // Return minimum if no samples are given.
+}
+
+TEST(RmsLevelTest, NoSamplesAverageAndPeak) {
+  RmsLevel level;
+  auto stats = level.AverageAndPeak();
+  EXPECT_EQ(127, stats.average);
+  EXPECT_EQ(127, stats.peak);
+}
+
+TEST(RmsLevelTest, PollTwice) {
+  auto x = CreateSinusoid(1000, INT16_MAX, kSampleRateHz);
+  auto level = RunTest(x);
+  level->Average();
+  EXPECT_EQ(127, level->Average());  // Stats should be reset at this point.
+}
+
+TEST(RmsLevelTest, Reset) {
+  auto x = CreateSinusoid(1000, INT16_MAX, kSampleRateHz);
+  auto level = RunTest(x);
+  level->Reset();
+  EXPECT_EQ(127, level->Average());  // Stats should be reset at this point.
+}
+
+// Inserts 1 second of full-scale sinusoid, followed by 1 second of muted.
+TEST(RmsLevelTest, ProcessMuted) {
+  auto x = CreateSinusoid(1000, INT16_MAX, kSampleRateHz);
+  auto level = RunTest(x);
+  const size_t kBlocksPerSecond = rtc::CheckedDivExact(
+      static_cast<size_t>(kSampleRateHz), kBlockSizeSamples);
+  for (size_t i = 0; i < kBlocksPerSecond; ++i) {
+    level->AnalyzeMuted(kBlockSizeSamples);
+  }
+  EXPECT_EQ(6, level->Average());  // Average RMS halved due to the silence.
+}
+
+// Inserts 1 second of half-scale sinusoid, followed by 10 ms of full-scale, and
+// finally 1 second of half-scale again.
Expect the average to be -9 dBFS due +// to the vast majority of the signal being half-scale, and the peak to be +// -3 dBFS. +TEST(RmsLevelTest, RunHalfScaleAndInsertFullScale) { + auto half_scale = CreateSinusoid(1000, INT16_MAX / 2, kSampleRateHz); + auto full_scale = CreateSinusoid(1000, INT16_MAX, kSampleRateHz / 100); + auto x = half_scale; + x.insert(x.end(), full_scale.begin(), full_scale.end()); + x.insert(x.end(), half_scale.begin(), half_scale.end()); + ASSERT_EQ(static_cast<size_t>(2 * kSampleRateHz + kSampleRateHz / 100), + x.size()); + auto level = RunTest(x); + auto stats = level->AverageAndPeak(); + EXPECT_EQ(9, stats.average); + EXPECT_EQ(3, stats.peak); +} + +TEST(RmsLevelTest, ResetOnBlockSizeChange) { + auto x = CreateSinusoid(1000, INT16_MAX, kSampleRateHz); + auto level = RunTest(x); + // Create a new signal with half amplitude, but double block length. + auto y = CreateSinusoid(1000, INT16_MAX / 2, kBlockSizeSamples * 2); + level->Analyze(y); + auto stats = level->AverageAndPeak(); + // Expect all stats to only be influenced by the last signal (y), since the + // changed block size should reset the stats. + EXPECT_EQ(9, stats.average); + EXPECT_EQ(9, stats.peak); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/splitting_filter.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/splitting_filter.cc new file mode 100644 index 0000000000..e2b8f82c36 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/splitting_filter.cc @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/splitting_filter.h" + +#include "common_audio/channel_buffer.h" +#include "common_audio/signal_processing/include/signal_processing_library.h" +#include "rtc_base/checks.h" + +namespace webrtc { + +SplittingFilter::SplittingFilter(size_t num_channels, + size_t num_bands, + size_t num_frames) + : num_bands_(num_bands) { + RTC_CHECK(num_bands_ == 2 || num_bands_ == 3); + if (num_bands_ == 2) { + two_bands_states_.resize(num_channels); + } else if (num_bands_ == 3) { + for (size_t i = 0; i < num_channels; ++i) { + three_band_filter_banks_.push_back(std::unique_ptr<ThreeBandFilterBank>( + new ThreeBandFilterBank(num_frames))); + } + } +} + +SplittingFilter::~SplittingFilter() = default; + +void SplittingFilter::Analysis(const IFChannelBuffer* data, + IFChannelBuffer* bands) { + RTC_DCHECK_EQ(num_bands_, bands->num_bands()); + RTC_DCHECK_EQ(data->num_channels(), bands->num_channels()); + RTC_DCHECK_EQ(data->num_frames(), + bands->num_frames_per_band() * bands->num_bands()); + if (bands->num_bands() == 2) { + TwoBandsAnalysis(data, bands); + } else if (bands->num_bands() == 3) { + ThreeBandsAnalysis(data, bands); + } +} + +void SplittingFilter::Synthesis(const IFChannelBuffer* bands, + IFChannelBuffer* data) { + RTC_DCHECK_EQ(num_bands_, bands->num_bands()); + RTC_DCHECK_EQ(data->num_channels(), bands->num_channels()); + RTC_DCHECK_EQ(data->num_frames(), + bands->num_frames_per_band() * bands->num_bands()); + if (bands->num_bands() == 2) { + TwoBandsSynthesis(bands, data); + } else if (bands->num_bands() == 3) { + ThreeBandsSynthesis(bands, data); + } +} + +void SplittingFilter::TwoBandsAnalysis(const IFChannelBuffer* data, + IFChannelBuffer* bands) { + RTC_DCHECK_EQ(two_bands_states_.size(), data->num_channels()); + for (size_t i = 0; i < two_bands_states_.size(); ++i) { + WebRtcSpl_AnalysisQMF(data->ibuf_const()->channels()[i], + data->num_frames(), + bands->ibuf()->channels(0)[i], + bands->ibuf()->channels(1)[i], + two_bands_states_[i].analysis_state1, + two_bands_states_[i].analysis_state2); + } +} + +void SplittingFilter::TwoBandsSynthesis(const IFChannelBuffer* bands, + IFChannelBuffer* data) { + RTC_DCHECK_LE(data->num_channels(), two_bands_states_.size()); + for (size_t i = 0; i < data->num_channels(); ++i) { + WebRtcSpl_SynthesisQMF(bands->ibuf_const()->channels(0)[i], + bands->ibuf_const()->channels(1)[i], + bands->num_frames_per_band(), + data->ibuf()->channels()[i], + two_bands_states_[i].synthesis_state1, + two_bands_states_[i].synthesis_state2); + } +} + +void SplittingFilter::ThreeBandsAnalysis(const IFChannelBuffer* data, + IFChannelBuffer* bands) { + RTC_DCHECK_EQ(three_band_filter_banks_.size(), data->num_channels()); + for (size_t i = 0; i < three_band_filter_banks_.size(); ++i) { + three_band_filter_banks_[i]->Analysis(data->fbuf_const()->channels()[i], + data->num_frames(), + bands->fbuf()->bands(i)); + } +} + +void SplittingFilter::ThreeBandsSynthesis(const IFChannelBuffer* bands, + IFChannelBuffer* data) { + RTC_DCHECK_LE(data->num_channels(), three_band_filter_banks_.size()); + for (size_t i = 0; i < data->num_channels(); ++i) { + three_band_filter_banks_[i]->Synthesis(bands->fbuf_const()->bands(i), + bands->num_frames_per_band(), + data->fbuf()->channels()[i]); + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/splitting_filter.h b/third_party/libwebrtc/webrtc/modules/audio_processing/splitting_filter.h new file mode 100644 index 0000000000..7d60c82ff6 --- /dev/null +++ 
b/third_party/libwebrtc/webrtc/modules/audio_processing/splitting_filter.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_SPLITTING_FILTER_H_ +#define MODULES_AUDIO_PROCESSING_SPLITTING_FILTER_H_ + +#include <cstring> +#include <memory> +#include <vector> + +#include "modules/audio_processing/three_band_filter_bank.h" + +namespace webrtc { + +class IFChannelBuffer; + +struct TwoBandsStates { + TwoBandsStates() { + memset(analysis_state1, 0, sizeof(analysis_state1)); + memset(analysis_state2, 0, sizeof(analysis_state2)); + memset(synthesis_state1, 0, sizeof(synthesis_state1)); + memset(synthesis_state2, 0, sizeof(synthesis_state2)); + } + + static const int kStateSize = 6; + int analysis_state1[kStateSize]; + int analysis_state2[kStateSize]; + int synthesis_state1[kStateSize]; + int synthesis_state2[kStateSize]; +}; + +// Splitting filter which is able to split into and merge from 2 or 3 frequency +// bands. The number of channels needs to be provided at construction time. +// +// For each block, Analysis() is called to split into bands and then Synthesis() +// to merge these bands again. The input and output signals are contained in +// IFChannelBuffers and for the different bands an array of IFChannelBuffers is +// used. +class SplittingFilter { + public: + SplittingFilter(size_t num_channels, size_t num_bands, size_t num_frames); + ~SplittingFilter(); + + void Analysis(const IFChannelBuffer* data, IFChannelBuffer* bands); + void Synthesis(const IFChannelBuffer* bands, IFChannelBuffer* data); + + private: + // Two-band analysis and synthesis work for 640 samples or less. + void TwoBandsAnalysis(const IFChannelBuffer* data, IFChannelBuffer* bands); + void TwoBandsSynthesis(const IFChannelBuffer* bands, IFChannelBuffer* data); + void ThreeBandsAnalysis(const IFChannelBuffer* data, IFChannelBuffer* bands); + void ThreeBandsSynthesis(const IFChannelBuffer* bands, IFChannelBuffer* data); + void InitBuffers(); + + const size_t num_bands_; + std::vector<TwoBandsStates> two_bands_states_; + std::vector<std::unique_ptr<ThreeBandFilterBank>> three_band_filter_banks_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_SPLITTING_FILTER_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/splitting_filter_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/splitting_filter_unittest.cc new file mode 100644 index 0000000000..3e0dbb9b9c --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/splitting_filter_unittest.cc @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// MSVC++ requires this to be set before any other includes to get M_PI. 
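+// Background note: M_PI is not part of standard C++. MSVC only exposes it
+// from <cmath> when _USE_MATH_DEFINES is defined, whereas most POSIX
+// toolchains provide it unconditionally through <math.h>.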
+#define _USE_MATH_DEFINES + +#include <cmath> + +#include "common_audio/channel_buffer.h" +#include "modules/audio_processing/splitting_filter.h" +#include "test/gtest.h" + +namespace webrtc { +namespace { + +const size_t kSamplesPer16kHzChannel = 160; +const size_t kSamplesPer48kHzChannel = 480; + +} // namespace + +// Generates a signal from presence or absence of sine waves of different +// frequencies. +// Splits into 3 bands and checks their presence or absence. +// Recombines the bands. +// Calculates the delay. +// Checks that the cross correlation of input and output is high enough at the +// calculated delay. +TEST(SplittingFilterTest, SplitsIntoThreeBandsAndReconstructs) { + static const int kChannels = 1; + static const int kSampleRateHz = 48000; + static const size_t kNumBands = 3; + static const int kFrequenciesHz[kNumBands] = {1000, 12000, 18000}; + static const float kAmplitude = 8192.f; + static const size_t kChunks = 8; + SplittingFilter splitting_filter(kChannels, + kNumBands, + kSamplesPer48kHzChannel); + IFChannelBuffer in_data(kSamplesPer48kHzChannel, kChannels, kNumBands); + IFChannelBuffer bands(kSamplesPer48kHzChannel, kChannels, kNumBands); + IFChannelBuffer out_data(kSamplesPer48kHzChannel, kChannels, kNumBands); + for (size_t i = 0; i < kChunks; ++i) { + // Input signal generation. + bool is_present[kNumBands]; + memset(in_data.fbuf()->channels()[0], + 0, + kSamplesPer48kHzChannel * sizeof(in_data.fbuf()->channels()[0][0])); + for (size_t j = 0; j < kNumBands; ++j) { + is_present[j] = i & (static_cast<size_t>(1) << j); + float amplitude = is_present[j] ? kAmplitude : 0.f; + for (size_t k = 0; k < kSamplesPer48kHzChannel; ++k) { + in_data.fbuf()->channels()[0][k] += + amplitude * sin(2.f * M_PI * kFrequenciesHz[j] * + (i * kSamplesPer48kHzChannel + k) / kSampleRateHz); + } + } + // Three band splitting filter. + splitting_filter.Analysis(&in_data, &bands); + // Energy calculation. + float energy[kNumBands]; + for (size_t j = 0; j < kNumBands; ++j) { + energy[j] = 0.f; + for (size_t k = 0; k < kSamplesPer16kHzChannel; ++k) { + energy[j] += bands.fbuf_const()->channels(j)[0][k] * + bands.fbuf_const()->channels(j)[0][k]; + } + energy[j] /= kSamplesPer16kHzChannel; + if (is_present[j]) { + EXPECT_GT(energy[j], kAmplitude * kAmplitude / 4); + } else { + EXPECT_LT(energy[j], kAmplitude * kAmplitude / 4); + } + } + // Three band merge. + splitting_filter.Synthesis(&bands, &out_data); + // Delay and cross correlation estimation. + float xcorr = 0.f; + for (size_t delay = 0; delay < kSamplesPer48kHzChannel; ++delay) { + float tmpcorr = 0.f; + for (size_t j = delay; j < kSamplesPer48kHzChannel; ++j) { + tmpcorr += in_data.fbuf_const()->channels()[0][j - delay] * + out_data.fbuf_const()->channels()[0][j]; + } + tmpcorr /= kSamplesPer48kHzChannel; + if (tmpcorr > xcorr) { + xcorr = tmpcorr; + } + } + // High cross correlation check. + bool any_present = false; + for (size_t j = 0; j < kNumBands; ++j) { + any_present |= is_present[j]; + } + if (any_present) { + EXPECT_GT(xcorr, kAmplitude * kAmplitude / 4); + } + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/aec_dump_based_simulator.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/test/aec_dump_based_simulator.cc new file mode 100644 index 0000000000..6d0b07c7ed --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/aec_dump_based_simulator.cc @@ -0,0 +1,557 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <iostream> + +#include "modules/audio_processing/test/aec_dump_based_simulator.h" + +#include "modules/audio_processing/test/protobuf_utils.h" +#include "rtc_base/checks.h" + +namespace webrtc { +namespace test { +namespace { + +// Verify output bitexactness for the fixed interface. +// TODO(peah): Check whether it would make sense to add a threshold +// to use for checking the bitexactness in a soft manner. +bool VerifyFixedBitExactness(const webrtc::audioproc::Stream& msg, + const AudioFrame& frame) { + if ((sizeof(int16_t) * frame.samples_per_channel_ * frame.num_channels_) != + msg.output_data().size()) { + return false; + } else { + const int16_t* frame_data = frame.data(); + for (size_t k = 0; k < frame.num_channels_ * frame.samples_per_channel_; + ++k) { + if (msg.output_data().data()[k] != frame_data[k]) { + return false; + } + } + } + return true; +} + +// Verify output bitexactness for the float interface. +bool VerifyFloatBitExactness(const webrtc::audioproc::Stream& msg, + const StreamConfig& out_config, + const ChannelBuffer<float>& out_buf) { + if (static_cast<size_t>(msg.output_channel_size()) != + out_config.num_channels() || + msg.output_channel(0).size() != out_config.num_frames()) { + return false; + } else { + for (int ch = 0; ch < msg.output_channel_size(); ++ch) { + for (size_t sample = 0; sample < out_config.num_frames(); ++sample) { + if (msg.output_channel(ch).data()[sample] != + out_buf.channels()[ch][sample]) { + return false; + } + } + } + } + return true; +} + +} // namespace + +AecDumpBasedSimulator::AecDumpBasedSimulator(const SimulationSettings& settings) + : AudioProcessingSimulator(settings) {} + +AecDumpBasedSimulator::~AecDumpBasedSimulator() = default; + +void AecDumpBasedSimulator::PrepareProcessStreamCall( + const webrtc::audioproc::Stream& msg) { + if (msg.has_input_data()) { + // Fixed interface processing. + // Verify interface invariance. + RTC_CHECK(interface_used_ == InterfaceType::kFixedInterface || + interface_used_ == InterfaceType::kNotSpecified); + interface_used_ = InterfaceType::kFixedInterface; + + // Populate input buffer. + RTC_CHECK_EQ(sizeof(*fwd_frame_.data()) * fwd_frame_.samples_per_channel_ * + fwd_frame_.num_channels_, + msg.input_data().size()); + memcpy(fwd_frame_.mutable_data(), msg.input_data().data(), + msg.input_data().size()); + } else { + // Float interface processing. + // Verify interface invariance. + RTC_CHECK(interface_used_ == InterfaceType::kFloatInterface || + interface_used_ == InterfaceType::kNotSpecified); + interface_used_ = InterfaceType::kFloatInterface; + + RTC_CHECK_EQ(in_buf_->num_channels(), + static_cast<size_t>(msg.input_channel_size())); + + // Populate input buffer. 
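+    // Each input_channel(i) entry of the dump is a raw byte string holding
+    // one channel's float samples, which is why the size check below compares
+    // byte counts (num_frames() * sizeof(float)) rather than sample counts.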
+ for (size_t i = 0; i < in_buf_->num_channels(); ++i) { + RTC_CHECK_EQ(in_buf_->num_frames() * sizeof(*in_buf_->channels()[i]), + msg.input_channel(i).size()); + std::memcpy(in_buf_->channels()[i], msg.input_channel(i).data(), + msg.input_channel(i).size()); + } + } + + if (artificial_nearend_buffer_reader_) { + if (artificial_nearend_buffer_reader_->Read( + artificial_nearend_buf_.get())) { + if (msg.has_input_data()) { + int16_t* fwd_frame_data = fwd_frame_.mutable_data(); + for (size_t k = 0; k < in_buf_->num_frames(); ++k) { + fwd_frame_data[k] = rtc::saturated_cast<int16_t>( + fwd_frame_data[k] + + static_cast<int16_t>(32767 * + artificial_nearend_buf_->channels()[0][k])); + } + } else { + for (int i = 0; i < msg.input_channel_size(); ++i) { + for (size_t k = 0; k < in_buf_->num_frames(); ++k) { + in_buf_->channels()[i][k] += + artificial_nearend_buf_->channels()[0][k]; + in_buf_->channels()[i][k] = std::min( + 32767.f, std::max(-32768.f, in_buf_->channels()[i][k])); + } + } + } + } else { + if (!artificial_nearend_eof_reported_) { + std::cout << "The artificial nearend file ended before the recording."; + artificial_nearend_eof_reported_ = true; + } + } + } + + if (!settings_.stream_delay) { + if (msg.has_delay()) { + RTC_CHECK_EQ(AudioProcessing::kNoError, + ap_->set_stream_delay_ms(msg.delay())); + } + } else { + RTC_CHECK_EQ(AudioProcessing::kNoError, + ap_->set_stream_delay_ms(*settings_.stream_delay)); + } + + if (!settings_.stream_drift_samples) { + if (msg.has_drift()) { + ap_->echo_cancellation()->set_stream_drift_samples(msg.drift()); + } + } else { + ap_->echo_cancellation()->set_stream_drift_samples( + *settings_.stream_drift_samples); + } + + if (!settings_.use_ts) { + if (msg.has_keypress()) { + ap_->set_stream_key_pressed(msg.keypress()); + } + } else { + ap_->set_stream_key_pressed(*settings_.use_ts); + } + + // Level is always logged in AEC dumps. + RTC_CHECK(msg.has_level()); + aec_dump_mic_level_ = msg.level(); +} + +void AecDumpBasedSimulator::VerifyProcessStreamBitExactness( + const webrtc::audioproc::Stream& msg) { + if (bitexact_output_) { + if (interface_used_ == InterfaceType::kFixedInterface) { + bitexact_output_ = VerifyFixedBitExactness(msg, fwd_frame_); + } else { + bitexact_output_ = VerifyFloatBitExactness(msg, out_config_, *out_buf_); + } + } +} + +void AecDumpBasedSimulator::PrepareReverseProcessStreamCall( + const webrtc::audioproc::ReverseStream& msg) { + if (msg.has_data()) { + // Fixed interface processing. + // Verify interface invariance. + RTC_CHECK(interface_used_ == InterfaceType::kFixedInterface || + interface_used_ == InterfaceType::kNotSpecified); + interface_used_ = InterfaceType::kFixedInterface; + + // Populate input buffer. + RTC_CHECK_EQ(sizeof(int16_t) * rev_frame_.samples_per_channel_ * + rev_frame_.num_channels_, + msg.data().size()); + memcpy(rev_frame_.mutable_data(), msg.data().data(), msg.data().size()); + } else { + // Float interface processing. + // Verify interface invariance. + RTC_CHECK(interface_used_ == InterfaceType::kFloatInterface || + interface_used_ == InterfaceType::kNotSpecified); + interface_used_ = InterfaceType::kFloatInterface; + + RTC_CHECK_EQ(reverse_in_buf_->num_channels(), + static_cast<size_t>(msg.channel_size())); + + // Populate input buffer. 
+ for (int i = 0; i < msg.channel_size(); ++i) { + RTC_CHECK_EQ(reverse_in_buf_->num_frames() * + sizeof(*reverse_in_buf_->channels()[i]), + msg.channel(i).size()); + std::memcpy(reverse_in_buf_->channels()[i], msg.channel(i).data(), + msg.channel(i).size()); + } + } +} + +void AecDumpBasedSimulator::Process() { + CreateAudioProcessor(); + dump_input_file_ = OpenFile(settings_.aec_dump_input_filename->c_str(), "rb"); + + if (settings_.artificial_nearend_filename) { + std::unique_ptr<WavReader> artificial_nearend_file( + new WavReader(settings_.artificial_nearend_filename->c_str())); + + RTC_CHECK_EQ(1, artificial_nearend_file->num_channels()) + << "Only mono files for the artificial nearend are supported, " + "reverted to not using the artificial nearend file"; + + const int sample_rate_hz = artificial_nearend_file->sample_rate(); + artificial_nearend_buffer_reader_.reset( + new ChannelBufferWavReader(std::move(artificial_nearend_file))); + artificial_nearend_buf_.reset(new ChannelBuffer<float>( + rtc::CheckedDivExact(sample_rate_hz, kChunksPerSecond), 1)); + } + + webrtc::audioproc::Event event_msg; + int num_forward_chunks_processed = 0; + while (ReadMessageFromFile(dump_input_file_, &event_msg)) { + switch (event_msg.type()) { + case webrtc::audioproc::Event::INIT: + RTC_CHECK(event_msg.has_init()); + HandleMessage(event_msg.init()); + break; + case webrtc::audioproc::Event::STREAM: + RTC_CHECK(event_msg.has_stream()); + HandleMessage(event_msg.stream()); + ++num_forward_chunks_processed; + break; + case webrtc::audioproc::Event::REVERSE_STREAM: + RTC_CHECK(event_msg.has_reverse_stream()); + HandleMessage(event_msg.reverse_stream()); + break; + case webrtc::audioproc::Event::CONFIG: + RTC_CHECK(event_msg.has_config()); + HandleMessage(event_msg.config()); + break; + default: + RTC_CHECK(false); + } + } + + fclose(dump_input_file_); + + DestroyAudioProcessor(); +} + +void AecDumpBasedSimulator::HandleMessage( + const webrtc::audioproc::Config& msg) { + if (settings_.use_verbose_logging) { + std::cout << "Config at frame:" << std::endl; + std::cout << " Forward: " << get_num_process_stream_calls() << std::endl; + std::cout << " Reverse: " << get_num_reverse_process_stream_calls() + << std::endl; + } + + if (!settings_.discard_all_settings_in_aecdump) { + if (settings_.use_verbose_logging) { + std::cout << "Setting used in config:" << std::endl; + } + Config config; + AudioProcessing::Config apm_config; + + if (msg.has_aec_enabled() || settings_.use_aec) { + bool enable = settings_.use_aec ? *settings_.use_aec : msg.aec_enabled(); + RTC_CHECK_EQ(AudioProcessing::kNoError, + ap_->echo_cancellation()->Enable(enable)); + if (settings_.use_verbose_logging) { + std::cout << " aec_enabled: " << (enable ? "true" : "false") + << std::endl; + } + } + + if (msg.has_aec_delay_agnostic_enabled() || settings_.use_delay_agnostic) { + bool enable = settings_.use_delay_agnostic + ? *settings_.use_delay_agnostic + : msg.aec_delay_agnostic_enabled(); + config.Set<DelayAgnostic>(new DelayAgnostic(enable)); + if (settings_.use_verbose_logging) { + std::cout << " aec_delay_agnostic_enabled: " + << (enable ? "true" : "false") << std::endl; + } + } + + if (msg.has_aec_drift_compensation_enabled() || + settings_.use_drift_compensation) { + bool enable = settings_.use_drift_compensation + ? 
*settings_.use_drift_compensation + : msg.aec_drift_compensation_enabled(); + RTC_CHECK_EQ(AudioProcessing::kNoError, + ap_->echo_cancellation()->enable_drift_compensation(enable)); + if (settings_.use_verbose_logging) { + std::cout << " aec_drift_compensation_enabled: " + << (enable ? "true" : "false") << std::endl; + } + } + + if (msg.has_aec_extended_filter_enabled() || + settings_.use_extended_filter) { + bool enable = settings_.use_extended_filter + ? *settings_.use_extended_filter + : msg.aec_extended_filter_enabled(); + config.Set<ExtendedFilter>(new ExtendedFilter(enable)); + if (settings_.use_verbose_logging) { + std::cout << " aec_extended_filter_enabled: " + << (enable ? "true" : "false") << std::endl; + } + } + + if (msg.has_aec_suppression_level() || settings_.aec_suppression_level) { + int level = settings_.aec_suppression_level + ? *settings_.aec_suppression_level + : msg.aec_suppression_level(); + RTC_CHECK_EQ( + AudioProcessing::kNoError, + ap_->echo_cancellation()->set_suppression_level( + static_cast<webrtc::EchoCancellation::SuppressionLevel>(level))); + if (settings_.use_verbose_logging) { + std::cout << " aec_suppression_level: " << level << std::endl; + } + } + + if (msg.has_aecm_enabled() || settings_.use_aecm) { + bool enable = + settings_.use_aecm ? *settings_.use_aecm : msg.aecm_enabled(); + RTC_CHECK_EQ(AudioProcessing::kNoError, + ap_->echo_control_mobile()->Enable(enable)); + if (settings_.use_verbose_logging) { + std::cout << " aecm_enabled: " << (enable ? "true" : "false") + << std::endl; + } + } + + if (msg.has_aecm_comfort_noise_enabled() || + settings_.use_aecm_comfort_noise) { + bool enable = settings_.use_aecm_comfort_noise + ? *settings_.use_aecm_comfort_noise + : msg.aecm_comfort_noise_enabled(); + RTC_CHECK_EQ(AudioProcessing::kNoError, + ap_->echo_control_mobile()->enable_comfort_noise(enable)); + if (settings_.use_verbose_logging) { + std::cout << " aecm_comfort_noise_enabled: " + << (enable ? "true" : "false") << std::endl; + } + } + + if (msg.has_aecm_routing_mode() || settings_.aecm_routing_mode) { + int routing_mode = settings_.aecm_routing_mode + ? *settings_.aecm_routing_mode + : msg.aecm_routing_mode(); + RTC_CHECK_EQ(AudioProcessing::kNoError, + ap_->echo_control_mobile()->set_routing_mode( + static_cast<webrtc::EchoControlMobile::RoutingMode>( + routing_mode))); + if (settings_.use_verbose_logging) { + std::cout << " aecm_routing_mode: " << routing_mode << std::endl; + } + } + + if (msg.has_agc_enabled() || settings_.use_agc) { + bool enable = settings_.use_agc ? *settings_.use_agc : msg.agc_enabled(); + RTC_CHECK_EQ(AudioProcessing::kNoError, + ap_->gain_control()->Enable(enable)); + if (settings_.use_verbose_logging) { + std::cout << " agc_enabled: " << (enable ? "true" : "false") + << std::endl; + } + } + + if (msg.has_agc_mode() || settings_.agc_mode) { + int mode = settings_.agc_mode ? *settings_.agc_mode : msg.agc_mode(); + RTC_CHECK_EQ(AudioProcessing::kNoError, + ap_->gain_control()->set_mode( + static_cast<webrtc::GainControl::Mode>(mode))); + if (settings_.use_verbose_logging) { + std::cout << " agc_mode: " << mode << std::endl; + } + } + + if (msg.has_agc_limiter_enabled() || settings_.use_agc_limiter) { + bool enable = settings_.use_agc_limiter ? *settings_.use_agc_limiter + : msg.agc_limiter_enabled(); + RTC_CHECK_EQ(AudioProcessing::kNoError, + ap_->gain_control()->enable_limiter(enable)); + if (settings_.use_verbose_logging) { + std::cout << " agc_limiter_enabled: " << (enable ? 
"true" : "false") + << std::endl; + } + } + + // TODO(peah): Add support for controlling the Experimental AGC from the + // command line. + if (msg.has_noise_robust_agc_enabled()) { + config.Set<ExperimentalAgc>( + new ExperimentalAgc(msg.noise_robust_agc_enabled())); + if (settings_.use_verbose_logging) { + std::cout << " noise_robust_agc_enabled: " + << (msg.noise_robust_agc_enabled() ? "true" : "false") + << std::endl; + } + } + + if (msg.has_transient_suppression_enabled() || settings_.use_ts) { + bool enable = settings_.use_ts ? *settings_.use_ts + : msg.transient_suppression_enabled(); + config.Set<ExperimentalNs>(new ExperimentalNs(enable)); + if (settings_.use_verbose_logging) { + std::cout << " transient_suppression_enabled: " + << (enable ? "true" : "false") << std::endl; + } + } + + if (msg.has_intelligibility_enhancer_enabled() || settings_.use_ie) { + bool enable = settings_.use_ie ? *settings_.use_ie + : msg.intelligibility_enhancer_enabled(); + config.Set<Intelligibility>(new Intelligibility(enable)); + if (settings_.use_verbose_logging) { + std::cout << " intelligibility_enhancer_enabled: " + << (enable ? "true" : "false") << std::endl; + } + } + + if (msg.has_hpf_enabled() || settings_.use_hpf) { + bool enable = settings_.use_hpf ? *settings_.use_hpf : msg.hpf_enabled(); + apm_config.high_pass_filter.enabled = enable; + if (settings_.use_verbose_logging) { + std::cout << " hpf_enabled: " << (enable ? "true" : "false") + << std::endl; + } + } + + if (msg.has_ns_enabled() || settings_.use_ns) { + bool enable = settings_.use_ns ? *settings_.use_ns : msg.ns_enabled(); + RTC_CHECK_EQ(AudioProcessing::kNoError, + ap_->noise_suppression()->Enable(enable)); + if (settings_.use_verbose_logging) { + std::cout << " ns_enabled: " << (enable ? "true" : "false") + << std::endl; + } + } + + if (msg.has_ns_level() || settings_.ns_level) { + int level = settings_.ns_level ? *settings_.ns_level : msg.ns_level(); + RTC_CHECK_EQ(AudioProcessing::kNoError, + ap_->noise_suppression()->set_level( + static_cast<NoiseSuppression::Level>(level))); + if (settings_.use_verbose_logging) { + std::cout << " ns_level: " << level << std::endl; + } + } + + if (settings_.use_verbose_logging && msg.has_experiments_description() && + !msg.experiments_description().empty()) { + std::cout << " experiments not included by default in the simulation: " + << msg.experiments_description() << std::endl; + } + + if (settings_.use_refined_adaptive_filter) { + config.Set<RefinedAdaptiveFilter>( + new RefinedAdaptiveFilter(*settings_.use_refined_adaptive_filter)); + } + + if (settings_.use_lc) { + apm_config.level_controller.enabled = *settings_.use_lc; + } + + if (settings_.use_ed) { + apm_config.residual_echo_detector.enabled = *settings_.use_ed; + } + + ap_->ApplyConfig(apm_config); + ap_->SetExtraOptions(config); + } +} + +void AecDumpBasedSimulator::HandleMessage(const webrtc::audioproc::Init& msg) { + RTC_CHECK(msg.has_sample_rate()); + RTC_CHECK(msg.has_num_input_channels()); + RTC_CHECK(msg.has_num_reverse_channels()); + RTC_CHECK(msg.has_reverse_sample_rate()); + + if (settings_.use_verbose_logging) { + std::cout << "Init at frame:" << std::endl; + std::cout << " Forward: " << get_num_process_stream_calls() << std::endl; + std::cout << " Reverse: " << get_num_reverse_process_stream_calls() + << std::endl; + } + + int num_output_channels; + if (settings_.output_num_channels) { + num_output_channels = *settings_.output_num_channels; + } else { + num_output_channels = msg.has_num_output_channels() + ? 
msg.num_output_channels() + : msg.num_input_channels(); + } + + int output_sample_rate; + if (settings_.output_sample_rate_hz) { + output_sample_rate = *settings_.output_sample_rate_hz; + } else { + output_sample_rate = msg.has_output_sample_rate() ? msg.output_sample_rate() + : msg.sample_rate(); + } + + int num_reverse_output_channels; + if (settings_.reverse_output_num_channels) { + num_reverse_output_channels = *settings_.reverse_output_num_channels; + } else { + num_reverse_output_channels = msg.has_num_reverse_output_channels() + ? msg.num_reverse_output_channels() + : msg.num_reverse_channels(); + } + + int reverse_output_sample_rate; + if (settings_.reverse_output_sample_rate_hz) { + reverse_output_sample_rate = *settings_.reverse_output_sample_rate_hz; + } else { + reverse_output_sample_rate = msg.has_reverse_output_sample_rate() + ? msg.reverse_output_sample_rate() + : msg.reverse_sample_rate(); + } + + SetupBuffersConfigsOutputs( + msg.sample_rate(), output_sample_rate, msg.reverse_sample_rate(), + reverse_output_sample_rate, msg.num_input_channels(), num_output_channels, + msg.num_reverse_channels(), num_reverse_output_channels); +} + +void AecDumpBasedSimulator::HandleMessage( + const webrtc::audioproc::Stream& msg) { + PrepareProcessStreamCall(msg); + ProcessStream(interface_used_ == InterfaceType::kFixedInterface); + VerifyProcessStreamBitExactness(msg); +} + +void AecDumpBasedSimulator::HandleMessage( + const webrtc::audioproc::ReverseStream& msg) { + PrepareReverseProcessStreamCall(msg); + ProcessReverseStream(interface_used_ == InterfaceType::kFixedInterface); +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/aec_dump_based_simulator.h b/third_party/libwebrtc/webrtc/modules/audio_processing/test/aec_dump_based_simulator.h new file mode 100644 index 0000000000..4c29bf7315 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/aec_dump_based_simulator.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_TEST_AEC_DUMP_BASED_SIMULATOR_H_ +#define MODULES_AUDIO_PROCESSING_TEST_AEC_DUMP_BASED_SIMULATOR_H_ + +#include "modules/audio_processing/test/audio_processing_simulator.h" + +#include "rtc_base/constructormagic.h" +#include "rtc_base/ignore_wundef.h" + +RTC_PUSH_IGNORING_WUNDEF() +#ifdef WEBRTC_ANDROID_PLATFORM_BUILD +#include "external/webrtc/webrtc/modules/audio_processing/debug.pb.h" +#else +#include "modules/audio_processing/debug.pb.h" +#endif +RTC_POP_IGNORING_WUNDEF() + +namespace webrtc { +namespace test { + +// Used to perform an audio processing simulation from an aec dump. +class AecDumpBasedSimulator final : public AudioProcessingSimulator { + public: + explicit AecDumpBasedSimulator(const SimulationSettings& settings); + ~AecDumpBasedSimulator() override; + + // Processes the messages in the aecdump file. 
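+  // Process() replays the recorded session event by event: INIT reconfigures
+  // the buffers, STREAM and REVERSE_STREAM drive the forward and reverse
+  // processing paths, and CONFIG re-applies the recorded APM settings.
+  //
+  // Rough usage sketch (illustrative only; see SimulationSettings for the
+  // available fields):
+  //   SimulationSettings settings;
+  //   settings.aec_dump_input_filename = std::string("recording.aecdump");
+  //   AecDumpBasedSimulator simulator(settings);
+  //   simulator.Process();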
+  void Process() override;
+
+ private:
+  void HandleMessage(const webrtc::audioproc::Init& msg);
+  void HandleMessage(const webrtc::audioproc::Stream& msg);
+  void HandleMessage(const webrtc::audioproc::ReverseStream& msg);
+  void HandleMessage(const webrtc::audioproc::Config& msg);
+  void PrepareProcessStreamCall(const webrtc::audioproc::Stream& msg);
+  void PrepareReverseProcessStreamCall(
+      const webrtc::audioproc::ReverseStream& msg);
+  void VerifyProcessStreamBitExactness(const webrtc::audioproc::Stream& msg);
+
+  enum InterfaceType {
+    kFixedInterface,
+    kFloatInterface,
+    kNotSpecified,
+  };
+
+  FILE* dump_input_file_;
+  std::unique_ptr<ChannelBuffer<float>> artificial_nearend_buf_;
+  std::unique_ptr<ChannelBufferWavReader> artificial_nearend_buffer_reader_;
+  bool artificial_nearend_eof_reported_ = false;
+  InterfaceType interface_used_ = InterfaceType::kNotSpecified;
+
+  RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(AecDumpBasedSimulator);
+};
+
+}  // namespace test
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_TEST_AEC_DUMP_BASED_SIMULATOR_H_
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/android/apmtest/AndroidManifest.xml b/third_party/libwebrtc/webrtc/modules/audio_processing/test/android/apmtest/AndroidManifest.xml
new file mode 100644
index 0000000000..c6063b3d76
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/android/apmtest/AndroidManifest.xml
@@ -0,0 +1,30 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!-- BEGIN_INCLUDE(manifest) -->
+<manifest xmlns:android="http://schemas.android.com/apk/res/android"
+    package="com.example.native_activity"
+    android:versionCode="1"
+    android:versionName="1.0">
+
+  <!-- This is the platform API where NativeActivity was introduced. -->
+  <uses-sdk android:minSdkVersion="8" />
+
+  <!-- This .apk has no Java code itself, so set hasCode to false. -->
+  <application android:label="@string/app_name" android:hasCode="false" android:debuggable="true">
+
+    <!-- Our activity is the built-in NativeActivity framework class.
+         This will take care of integrating with our NDK code. -->
+    <activity android:name="android.app.NativeActivity"
+        android:label="@string/app_name"
+        android:configChanges="orientation|keyboardHidden">
+      <!-- Tell NativeActivity the name of our .so. -->
+      <meta-data android:name="android.app.lib_name"
+          android:value="apmtest-activity" />
+      <intent-filter>
+        <action android:name="android.intent.action.MAIN" />
+        <category android:name="android.intent.category.LAUNCHER" />
+      </intent-filter>
+    </activity>
+  </application>
+
+</manifest>
+<!-- END_INCLUDE(manifest) -->
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/android/apmtest/default.properties b/third_party/libwebrtc/webrtc/modules/audio_processing/test/android/apmtest/default.properties
new file mode 100644
index 0000000000..9a2c9f6c88
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/android/apmtest/default.properties
@@ -0,0 +1,11 @@
+# This file is automatically generated by Android Tools.
+# Do not modify this file -- YOUR CHANGES WILL BE ERASED!
+#
+# This file must be checked in Version Control Systems.
+#
+# To customize properties used by the Ant build system, use
+# "build.properties" and override values to adapt the script to your
+# project structure.
+
+# Project target.
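+# Note: android-9 is the Android 2.3 (API level 9) SDK platform the Ant
+# toolchain builds this sample against; the manifest itself only requires
+# minSdkVersion 8.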
+target=android-9 diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/android/apmtest/jni/main.c b/third_party/libwebrtc/webrtc/modules/audio_processing/test/android/apmtest/jni/main.c new file mode 100644 index 0000000000..2e19635683 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/android/apmtest/jni/main.c @@ -0,0 +1,307 @@ +/* + * Copyright (C) 2010 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +//BEGIN_INCLUDE(all) +#include <jni.h> +#include <errno.h> + +#include <EGL/egl.h> +#include <GLES/gl.h> + +#include <android/sensor.h> +#include <android/log.h> +#include <android_native_app_glue.h> + +#define LOGI(...) ((void)__android_log_print(ANDROID_LOG_INFO, "native-activity", __VA_ARGS__)) +#define LOGW(...) ((void)__android_log_print(ANDROID_LOG_WARN, "native-activity", __VA_ARGS__)) + +/** + * Our saved state data. + */ +struct saved_state { + float angle; + int32_t x; + int32_t y; +}; + +/** + * Shared state for our app. + */ +struct engine { + struct android_app* app; + + ASensorManager* sensorManager; + const ASensor* accelerometerSensor; + ASensorEventQueue* sensorEventQueue; + + int animating; + EGLDisplay display; + EGLSurface surface; + EGLContext context; + int32_t width; + int32_t height; + struct saved_state state; +}; + +/** + * Initialize an EGL context for the current display. + */ +static int engine_init_display(struct engine* engine) { + // initialize OpenGL ES and EGL + + /* + * Here specify the attributes of the desired configuration. + * Below, we select an EGLConfig with at least 8 bits per color + * component compatible with on-screen windows + */ + const EGLint attribs[] = { + EGL_SURFACE_TYPE, EGL_WINDOW_BIT, + EGL_BLUE_SIZE, 8, + EGL_GREEN_SIZE, 8, + EGL_RED_SIZE, 8, + EGL_NONE + }; + EGLint w, h, dummy, format; + EGLint numConfigs; + EGLConfig config; + EGLSurface surface; + EGLContext context; + + EGLDisplay display = eglGetDisplay(EGL_DEFAULT_DISPLAY); + + eglInitialize(display, 0, 0); + + /* Here, the application chooses the configuration it desires. In this + * sample, we have a very simplified selection process, where we pick + * the first EGLConfig that matches our criteria */ + eglChooseConfig(display, attribs, &config, 1, &numConfigs); + + /* EGL_NATIVE_VISUAL_ID is an attribute of the EGLConfig that is + * guaranteed to be accepted by ANativeWindow_setBuffersGeometry(). + * As soon as we picked a EGLConfig, we can safely reconfigure the + * ANativeWindow buffers to match, using EGL_NATIVE_VISUAL_ID. 
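+     * Passing 0 for the width and height below keeps the window's default
+     * dimensions and only changes the pixel format to the chosen one.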
+     */
+    eglGetConfigAttrib(display, config, EGL_NATIVE_VISUAL_ID, &format);
+
+    ANativeWindow_setBuffersGeometry(engine->app->window, 0, 0, format);
+
+    surface = eglCreateWindowSurface(display, config, engine->app->window, NULL);
+    context = eglCreateContext(display, config, NULL, NULL);
+
+    if (eglMakeCurrent(display, surface, surface, context) == EGL_FALSE) {
+        LOGW("Unable to eglMakeCurrent");
+        return -1;
+    }
+
+    eglQuerySurface(display, surface, EGL_WIDTH, &w);
+    eglQuerySurface(display, surface, EGL_HEIGHT, &h);
+
+    engine->display = display;
+    engine->context = context;
+    engine->surface = surface;
+    engine->width = w;
+    engine->height = h;
+    engine->state.angle = 0;
+
+    // Initialize GL state.
+    glHint(GL_PERSPECTIVE_CORRECTION_HINT, GL_FASTEST);
+    glEnable(GL_CULL_FACE);
+    glShadeModel(GL_SMOOTH);
+    glDisable(GL_DEPTH_TEST);
+
+    return 0;
+}
+
+/**
+ * Draw the current frame in the display.
+ */
+static void engine_draw_frame(struct engine* engine) {
+    if (engine->display == NULL) {
+        // No display.
+        return;
+    }
+
+    // Just fill the screen with a color.
+    glClearColor(((float)engine->state.x)/engine->width, engine->state.angle,
+            ((float)engine->state.y)/engine->height, 1);
+    glClear(GL_COLOR_BUFFER_BIT);
+
+    eglSwapBuffers(engine->display, engine->surface);
+}
+
+/**
+ * Tear down the EGL context currently associated with the display.
+ */
+static void engine_term_display(struct engine* engine) {
+    if (engine->display != EGL_NO_DISPLAY) {
+        eglMakeCurrent(engine->display, EGL_NO_SURFACE, EGL_NO_SURFACE, EGL_NO_CONTEXT);
+        if (engine->context != EGL_NO_CONTEXT) {
+            eglDestroyContext(engine->display, engine->context);
+        }
+        if (engine->surface != EGL_NO_SURFACE) {
+            eglDestroySurface(engine->display, engine->surface);
+        }
+        eglTerminate(engine->display);
+    }
+    engine->animating = 0;
+    engine->display = EGL_NO_DISPLAY;
+    engine->context = EGL_NO_CONTEXT;
+    engine->surface = EGL_NO_SURFACE;
+}
+
+/**
+ * Process the next input event.
+ */
+static int32_t engine_handle_input(struct android_app* app, AInputEvent* event) {
+    struct engine* engine = (struct engine*)app->userData;
+    if (AInputEvent_getType(event) == AINPUT_EVENT_TYPE_MOTION) {
+        engine->animating = 1;
+        engine->state.x = AMotionEvent_getX(event, 0);
+        engine->state.y = AMotionEvent_getY(event, 0);
+        return 1;
+    }
+    return 0;
+}
+
+/**
+ * Process the next main command.
+ */
+static void engine_handle_cmd(struct android_app* app, int32_t cmd) {
+    struct engine* engine = (struct engine*)app->userData;
+    switch (cmd) {
+        case APP_CMD_SAVE_STATE:
+            // The system has asked us to save our current state. Do so.
+            engine->app->savedState = malloc(sizeof(struct saved_state));
+            *((struct saved_state*)engine->app->savedState) = engine->state;
+            engine->app->savedStateSize = sizeof(struct saved_state);
+            break;
+        case APP_CMD_INIT_WINDOW:
+            // The window is being shown, get it ready.
+            if (engine->app->window != NULL) {
+                engine_init_display(engine);
+                engine_draw_frame(engine);
+            }
+            break;
+        case APP_CMD_TERM_WINDOW:
+            // The window is being hidden or closed, clean it up.
+            engine_term_display(engine);
+            break;
+        case APP_CMD_GAINED_FOCUS:
+            // When our app gains focus, we start monitoring the accelerometer.
+            if (engine->accelerometerSensor != NULL) {
+                ASensorEventQueue_enableSensor(engine->sensorEventQueue,
+                        engine->accelerometerSensor);
+                // We'd like to get 60 events per second (in us).
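+                // Due to integer division, (1000L/60)*1000 evaluates to
+                // 16 * 1000 = 16000 us per event, i.e. 62.5 Hz, slightly
+                // faster than the 60 Hz requested above.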
+ ASensorEventQueue_setEventRate(engine->sensorEventQueue, + engine->accelerometerSensor, (1000L/60)*1000); + } + break; + case APP_CMD_LOST_FOCUS: + // When our app loses focus, we stop monitoring the accelerometer. + // This is to avoid consuming battery while not being used. + if (engine->accelerometerSensor != NULL) { + ASensorEventQueue_disableSensor(engine->sensorEventQueue, + engine->accelerometerSensor); + } + // Also stop animating. + engine->animating = 0; + engine_draw_frame(engine); + break; + } +} + +/** + * This is the main entry point of a native application that is using + * android_native_app_glue. It runs in its own thread, with its own + * event loop for receiving input events and doing other things. + */ +void android_main(struct android_app* state) { + struct engine engine; + + // Make sure glue isn't stripped. + app_dummy(); + + memset(&engine, 0, sizeof(engine)); + state->userData = &engine; + state->onAppCmd = engine_handle_cmd; + state->onInputEvent = engine_handle_input; + engine.app = state; + + // Prepare to monitor accelerometer + engine.sensorManager = ASensorManager_getInstance(); + engine.accelerometerSensor = ASensorManager_getDefaultSensor(engine.sensorManager, + ASENSOR_TYPE_ACCELEROMETER); + engine.sensorEventQueue = ASensorManager_createEventQueue(engine.sensorManager, + state->looper, LOOPER_ID_USER, NULL, NULL); + + if (state->savedState != NULL) { + // We are starting with a previous saved state; restore from it. + engine.state = *(struct saved_state*)state->savedState; + } + + // loop waiting for stuff to do. + + while (1) { + // Read all pending events. + int ident; + int events; + struct android_poll_source* source; + + // If not animating, we will block forever waiting for events. + // If animating, we loop until all events are read, then continue + // to draw the next frame of animation. + while ((ident=ALooper_pollAll(engine.animating ? 0 : -1, NULL, &events, + (void**)&source)) >= 0) { + + // Process this event. + if (source != NULL) { + source->process(state, source); + } + + // If a sensor has data, process it now. + if (ident == LOOPER_ID_USER) { + if (engine.accelerometerSensor != NULL) { + ASensorEvent event; + while (ASensorEventQueue_getEvents(engine.sensorEventQueue, + &event, 1) > 0) { + LOGI("accelerometer: x=%f y=%f z=%f", + event.acceleration.x, event.acceleration.y, + event.acceleration.z); + } + } + } + + // Check if we are exiting. + if (state->destroyRequested != 0) { + engine_term_display(&engine); + return; + } + } + + if (engine.animating) { + // Done with events; draw next animation frame. + engine.state.angle += .01f; + if (engine.state.angle > 1) { + engine.state.angle = 0; + } + + // Drawing is throttled to the screen update rate, so there + // is no need to do timing here. 
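+            // The eglSwapBuffers() call inside engine_draw_frame() blocks
+            // until the display consumes the frame, which is what caps this
+            // loop at the screen's refresh rate.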
+ engine_draw_frame(&engine); + } + } +} +//END_INCLUDE(all) diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/android/apmtest/res/values/strings.xml b/third_party/libwebrtc/webrtc/modules/audio_processing/test/android/apmtest/res/values/strings.xml new file mode 100644 index 0000000000..d0bd0f3051 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/android/apmtest/res/values/strings.xml @@ -0,0 +1,4 @@ +<?xml version="1.0" encoding="utf-8"?> +<resources> + <string name="app_name">apmtest</string> +</resources> diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/apmtest.m b/third_party/libwebrtc/webrtc/modules/audio_processing/test/apmtest.m new file mode 100644 index 0000000000..1c8183c3ec --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/apmtest.m @@ -0,0 +1,365 @@ +% +% Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. +% +% Use of this source code is governed by a BSD-style license +% that can be found in the LICENSE file in the root of the source +% tree. An additional intellectual property rights grant can be found +% in the file PATENTS. All contributing project authors may +% be found in the AUTHORS file in the root of the source tree. +% + +function apmtest(task, testname, filepath, casenumber, legacy) +%APMTEST is a tool to process APM file sets and easily display the output. +% APMTEST(TASK, TESTNAME, CASENUMBER) performs one of several TASKs: +% 'test' Processes the files to produce test output. +% 'list' Prints a list of cases in the test set, preceded by their +% CASENUMBERs. +% 'show' Uses spclab to show the test case specified by the +% CASENUMBER parameter. +% +% using a set of test files determined by TESTNAME: +% 'all' All tests. +% 'apm' The standard APM test set (default). +% 'apmm' The mobile APM test set. +% 'aec' The AEC test set. +% 'aecm' The AECM test set. +% 'agc' The AGC test set. +% 'ns' The NS test set. +% 'vad' The VAD test set. +% +% FILEPATH specifies the path to the test data files. +% +% CASENUMBER can be used to select a single test case. Omit CASENUMBER, +% or set to zero, to use all test cases. +% + +if nargin < 5 || isempty(legacy) + % Set to true to run old VQE recordings. 
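+  % When LEGACY is true the script invokes the old ./test binary and expects
+  % vqe*-named recordings; otherwise it invokes ./process_test and uses the
+  % apm_* file names (see the file name setup below).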
+ legacy = false; +end + +if nargin < 4 || isempty(casenumber) + casenumber = 0; +end + +if nargin < 3 || isempty(filepath) + filepath = 'data/'; +end + +if nargin < 2 || isempty(testname) + testname = 'all'; +end + +if nargin < 1 || isempty(task) + task = 'test'; +end + +if ~strcmp(task, 'test') && ~strcmp(task, 'list') && ~strcmp(task, 'show') + error(['TASK ' task ' is not recognized']); +end + +if casenumber == 0 && strcmp(task, 'show') + error(['CASENUMBER must be specified for TASK ' task]); +end + +inpath = [filepath 'input/']; +outpath = [filepath 'output/']; +refpath = [filepath 'reference/']; + +if strcmp(testname, 'all') + tests = {'apm','apmm','aec','aecm','agc','ns','vad'}; +else + tests = {testname}; +end + +if legacy + progname = './test'; +else + progname = './process_test'; +end + +global farFile; +global nearFile; +global eventFile; +global delayFile; +global driftFile; + +if legacy + farFile = 'vqeFar.pcm'; + nearFile = 'vqeNear.pcm'; + eventFile = 'vqeEvent.dat'; + delayFile = 'vqeBuf.dat'; + driftFile = 'vqeDrift.dat'; +else + farFile = 'apm_far.pcm'; + nearFile = 'apm_near.pcm'; + eventFile = 'apm_event.dat'; + delayFile = 'apm_delay.dat'; + driftFile = 'apm_drift.dat'; +end + +simulateMode = false; +nErr = 0; +nCases = 0; +for i=1:length(tests) + simulateMode = false; + + if strcmp(tests{i}, 'apm') + testdir = ['apm/']; + outfile = ['out']; + if legacy + opt = ['-ec 1 -agc 2 -nc 2 -vad 3']; + else + opt = ['--no_progress -hpf' ... + ' -aec --drift_compensation -agc --fixed_digital' ... + ' -ns --ns_moderate -vad']; + end + + elseif strcmp(tests{i}, 'apm-swb') + simulateMode = true; + testdir = ['apm-swb/']; + outfile = ['out']; + if legacy + opt = ['-fs 32000 -ec 1 -agc 2 -nc 2']; + else + opt = ['--no_progress -fs 32000 -hpf' ... + ' -aec --drift_compensation -agc --adaptive_digital' ... + ' -ns --ns_moderate -vad']; + end + elseif strcmp(tests{i}, 'apmm') + testdir = ['apmm/']; + outfile = ['out']; + opt = ['-aec --drift_compensation -agc --fixed_digital -hpf -ns ' ... + '--ns_moderate']; + + else + error(['TESTNAME ' tests{i} ' is not recognized']); + end + + inpathtest = [inpath testdir]; + outpathtest = [outpath testdir]; + refpathtest = [refpath testdir]; + + if ~exist(inpathtest,'dir') + error(['Input directory ' inpathtest ' does not exist']); + end + + if ~exist(refpathtest,'dir') + warning(['Reference directory ' refpathtest ' does not exist']); + end + + [status, errMsg] = mkdir(outpathtest); + if (status == 0) + error(errMsg); + end + + [nErr, nCases] = recurseDir(inpathtest, outpathtest, refpathtest, outfile, ... + progname, opt, simulateMode, nErr, nCases, task, casenumber, legacy); + + if strcmp(task, 'test') || strcmp(task, 'show') + system(['rm ' farFile]); + system(['rm ' nearFile]); + if simulateMode == false + system(['rm ' eventFile]); + system(['rm ' delayFile]); + system(['rm ' driftFile]); + end + end +end + +if ~strcmp(task, 'list') + if nErr == 0 + fprintf(1, '\nAll files are bit-exact to reference\n', nErr); + else + fprintf(1, '\n%d files are NOT bit-exact to reference\n', nErr); + end +end + + +function [nErrOut, nCases] = recurseDir(inpath, outpath, refpath, ... + outfile, progname, opt, simulateMode, nErr, nCases, task, casenumber, ... + legacy) + +global farFile; +global nearFile; +global eventFile; +global delayFile; +global driftFile; + +dirs = dir(inpath); +nDirs = 0; +nErrOut = nErr; +for i=3:length(dirs) % skip . and .. 
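+  % Count the subdirectories: a directory containing none is treated as a
+  % leaf test case below; otherwise we recurse into each subdirectory.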
+ nDirs = nDirs + dirs(i).isdir; +end + + +if nDirs == 0 + nCases = nCases + 1; + + if casenumber == nCases || casenumber == 0 + + if strcmp(task, 'list') + fprintf([num2str(nCases) '. ' outfile '\n']) + else + vadoutfile = ['vad_' outfile '.dat']; + outfile = [outfile '.pcm']; + + % Check for VAD test + vadTest = 0; + if ~isempty(findstr(opt, '-vad')) + vadTest = 1; + if legacy + opt = [opt ' ' outpath vadoutfile]; + else + opt = [opt ' --vad_out_file ' outpath vadoutfile]; + end + end + + if exist([inpath 'vqeFar.pcm']) + system(['ln -s -f ' inpath 'vqeFar.pcm ' farFile]); + elseif exist([inpath 'apm_far.pcm']) + system(['ln -s -f ' inpath 'apm_far.pcm ' farFile]); + end + + if exist([inpath 'vqeNear.pcm']) + system(['ln -s -f ' inpath 'vqeNear.pcm ' nearFile]); + elseif exist([inpath 'apm_near.pcm']) + system(['ln -s -f ' inpath 'apm_near.pcm ' nearFile]); + end + + if exist([inpath 'vqeEvent.dat']) + system(['ln -s -f ' inpath 'vqeEvent.dat ' eventFile]); + elseif exist([inpath 'apm_event.dat']) + system(['ln -s -f ' inpath 'apm_event.dat ' eventFile]); + end + + if exist([inpath 'vqeBuf.dat']) + system(['ln -s -f ' inpath 'vqeBuf.dat ' delayFile]); + elseif exist([inpath 'apm_delay.dat']) + system(['ln -s -f ' inpath 'apm_delay.dat ' delayFile]); + end + + if exist([inpath 'vqeSkew.dat']) + system(['ln -s -f ' inpath 'vqeSkew.dat ' driftFile]); + elseif exist([inpath 'vqeDrift.dat']) + system(['ln -s -f ' inpath 'vqeDrift.dat ' driftFile]); + elseif exist([inpath 'apm_drift.dat']) + system(['ln -s -f ' inpath 'apm_drift.dat ' driftFile]); + end + + if simulateMode == false + command = [progname ' -o ' outpath outfile ' ' opt]; + else + if legacy + inputCmd = [' -in ' nearFile]; + else + inputCmd = [' -i ' nearFile]; + end + + if exist([farFile]) + if legacy + inputCmd = [' -if ' farFile inputCmd]; + else + inputCmd = [' -ir ' farFile inputCmd]; + end + end + command = [progname inputCmd ' -o ' outpath outfile ' ' opt]; + end + % This prevents MATLAB from using its own C libraries. + shellcmd = ['bash -c "unset LD_LIBRARY_PATH;']; + fprintf([command '\n']); + [status, result] = system([shellcmd command '"']); + fprintf(result); + + fprintf(['Reference file: ' refpath outfile '\n']); + + if vadTest == 1 + equal_to_ref = are_files_equal([outpath vadoutfile], ... + [refpath vadoutfile], ... + 'int8'); + if ~equal_to_ref + nErr = nErr + 1; + end + end + + [equal_to_ref, diffvector] = are_files_equal([outpath outfile], ... + [refpath outfile], ... + 'int16'); + if ~equal_to_ref + nErr = nErr + 1; + end + + if strcmp(task, 'show') + % Assume the last init gives the sample rate of interest. + str_idx = strfind(result, 'Sample rate:'); + fs = str2num(result(str_idx(end) + 13:str_idx(end) + 17)); + fprintf('Using %d Hz\n', fs); + + if exist([farFile]) + spclab(fs, farFile, nearFile, [refpath outfile], ... + [outpath outfile], diffvector); + %spclab(fs, diffvector); + else + spclab(fs, nearFile, [refpath outfile], [outpath outfile], ... + diffvector); + %spclab(fs, diffvector); + end + end + end + end +else + + for i=3:length(dirs) + if dirs(i).isdir + [nErr, nCases] = recurseDir([inpath dirs(i).name '/'], outpath, ... + refpath,[outfile '_' dirs(i).name], progname, opt, ... + simulateMode, nErr, nCases, task, casenumber, legacy); + end + end +end +nErrOut = nErr; + +function [are_equal, diffvector] = ... 
+ are_files_equal(newfile, reffile, precision, diffvector) + +are_equal = false; +diffvector = 0; +if ~exist(newfile,'file') + warning(['Output file ' newfile ' does not exist']); + return +end + +if ~exist(reffile,'file') + warning(['Reference file ' reffile ' does not exist']); + return +end + +fid = fopen(newfile,'rb'); +new = fread(fid,inf,precision); +fclose(fid); + +fid = fopen(reffile,'rb'); +ref = fread(fid,inf,precision); +fclose(fid); + +if length(new) ~= length(ref) + warning('Reference is not the same length as output'); + minlength = min(length(new), length(ref)); + new = new(1:minlength); + ref = ref(1:minlength); +end +diffvector = new - ref; + +if isequal(new, ref) + fprintf([newfile ' is bit-exact to reference\n']); + are_equal = true; +else + if isempty(new) + warning([newfile ' is empty']); + return + end + snr = snrseg(new,ref,80); + fprintf('\n'); + are_equal = false; +end diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/audio_buffer_tools.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/test/audio_buffer_tools.cc new file mode 100644 index 0000000000..0f0e5cd520 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/audio_buffer_tools.cc @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/test/audio_buffer_tools.h" + +#include <string.h> + +namespace webrtc { +namespace test { + +void SetupFrame(const StreamConfig& stream_config, + std::vector<float*>* frame, + std::vector<float>* frame_samples) { + frame_samples->resize(stream_config.num_channels() * + stream_config.num_frames()); + frame->resize(stream_config.num_channels()); + for (size_t ch = 0; ch < stream_config.num_channels(); ++ch) { + (*frame)[ch] = &(*frame_samples)[ch * stream_config.num_frames()]; + } +} + +void CopyVectorToAudioBuffer(const StreamConfig& stream_config, + rtc::ArrayView<const float> source, + AudioBuffer* destination) { + std::vector<float*> input; + std::vector<float> input_samples; + + SetupFrame(stream_config, &input, &input_samples); + + RTC_CHECK_EQ(input_samples.size(), source.size()); + memcpy(input_samples.data(), source.data(), + source.size() * sizeof(source[0])); + + destination->CopyFrom(&input[0], stream_config); +} + +void ExtractVectorFromAudioBuffer(const StreamConfig& stream_config, + AudioBuffer* source, + std::vector<float>* destination) { + std::vector<float*> output; + + SetupFrame(stream_config, &output, destination); + + source->CopyTo(stream_config, &output[0]); +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/audio_buffer_tools.h b/third_party/libwebrtc/webrtc/modules/audio_processing/test/audio_buffer_tools.h new file mode 100644 index 0000000000..dc53e4f6a2 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/audio_buffer_tools.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_TEST_AUDIO_BUFFER_TOOLS_H_ +#define MODULES_AUDIO_PROCESSING_TEST_AUDIO_BUFFER_TOOLS_H_ + +#include <vector> +#include "api/array_view.h" +#include "modules/audio_processing/audio_buffer.h" +#include "modules/audio_processing/include/audio_processing.h" + +namespace webrtc { +namespace test { + +// Copies a vector into an audiobuffer. +void CopyVectorToAudioBuffer(const StreamConfig& stream_config, + rtc::ArrayView<const float> source, + AudioBuffer* destination); + +// Extracts a vector from an audiobuffer. +void ExtractVectorFromAudioBuffer(const StreamConfig& stream_config, + AudioBuffer* source, + std::vector<float>* destination); + +} // namespace test +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_TEST_AUDIO_BUFFER_TOOLS_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/audio_processing_simulator.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/test/audio_processing_simulator.cc new file mode 100644 index 0000000000..416fd98849 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/audio_processing_simulator.cc @@ -0,0 +1,451 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/test/audio_processing_simulator.h" + +#include <algorithm> +#include <iostream> +#include <sstream> +#include <string> +#include <utility> +#include <vector> + +#include "common_audio/include/audio_util.h" +#include "modules/audio_processing/aec_dump/aec_dump_factory.h" +#include "modules/audio_processing/include/audio_processing.h" +#include "modules/audio_processing/test/fake_recording_device.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" +#include "rtc_base/stringutils.h" + +namespace webrtc { +namespace test { +namespace { + +void CopyFromAudioFrame(const AudioFrame& src, ChannelBuffer<float>* dest) { + RTC_CHECK_EQ(src.num_channels_, dest->num_channels()); + RTC_CHECK_EQ(src.samples_per_channel_, dest->num_frames()); + // Copy the data from the input buffer. 
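+  // S16ToFloat() maps the int16 samples into the [-1.f, 1.f] float range;
+  // Deinterleave() then splits the interleaved frame into per-channel arrays.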
+ std::vector<float> tmp(src.samples_per_channel_ * src.num_channels_); + S16ToFloat(src.data(), tmp.size(), tmp.data()); + Deinterleave(tmp.data(), src.samples_per_channel_, src.num_channels_, + dest->channels()); +} + +std::string GetIndexedOutputWavFilename(const std::string& wav_name, + int counter) { + std::stringstream ss; + ss << wav_name.substr(0, wav_name.size() - 4) << "_" << counter + << wav_name.substr(wav_name.size() - 4); + return ss.str(); +} + +void WriteEchoLikelihoodGraphFileHeader(std::ofstream* output_file) { + (*output_file) << "import numpy as np" << std::endl + << "import matplotlib.pyplot as plt" << std::endl + << "y = np.array(["; +} + +void WriteEchoLikelihoodGraphFileFooter(std::ofstream* output_file) { + (*output_file) << "])" << std::endl + << "x = np.arange(len(y))*.01" << std::endl + << "plt.plot(x, y)" << std::endl + << "plt.ylabel('Echo likelihood')" << std::endl + << "plt.xlabel('Time (s)')" << std::endl + << "plt.ylim([0,1])" << std::endl + << "plt.show()" << std::endl; +} + +} // namespace + +SimulationSettings::SimulationSettings() = default; +SimulationSettings::SimulationSettings(const SimulationSettings&) = default; +SimulationSettings::~SimulationSettings() = default; + +void CopyToAudioFrame(const ChannelBuffer<float>& src, AudioFrame* dest) { + RTC_CHECK_EQ(src.num_channels(), dest->num_channels_); + RTC_CHECK_EQ(src.num_frames(), dest->samples_per_channel_); + int16_t* dest_data = dest->mutable_data(); + for (size_t ch = 0; ch < dest->num_channels_; ++ch) { + for (size_t sample = 0; sample < dest->samples_per_channel_; ++sample) { + dest_data[sample * dest->num_channels_ + ch] = + src.channels()[ch][sample] * 32767; + } + } +} + +AudioProcessingSimulator::AudioProcessingSimulator( + const SimulationSettings& settings) + : settings_(settings), + analog_mic_level_(settings.initial_mic_level), + fake_recording_device_( + settings.initial_mic_level, + settings_.simulate_mic_gain ? *settings.simulated_mic_kind : 0), + worker_queue_("file_writer_task_queue") { + if (settings_.ed_graph_output_filename && + !settings_.ed_graph_output_filename->empty()) { + residual_echo_likelihood_graph_writer_.open( + *settings_.ed_graph_output_filename); + RTC_CHECK(residual_echo_likelihood_graph_writer_.is_open()); + WriteEchoLikelihoodGraphFileHeader(&residual_echo_likelihood_graph_writer_); + } + + if (settings_.simulate_mic_gain) + RTC_LOG(LS_VERBOSE) << "Simulating analog mic gain"; +} + +AudioProcessingSimulator::~AudioProcessingSimulator() { + if (residual_echo_likelihood_graph_writer_.is_open()) { + WriteEchoLikelihoodGraphFileFooter(&residual_echo_likelihood_graph_writer_); + residual_echo_likelihood_graph_writer_.close(); + } +} + +AudioProcessingSimulator::ScopedTimer::~ScopedTimer() { + int64_t interval = rtc::TimeNanos() - start_time_; + proc_time_->sum += interval; + proc_time_->max = std::max(proc_time_->max, interval); + proc_time_->min = std::min(proc_time_->min, interval); +} + +void AudioProcessingSimulator::ProcessStream(bool fixed_interface) { + // Optionally use the fake recording device to simulate analog gain. + if (settings_.simulate_mic_gain) { + if (settings_.aec_dump_input_filename) { + // When the analog gain is simulated and an AEC dump is used as input, set + // the undo level to |aec_dump_mic_level_| to virtually restore the + // unmodified microphone signal level. 
+ fake_recording_device_.SetUndoMicLevel(aec_dump_mic_level_); + } + + if (fixed_interface) { + fake_recording_device_.SimulateAnalogGain(&fwd_frame_); + } else { + fake_recording_device_.SimulateAnalogGain(in_buf_.get()); + } + + // Notify the current mic level to AGC. + RTC_CHECK_EQ(AudioProcessing::kNoError, + ap_->gain_control()->set_stream_analog_level( + fake_recording_device_.MicLevel())); + } else { + // Notify the current mic level to AGC. + RTC_CHECK_EQ(AudioProcessing::kNoError, + ap_->gain_control()->set_stream_analog_level( + settings_.aec_dump_input_filename ? aec_dump_mic_level_ + : analog_mic_level_)); + } + + // Process the current audio frame. + if (fixed_interface) { + { + const auto st = ScopedTimer(mutable_proc_time()); + RTC_CHECK_EQ(AudioProcessing::kNoError, ap_->ProcessStream(&fwd_frame_)); + } + CopyFromAudioFrame(fwd_frame_, out_buf_.get()); + } else { + const auto st = ScopedTimer(mutable_proc_time()); + RTC_CHECK_EQ(AudioProcessing::kNoError, + ap_->ProcessStream(in_buf_->channels(), in_config_, + out_config_, out_buf_->channels())); + } + + // Store the mic level suggested by AGC. + // Note that when the analog gain is simulated and an AEC dump is used as + // input, |analog_mic_level_| will not be used with set_stream_analog_level(). + analog_mic_level_ = ap_->gain_control()->stream_analog_level(); + if (settings_.simulate_mic_gain) { + fake_recording_device_.SetMicLevel(analog_mic_level_); + } + + if (buffer_writer_) { + buffer_writer_->Write(*out_buf_); + } + + if (residual_echo_likelihood_graph_writer_.is_open()) { + auto stats = ap_->GetStatistics(); + residual_echo_likelihood_graph_writer_ << stats.residual_echo_likelihood + << ", "; + } + + ++num_process_stream_calls_; +} + +void AudioProcessingSimulator::ProcessReverseStream(bool fixed_interface) { + if (fixed_interface) { + const auto st = ScopedTimer(mutable_proc_time()); + RTC_CHECK_EQ(AudioProcessing::kNoError, + ap_->ProcessReverseStream(&rev_frame_)); + CopyFromAudioFrame(rev_frame_, reverse_out_buf_.get()); + + } else { + const auto st = ScopedTimer(mutable_proc_time()); + RTC_CHECK_EQ(AudioProcessing::kNoError, + ap_->ProcessReverseStream( + reverse_in_buf_->channels(), reverse_in_config_, + reverse_out_config_, reverse_out_buf_->channels())); + } + + if (reverse_buffer_writer_) { + reverse_buffer_writer_->Write(*reverse_out_buf_); + } + + ++num_reverse_process_stream_calls_; +} + +void AudioProcessingSimulator::SetupBuffersConfigsOutputs( + int input_sample_rate_hz, + int output_sample_rate_hz, + int reverse_input_sample_rate_hz, + int reverse_output_sample_rate_hz, + int input_num_channels, + int output_num_channels, + int reverse_input_num_channels, + int reverse_output_num_channels) { + in_config_ = StreamConfig(input_sample_rate_hz, input_num_channels); + in_buf_.reset(new ChannelBuffer<float>( + rtc::CheckedDivExact(input_sample_rate_hz, kChunksPerSecond), + input_num_channels)); + + reverse_in_config_ = + StreamConfig(reverse_input_sample_rate_hz, reverse_input_num_channels); + reverse_in_buf_.reset(new ChannelBuffer<float>( + rtc::CheckedDivExact(reverse_input_sample_rate_hz, kChunksPerSecond), + reverse_input_num_channels)); + + out_config_ = StreamConfig(output_sample_rate_hz, output_num_channels); + out_buf_.reset(new ChannelBuffer<float>( + rtc::CheckedDivExact(output_sample_rate_hz, kChunksPerSecond), + output_num_channels)); + + reverse_out_config_ = + StreamConfig(reverse_output_sample_rate_hz, reverse_output_num_channels); + reverse_out_buf_.reset(new ChannelBuffer<float>( + 
rtc::CheckedDivExact(reverse_output_sample_rate_hz, kChunksPerSecond), + reverse_output_num_channels)); + + fwd_frame_.sample_rate_hz_ = input_sample_rate_hz; + fwd_frame_.samples_per_channel_ = + rtc::CheckedDivExact(fwd_frame_.sample_rate_hz_, kChunksPerSecond); + fwd_frame_.num_channels_ = input_num_channels; + + rev_frame_.sample_rate_hz_ = reverse_input_sample_rate_hz; + rev_frame_.samples_per_channel_ = + rtc::CheckedDivExact(rev_frame_.sample_rate_hz_, kChunksPerSecond); + rev_frame_.num_channels_ = reverse_input_num_channels; + + if (settings_.use_verbose_logging) { + rtc::LogMessage::LogToDebug(rtc::LS_VERBOSE); + + std::cout << "Sample rates:" << std::endl; + std::cout << " Forward input: " << input_sample_rate_hz << std::endl; + std::cout << " Forward output: " << output_sample_rate_hz << std::endl; + std::cout << " Reverse input: " << reverse_input_sample_rate_hz + << std::endl; + std::cout << " Reverse output: " << reverse_output_sample_rate_hz + << std::endl; + std::cout << "Number of channels: " << std::endl; + std::cout << " Forward input: " << input_num_channels << std::endl; + std::cout << " Forward output: " << output_num_channels << std::endl; + std::cout << " Reverse input: " << reverse_input_num_channels << std::endl; + std::cout << " Reverse output: " << reverse_output_num_channels + << std::endl; + } + + SetupOutput(); +} + +void AudioProcessingSimulator::SetupOutput() { + if (settings_.output_filename) { + std::string filename; + if (settings_.store_intermediate_output) { + filename = GetIndexedOutputWavFilename(*settings_.output_filename, + output_reset_counter_); + } else { + filename = *settings_.output_filename; + } + + std::unique_ptr<WavWriter> out_file( + new WavWriter(filename, out_config_.sample_rate_hz(), + static_cast<size_t>(out_config_.num_channels()))); + buffer_writer_.reset(new ChannelBufferWavWriter(std::move(out_file))); + } + + if (settings_.reverse_output_filename) { + std::string filename; + if (settings_.store_intermediate_output) { + filename = GetIndexedOutputWavFilename(*settings_.reverse_output_filename, + output_reset_counter_); + } else { + filename = *settings_.reverse_output_filename; + } + + std::unique_ptr<WavWriter> reverse_out_file( + new WavWriter(filename, reverse_out_config_.sample_rate_hz(), + static_cast<size_t>(reverse_out_config_.num_channels()))); + reverse_buffer_writer_.reset( + new ChannelBufferWavWriter(std::move(reverse_out_file))); + } + + ++output_reset_counter_; +} + +void AudioProcessingSimulator::DestroyAudioProcessor() { + if (settings_.aec_dump_output_filename) { + ap_->DetachAecDump(); + } +} + +void AudioProcessingSimulator::CreateAudioProcessor() { + Config config; + AudioProcessing::Config apm_config; + std::unique_ptr<EchoControlFactory> echo_control_factory; + if (settings_.use_bf && *settings_.use_bf) { + config.Set<Beamforming>(new Beamforming( + true, ParseArrayGeometry(*settings_.microphone_positions), + SphericalPointf(DegreesToRadians(settings_.target_angle_degrees), 0.f, + 1.f))); + } + if (settings_.use_ts) { + config.Set<ExperimentalNs>(new ExperimentalNs(*settings_.use_ts)); + } + if (settings_.use_ie) { + config.Set<Intelligibility>(new Intelligibility(*settings_.use_ie)); + } + if (settings_.use_agc2) { + apm_config.gain_controller2.enabled = *settings_.use_agc2; + apm_config.gain_controller2.fixed_gain_db = settings_.agc2_fixed_gain_db; + } + if (settings_.use_aec3 && *settings_.use_aec3) { + echo_control_factory.reset(new EchoCanceller3Factory()); + } + if (settings_.use_lc) { + 
apm_config.level_controller.enabled = *settings_.use_lc; + } + if (settings_.use_hpf) { + apm_config.high_pass_filter.enabled = *settings_.use_hpf; + } + + if (settings_.use_refined_adaptive_filter) { + config.Set<RefinedAdaptiveFilter>( + new RefinedAdaptiveFilter(*settings_.use_refined_adaptive_filter)); + } + config.Set<ExtendedFilter>(new ExtendedFilter( + !settings_.use_extended_filter || *settings_.use_extended_filter)); + config.Set<DelayAgnostic>(new DelayAgnostic(!settings_.use_delay_agnostic || + *settings_.use_delay_agnostic)); + config.Set<ExperimentalAgc>(new ExperimentalAgc( + !settings_.use_experimental_agc || *settings_.use_experimental_agc)); + if (settings_.use_ed) { + apm_config.residual_echo_detector.enabled = *settings_.use_ed; + } + + ap_.reset(AudioProcessing::Create(config, nullptr, + std::move(echo_control_factory), nullptr)); + RTC_CHECK(ap_); + + ap_->ApplyConfig(apm_config); + + if (settings_.use_aec) { + RTC_CHECK_EQ(AudioProcessing::kNoError, + ap_->echo_cancellation()->Enable(*settings_.use_aec)); + } + if (settings_.use_aecm) { + RTC_CHECK_EQ(AudioProcessing::kNoError, + ap_->echo_control_mobile()->Enable(*settings_.use_aecm)); + } + if (settings_.use_agc) { + RTC_CHECK_EQ(AudioProcessing::kNoError, + ap_->gain_control()->Enable(*settings_.use_agc)); + } + if (settings_.use_ns) { + RTC_CHECK_EQ(AudioProcessing::kNoError, + ap_->noise_suppression()->Enable(*settings_.use_ns)); + } + if (settings_.use_le) { + RTC_CHECK_EQ(AudioProcessing::kNoError, + ap_->level_estimator()->Enable(*settings_.use_le)); + } + if (settings_.use_vad) { + RTC_CHECK_EQ(AudioProcessing::kNoError, + ap_->voice_detection()->Enable(*settings_.use_vad)); + } + if (settings_.use_agc_limiter) { + RTC_CHECK_EQ(AudioProcessing::kNoError, ap_->gain_control()->enable_limiter( + *settings_.use_agc_limiter)); + } + if (settings_.agc_target_level) { + RTC_CHECK_EQ(AudioProcessing::kNoError, + ap_->gain_control()->set_target_level_dbfs( + *settings_.agc_target_level)); + } + if (settings_.agc_compression_gain) { + RTC_CHECK_EQ(AudioProcessing::kNoError, + ap_->gain_control()->set_compression_gain_db( + *settings_.agc_compression_gain)); + } + if (settings_.agc_mode) { + RTC_CHECK_EQ( + AudioProcessing::kNoError, + ap_->gain_control()->set_mode( + static_cast<webrtc::GainControl::Mode>(*settings_.agc_mode))); + } + + if (settings_.use_drift_compensation) { + RTC_CHECK_EQ(AudioProcessing::kNoError, + ap_->echo_cancellation()->enable_drift_compensation( + *settings_.use_drift_compensation)); + } + + if (settings_.aec_suppression_level) { + RTC_CHECK_EQ(AudioProcessing::kNoError, + ap_->echo_cancellation()->set_suppression_level( + static_cast<webrtc::EchoCancellation::SuppressionLevel>( + *settings_.aec_suppression_level))); + } + + if (settings_.aecm_routing_mode) { + RTC_CHECK_EQ(AudioProcessing::kNoError, + ap_->echo_control_mobile()->set_routing_mode( + static_cast<webrtc::EchoControlMobile::RoutingMode>( + *settings_.aecm_routing_mode))); + } + + if (settings_.use_aecm_comfort_noise) { + RTC_CHECK_EQ(AudioProcessing::kNoError, + ap_->echo_control_mobile()->enable_comfort_noise( + *settings_.use_aecm_comfort_noise)); + } + + if (settings_.vad_likelihood) { + RTC_CHECK_EQ(AudioProcessing::kNoError, + ap_->voice_detection()->set_likelihood( + static_cast<webrtc::VoiceDetection::Likelihood>( + *settings_.vad_likelihood))); + } + if (settings_.ns_level) { + RTC_CHECK_EQ( + AudioProcessing::kNoError, + ap_->noise_suppression()->set_level( + static_cast<NoiseSuppression::Level>(*settings_.ns_level))); 
+ } + + if (settings_.use_ts) { + ap_->set_stream_key_pressed(*settings_.use_ts); + } + + if (settings_.aec_dump_output_filename) { + ap_->AttachAecDump(AecDumpFactory::Create( + *settings_.aec_dump_output_filename, -1, &worker_queue_)); + } +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/audio_processing_simulator.h b/third_party/libwebrtc/webrtc/modules/audio_processing/test/audio_processing_simulator.h new file mode 100644 index 0000000000..41a3f45106 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/audio_processing_simulator.h @@ -0,0 +1,197 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_TEST_AUDIO_PROCESSING_SIMULATOR_H_ +#define MODULES_AUDIO_PROCESSING_TEST_AUDIO_PROCESSING_SIMULATOR_H_ + +#include <algorithm> +#include <fstream> +#include <limits> +#include <memory> +#include <string> + +#include "api/optional.h" +#include "common_audio/channel_buffer.h" +#include "modules/audio_processing/include/audio_processing.h" +#include "modules/audio_processing/test/fake_recording_device.h" +#include "modules/audio_processing/test/test_utils.h" +#include "rtc_base/constructormagic.h" +#include "rtc_base/task_queue.h" +#include "rtc_base/timeutils.h" + +namespace webrtc { +namespace test { + +// Holds all the parameters available for controlling the simulation. +struct SimulationSettings { + SimulationSettings(); + SimulationSettings(const SimulationSettings&); + ~SimulationSettings(); + rtc::Optional<int> stream_delay; + rtc::Optional<int> stream_drift_samples; + rtc::Optional<int> output_sample_rate_hz; + rtc::Optional<int> output_num_channels; + rtc::Optional<int> reverse_output_sample_rate_hz; + rtc::Optional<int> reverse_output_num_channels; + rtc::Optional<std::string> microphone_positions; + int target_angle_degrees = 90; + rtc::Optional<std::string> output_filename; + rtc::Optional<std::string> reverse_output_filename; + rtc::Optional<std::string> input_filename; + rtc::Optional<std::string> reverse_input_filename; + rtc::Optional<std::string> artificial_nearend_filename; + rtc::Optional<bool> use_aec; + rtc::Optional<bool> use_aecm; + rtc::Optional<bool> use_ed; // Residual Echo Detector. 
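+  // When set, a Python (matplotlib) script plotting the residual echo
+  // likelihood over time is written to this path.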
+ rtc::Optional<std::string> ed_graph_output_filename; + rtc::Optional<bool> use_agc; + rtc::Optional<bool> use_agc2; + rtc::Optional<bool> use_hpf; + rtc::Optional<bool> use_ns; + rtc::Optional<bool> use_ts; + rtc::Optional<bool> use_bf; + rtc::Optional<bool> use_ie; + rtc::Optional<bool> use_vad; + rtc::Optional<bool> use_le; + rtc::Optional<bool> use_all; + rtc::Optional<int> aec_suppression_level; + rtc::Optional<bool> use_delay_agnostic; + rtc::Optional<bool> use_extended_filter; + rtc::Optional<bool> use_drift_compensation; + rtc::Optional<bool> use_aec3; + rtc::Optional<bool> use_lc; + rtc::Optional<bool> use_experimental_agc; + rtc::Optional<int> aecm_routing_mode; + rtc::Optional<bool> use_aecm_comfort_noise; + rtc::Optional<int> agc_mode; + rtc::Optional<int> agc_target_level; + rtc::Optional<bool> use_agc_limiter; + rtc::Optional<int> agc_compression_gain; + float agc2_fixed_gain_db; + rtc::Optional<int> vad_likelihood; + rtc::Optional<int> ns_level; + rtc::Optional<bool> use_refined_adaptive_filter; + int initial_mic_level; + bool simulate_mic_gain = false; + rtc::Optional<int> simulated_mic_kind; + bool report_performance = false; + bool report_bitexactness = false; + bool use_verbose_logging = false; + bool discard_all_settings_in_aecdump = true; + rtc::Optional<std::string> aec_dump_input_filename; + rtc::Optional<std::string> aec_dump_output_filename; + bool fixed_interface = false; + bool store_intermediate_output = false; + rtc::Optional<std::string> custom_call_order_filename; +}; + +// Holds a few statistics about a series of TickIntervals. +struct TickIntervalStats { + TickIntervalStats() : min(std::numeric_limits<int64_t>::max()) {} + int64_t sum; + int64_t max; + int64_t min; +}; + +// Copies samples present in a ChannelBuffer into an AudioFrame. +void CopyToAudioFrame(const ChannelBuffer<float>& src, AudioFrame* dest); + +// Provides common functionality for performing audioprocessing simulations. +class AudioProcessingSimulator { + public: + static const int kChunksPerSecond = 1000 / AudioProcessing::kChunkSizeMs; + + explicit AudioProcessingSimulator(const SimulationSettings& settings); + virtual ~AudioProcessingSimulator(); + + // Processes the data in the input. + virtual void Process() = 0; + + // Returns the execution time of all AudioProcessing calls. + const TickIntervalStats& proc_time() const { return proc_time_; } + + // Reports whether the processed recording was bitexact. + bool OutputWasBitexact() { return bitexact_output_; } + + size_t get_num_process_stream_calls() { return num_process_stream_calls_; } + size_t get_num_reverse_process_stream_calls() { + return num_reverse_process_stream_calls_; + } + + protected: + // RAII class for execution time measurement. Updates the provided + // TickIntervalStats based on the time between ScopedTimer creation and + // leaving the enclosing scope. 
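+  // Typical usage (as in ProcessStream()):
+  //   { const auto st = ScopedTimer(mutable_proc_time()); ...timed work... }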
+ class ScopedTimer { + public: + explicit ScopedTimer(TickIntervalStats* proc_time) + : proc_time_(proc_time), start_time_(rtc::TimeNanos()) {} + + ~ScopedTimer(); + + private: + TickIntervalStats* const proc_time_; + int64_t start_time_; + }; + + TickIntervalStats* mutable_proc_time() { return &proc_time_; } + void ProcessStream(bool fixed_interface); + void ProcessReverseStream(bool fixed_interface); + void CreateAudioProcessor(); + void DestroyAudioProcessor(); + void SetupBuffersConfigsOutputs(int input_sample_rate_hz, + int output_sample_rate_hz, + int reverse_input_sample_rate_hz, + int reverse_output_sample_rate_hz, + int input_num_channels, + int output_num_channels, + int reverse_input_num_channels, + int reverse_output_num_channels); + + const SimulationSettings settings_; + std::unique_ptr<AudioProcessing> ap_; + + std::unique_ptr<ChannelBuffer<float>> in_buf_; + std::unique_ptr<ChannelBuffer<float>> out_buf_; + std::unique_ptr<ChannelBuffer<float>> reverse_in_buf_; + std::unique_ptr<ChannelBuffer<float>> reverse_out_buf_; + StreamConfig in_config_; + StreamConfig out_config_; + StreamConfig reverse_in_config_; + StreamConfig reverse_out_config_; + std::unique_ptr<ChannelBufferWavReader> buffer_reader_; + std::unique_ptr<ChannelBufferWavReader> reverse_buffer_reader_; + AudioFrame rev_frame_; + AudioFrame fwd_frame_; + bool bitexact_output_ = true; + int aec_dump_mic_level_ = 0; + + private: + void SetupOutput(); + + size_t num_process_stream_calls_ = 0; + size_t num_reverse_process_stream_calls_ = 0; + size_t output_reset_counter_ = 0; + std::unique_ptr<ChannelBufferWavWriter> buffer_writer_; + std::unique_ptr<ChannelBufferWavWriter> reverse_buffer_writer_; + TickIntervalStats proc_time_; + std::ofstream residual_echo_likelihood_graph_writer_; + int analog_mic_level_; + FakeRecordingDevice fake_recording_device_; + + rtc::TaskQueue worker_queue_; + + RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(AudioProcessingSimulator); +}; + +} // namespace test +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_TEST_AUDIO_PROCESSING_SIMULATOR_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/audioproc_float.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/test/audioproc_float.cc new file mode 100644 index 0000000000..c5229a4e10 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/audioproc_float.cc @@ -0,0 +1,517 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include <iostream> +#include <memory> + +#include <string.h> + +#include "modules/audio_processing/include/audio_processing.h" +#include "modules/audio_processing/test/aec_dump_based_simulator.h" +#include "modules/audio_processing/test/audio_processing_simulator.h" +#include "modules/audio_processing/test/wav_based_simulator.h" +#include "rtc_base/flags.h" + +namespace webrtc { +namespace test { +namespace { + +const int kParameterNotSpecifiedValue = -10000; + +const char kUsageDescription[] = + "Usage: audioproc_f [options] -i <input.wav>\n" + " or\n" + " audioproc_f [options] -dump_input <aec_dump>\n" + "\n\n" + "Command-line tool to simulate a call using the audio " + "processing module, either based on wav files or " + "protobuf debug dump recordings.\n"; + +DEFINE_string(dump_input, "", "Aec dump input filename"); +DEFINE_string(dump_output, "", "Aec dump output filename"); +DEFINE_string(i, "", "Forward stream input wav filename"); +DEFINE_string(o, "", "Forward stream output wav filename"); +DEFINE_string(ri, "", "Reverse stream input wav filename"); +DEFINE_string(ro, "", "Reverse stream output wav filename"); +DEFINE_string(artificial_nearend, "", "Artificial nearend wav filename"); +DEFINE_int(output_num_channels, + kParameterNotSpecifiedValue, + "Number of forward stream output channels"); +DEFINE_int(reverse_output_num_channels, + kParameterNotSpecifiedValue, + "Number of Reverse stream output channels"); +DEFINE_int(output_sample_rate_hz, + kParameterNotSpecifiedValue, + "Forward stream output sample rate in Hz"); +DEFINE_int(reverse_output_sample_rate_hz, + kParameterNotSpecifiedValue, + "Reverse stream output sample rate in Hz"); +DEFINE_string(mic_positions, + "", + "Space delimited cartesian coordinates of microphones in " + "meters. The coordinates of each point are contiguous. For a " + "two element array: \"x1 y1 z1 x2 y2 z2\""); +DEFINE_int(target_angle_degrees, + 90, + "The azimuth of the target in degrees (0-359). 
Only applies to " + "beamforming."); +DEFINE_bool(fixed_interface, + false, + "Use the fixed interface when operating on wav files"); +DEFINE_int(aec, + kParameterNotSpecifiedValue, + "Activate (1) or deactivate(0) the echo canceller"); +DEFINE_int(aecm, + kParameterNotSpecifiedValue, + "Activate (1) or deactivate(0) the mobile echo controller"); +DEFINE_int(ed, + kParameterNotSpecifiedValue, + "Activate (1) or deactivate (0) the residual echo detector"); +DEFINE_string(ed_graph, "", "Output filename for graph of echo likelihood"); +DEFINE_int(agc, + kParameterNotSpecifiedValue, + "Activate (1) or deactivate(0) the AGC"); +DEFINE_int(agc2, + kParameterNotSpecifiedValue, + "Activate (1) or deactivate(0) the AGC2"); +DEFINE_int(hpf, + kParameterNotSpecifiedValue, + "Activate (1) or deactivate(0) the high-pass filter"); +DEFINE_int(ns, + kParameterNotSpecifiedValue, + "Activate (1) or deactivate(0) the noise suppressor"); +DEFINE_int(ts, + kParameterNotSpecifiedValue, + "Activate (1) or deactivate(0) the transient suppressor"); +DEFINE_int(bf, + kParameterNotSpecifiedValue, + "Activate (1) or deactivate(0) the beamformer"); +DEFINE_int(ie, + kParameterNotSpecifiedValue, + "Activate (1) or deactivate(0) the intelligibility enhancer"); +DEFINE_int(vad, + kParameterNotSpecifiedValue, + "Activate (1) or deactivate(0) the voice activity detector"); +DEFINE_int(le, + kParameterNotSpecifiedValue, + "Activate (1) or deactivate(0) the level estimator"); +DEFINE_bool(all_default, + false, + "Activate all of the default components (will be overridden by any " + "other settings)"); +DEFINE_int(aec_suppression_level, + kParameterNotSpecifiedValue, + "Set the aec suppression level (0-2)"); +DEFINE_int(delay_agnostic, + kParameterNotSpecifiedValue, + "Activate (1) or deactivate(0) the AEC delay agnostic mode"); +DEFINE_int(extended_filter, + kParameterNotSpecifiedValue, + "Activate (1) or deactivate(0) the AEC extended filter mode"); +DEFINE_int(drift_compensation, + kParameterNotSpecifiedValue, + "Activate (1) or deactivate(0) the drift compensation"); +DEFINE_int(aec3, + kParameterNotSpecifiedValue, + "Activate (1) or deactivate(0) the experimental AEC mode AEC3"); +DEFINE_int(lc, + kParameterNotSpecifiedValue, + "Activate (1) or deactivate(0) the level control"); +DEFINE_int(experimental_agc, + kParameterNotSpecifiedValue, + "Activate (1) or deactivate(0) the experimental AGC"); +DEFINE_int( + refined_adaptive_filter, + kParameterNotSpecifiedValue, + "Activate (1) or deactivate(0) the refined adaptive filter functionality"); +DEFINE_int(aecm_routing_mode, + kParameterNotSpecifiedValue, + "Specify the AECM routing mode (0-4)"); +DEFINE_int(aecm_comfort_noise, + kParameterNotSpecifiedValue, + "Activate (1) or deactivate(0) the AECM comfort noise"); +DEFINE_int(agc_mode, + kParameterNotSpecifiedValue, + "Specify the AGC mode (0-2)"); +DEFINE_int(agc_target_level, + kParameterNotSpecifiedValue, + "Specify the AGC target level (0-31)"); +DEFINE_int(agc_limiter, + kParameterNotSpecifiedValue, + "Activate (1) or deactivate(0) the level estimator"); +DEFINE_int(agc_compression_gain, + kParameterNotSpecifiedValue, + "Specify the AGC compression gain (0-90)"); +DEFINE_float(agc2_fixed_gain_db, 0.f, "AGC2 fixed gain (dB) to apply"); +DEFINE_int(vad_likelihood, + kParameterNotSpecifiedValue, + "Specify the VAD likelihood (0-3)"); +DEFINE_int(ns_level, + kParameterNotSpecifiedValue, + "Specify the NS level (0-3)"); +DEFINE_int(stream_delay, + kParameterNotSpecifiedValue, + "Specify the stream delay in ms to use"); 
+DEFINE_int(stream_drift_samples, + kParameterNotSpecifiedValue, + "Specify the number of stream drift samples to use"); +DEFINE_int(initial_mic_level, 100, "Initial mic level (0-255)"); +DEFINE_int(simulate_mic_gain, + 0, + "Activate (1) or deactivate(0) the analog mic gain simulation"); +DEFINE_int(simulated_mic_kind, + kParameterNotSpecifiedValue, + "Specify which microphone kind to use for microphone simulation"); +DEFINE_bool(performance_report, false, "Report the APM performance "); +DEFINE_bool(verbose, false, "Produce verbose output"); +DEFINE_bool(bitexactness_report, + false, + "Report bitexactness for aec dump result reproduction"); +DEFINE_bool(discard_settings_in_aecdump, + false, + "Discard any config settings specified in the aec dump"); +DEFINE_bool(store_intermediate_output, + false, + "Creates new output files after each init"); +DEFINE_string(custom_call_order_file, "", "Custom process API call order file"); +DEFINE_bool(help, false, "Print this message"); + +void SetSettingIfSpecified(const std::string& value, + rtc::Optional<std::string>* parameter) { + if (value.compare("") != 0) { + *parameter = value; + } +} + +void SetSettingIfSpecified(int value, rtc::Optional<int>* parameter) { + if (value != kParameterNotSpecifiedValue) { + *parameter = value; + } +} + +void SetSettingIfFlagSet(int32_t flag, rtc::Optional<bool>* parameter) { + if (flag == 0) { + *parameter = false; + } else if (flag == 1) { + *parameter = true; + } +} + +SimulationSettings CreateSettings() { + SimulationSettings settings; + if (FLAG_all_default) { + settings.use_le = true; + settings.use_vad = true; + settings.use_ie = false; + settings.use_bf = false; + settings.use_ts = true; + settings.use_ns = true; + settings.use_hpf = true; + settings.use_agc = true; + settings.use_agc2 = false; + settings.use_aec = true; + settings.use_aecm = false; + settings.use_ed = false; + } + SetSettingIfSpecified(FLAG_dump_input, &settings.aec_dump_input_filename); + SetSettingIfSpecified(FLAG_dump_output, &settings.aec_dump_output_filename); + SetSettingIfSpecified(FLAG_i, &settings.input_filename); + SetSettingIfSpecified(FLAG_o, &settings.output_filename); + SetSettingIfSpecified(FLAG_ri, &settings.reverse_input_filename); + SetSettingIfSpecified(FLAG_ro, &settings.reverse_output_filename); + SetSettingIfSpecified(FLAG_artificial_nearend, + &settings.artificial_nearend_filename); + SetSettingIfSpecified(FLAG_output_num_channels, + &settings.output_num_channels); + SetSettingIfSpecified(FLAG_reverse_output_num_channels, + &settings.reverse_output_num_channels); + SetSettingIfSpecified(FLAG_output_sample_rate_hz, + &settings.output_sample_rate_hz); + SetSettingIfSpecified(FLAG_reverse_output_sample_rate_hz, + &settings.reverse_output_sample_rate_hz); + SetSettingIfSpecified(FLAG_mic_positions, &settings.microphone_positions); + settings.target_angle_degrees = FLAG_target_angle_degrees; + SetSettingIfFlagSet(FLAG_aec, &settings.use_aec); + SetSettingIfFlagSet(FLAG_aecm, &settings.use_aecm); + SetSettingIfFlagSet(FLAG_ed, &settings.use_ed); + SetSettingIfSpecified(FLAG_ed_graph, &settings.ed_graph_output_filename); + SetSettingIfFlagSet(FLAG_agc, &settings.use_agc); + SetSettingIfFlagSet(FLAG_agc2, &settings.use_agc2); + SetSettingIfFlagSet(FLAG_hpf, &settings.use_hpf); + SetSettingIfFlagSet(FLAG_ns, &settings.use_ns); + SetSettingIfFlagSet(FLAG_ts, &settings.use_ts); + SetSettingIfFlagSet(FLAG_bf, &settings.use_bf); + SetSettingIfFlagSet(FLAG_ie, &settings.use_ie); + SetSettingIfFlagSet(FLAG_vad, 
&settings.use_vad); + SetSettingIfFlagSet(FLAG_le, &settings.use_le); + SetSettingIfSpecified(FLAG_aec_suppression_level, + &settings.aec_suppression_level); + SetSettingIfFlagSet(FLAG_delay_agnostic, &settings.use_delay_agnostic); + SetSettingIfFlagSet(FLAG_extended_filter, &settings.use_extended_filter); + SetSettingIfFlagSet(FLAG_drift_compensation, + &settings.use_drift_compensation); + SetSettingIfFlagSet(FLAG_refined_adaptive_filter, + &settings.use_refined_adaptive_filter); + + SetSettingIfFlagSet(FLAG_aec3, &settings.use_aec3); + SetSettingIfFlagSet(FLAG_lc, &settings.use_lc); + SetSettingIfFlagSet(FLAG_experimental_agc, &settings.use_experimental_agc); + SetSettingIfSpecified(FLAG_aecm_routing_mode, &settings.aecm_routing_mode); + SetSettingIfFlagSet(FLAG_aecm_comfort_noise, + &settings.use_aecm_comfort_noise); + SetSettingIfSpecified(FLAG_agc_mode, &settings.agc_mode); + SetSettingIfSpecified(FLAG_agc_target_level, &settings.agc_target_level); + SetSettingIfFlagSet(FLAG_agc_limiter, &settings.use_agc_limiter); + SetSettingIfSpecified(FLAG_agc_compression_gain, + &settings.agc_compression_gain); + settings.agc2_fixed_gain_db = FLAG_agc2_fixed_gain_db; + SetSettingIfSpecified(FLAG_vad_likelihood, &settings.vad_likelihood); + SetSettingIfSpecified(FLAG_ns_level, &settings.ns_level); + SetSettingIfSpecified(FLAG_stream_delay, &settings.stream_delay); + SetSettingIfSpecified(FLAG_stream_drift_samples, + &settings.stream_drift_samples); + SetSettingIfSpecified(FLAG_custom_call_order_file, + &settings.custom_call_order_filename); + settings.initial_mic_level = FLAG_initial_mic_level; + settings.simulate_mic_gain = FLAG_simulate_mic_gain; + SetSettingIfSpecified(FLAG_simulated_mic_kind, &settings.simulated_mic_kind); + settings.report_performance = FLAG_performance_report; + settings.use_verbose_logging = FLAG_verbose; + settings.report_bitexactness = FLAG_bitexactness_report; + settings.discard_all_settings_in_aecdump = FLAG_discard_settings_in_aecdump; + settings.fixed_interface = FLAG_fixed_interface; + settings.store_intermediate_output = FLAG_store_intermediate_output; + + return settings; +} + +void ReportConditionalErrorAndExit(bool condition, const std::string& message) { + if (condition) { + std::cerr << message << std::endl; + exit(1); + } +} + +void PerformBasicParameterSanityChecks(const SimulationSettings& settings) { + if (settings.input_filename || settings.reverse_input_filename) { + ReportConditionalErrorAndExit(!!settings.aec_dump_input_filename, + "Error: The aec dump cannot be specified " + "together with input wav files!\n"); + + ReportConditionalErrorAndExit(!!settings.artificial_nearend_filename, + "Error: The artificial nearend cannot be " + "specified together with input wav files!\n"); + + ReportConditionalErrorAndExit(!settings.input_filename, + "Error: When operating at wav files, the " + "input wav filename must be " + "specified!\n"); + + ReportConditionalErrorAndExit( + settings.reverse_output_filename && !settings.reverse_input_filename, + "Error: When operating at wav files, the reverse input wav filename " + "must be specified if the reverse output wav filename is specified!\n"); + } else { + ReportConditionalErrorAndExit(!settings.aec_dump_input_filename, + "Error: Either the aec dump or the wav " + "input files must be specified!\n"); + } + + ReportConditionalErrorAndExit( + settings.use_aec && *settings.use_aec && settings.use_aecm && + *settings.use_aecm, + "Error: The AEC and the AECM cannot be activated at the same time!\n"); + + 
ReportConditionalErrorAndExit( + settings.output_sample_rate_hz && *settings.output_sample_rate_hz <= 0, + "Error: --output_sample_rate_hz must be positive!\n"); + + ReportConditionalErrorAndExit( + settings.reverse_output_sample_rate_hz && + settings.output_sample_rate_hz && + *settings.output_sample_rate_hz <= 0, + "Error: --reverse_output_sample_rate_hz must be positive!\n"); + + ReportConditionalErrorAndExit( + settings.output_num_channels && *settings.output_num_channels <= 0, + "Error: --output_num_channels must be positive!\n"); + + ReportConditionalErrorAndExit( + settings.reverse_output_num_channels && + *settings.reverse_output_num_channels <= 0, + "Error: --reverse_output_num_channels must be positive!\n"); + + ReportConditionalErrorAndExit( + settings.use_bf && *settings.use_bf && !settings.microphone_positions, + "Error: --mic_positions must be specified when the beamformer is " + "activated.\n"); + + ReportConditionalErrorAndExit( + settings.target_angle_degrees < 0 || settings.target_angle_degrees > 359, + "Error: -target_angle_degrees must be specified between 0 and 359.\n"); + + ReportConditionalErrorAndExit( + settings.aec_suppression_level && + ((*settings.aec_suppression_level) < 0 || + (*settings.aec_suppression_level) > 2), + "Error: --aec_suppression_level must be specified between 0 and 2.\n"); + + ReportConditionalErrorAndExit( + settings.aecm_routing_mode && ((*settings.aecm_routing_mode) < 0 || + (*settings.aecm_routing_mode) > 4), + "Error: --aecm_routing_mode must be specified between 0 and 4.\n"); + + ReportConditionalErrorAndExit( + settings.agc_target_level && ((*settings.agc_target_level) < 0 || + (*settings.agc_target_level) > 31), + "Error: --agc_target_level must be specified between 0 and 31.\n"); + + ReportConditionalErrorAndExit( + settings.agc_compression_gain && ((*settings.agc_compression_gain) < 0 || + (*settings.agc_compression_gain) > 90), + "Error: --agc_compression_gain must be specified between 0 and 90.\n"); + + ReportConditionalErrorAndExit( + settings.use_agc && *settings.use_agc && settings.use_agc2 && + *settings.use_agc2, + "Error: --agc and --agc2 cannot be both active.\n"); + + ReportConditionalErrorAndExit( + settings.use_agc2 && *settings.use_agc2 && + ((settings.agc2_fixed_gain_db) < 0 || + (settings.agc2_fixed_gain_db) > 90), + "Error: --agc2_fixed_gain_db must be specified between 0 and 90.\n"); + + ReportConditionalErrorAndExit( + settings.vad_likelihood && + ((*settings.vad_likelihood) < 0 || (*settings.vad_likelihood) > 3), + "Error: --vad_likelihood must be specified between 0 and 3.\n"); + + ReportConditionalErrorAndExit( + settings.ns_level && + ((*settings.ns_level) < 0 || (*settings.ns_level) > 3), + "Error: --ns_level must be specified between 0 and 3.\n"); + + ReportConditionalErrorAndExit( + settings.report_bitexactness && !settings.aec_dump_input_filename, + "Error: --bitexactness_report can only be used when operating on an " + "aecdump\n"); + + ReportConditionalErrorAndExit( + settings.custom_call_order_filename && settings.aec_dump_input_filename, + "Error: --custom_call_order_file cannot be used when operating on an " + "aecdump\n"); + + ReportConditionalErrorAndExit( + (settings.initial_mic_level < 0 || settings.initial_mic_level > 255), + "Error: --initial_mic_level must be specified between 0 and 255.\n"); + + ReportConditionalErrorAndExit( + settings.simulated_mic_kind && !settings.simulate_mic_gain, + "Error: --simulated_mic_kind cannot be specified when mic simulation is " + "disabled\n"); + + 
ReportConditionalErrorAndExit( + !settings.simulated_mic_kind && settings.simulate_mic_gain, + "Error: --simulated_mic_kind must be specified when mic simulation is " + "enabled\n"); + + auto valid_wav_name = [](const std::string& wav_file_name) { + if (wav_file_name.size() < 5) { + return false; + } + if ((wav_file_name.compare(wav_file_name.size() - 4, 4, ".wav") == 0) || + (wav_file_name.compare(wav_file_name.size() - 4, 4, ".WAV") == 0)) { + return true; + } + return false; + }; + + ReportConditionalErrorAndExit( + settings.input_filename && (!valid_wav_name(*settings.input_filename)), + "Error: --i must be a valid .wav file name.\n"); + + ReportConditionalErrorAndExit( + settings.output_filename && (!valid_wav_name(*settings.output_filename)), + "Error: --o must be a valid .wav file name.\n"); + + ReportConditionalErrorAndExit( + settings.reverse_input_filename && + (!valid_wav_name(*settings.reverse_input_filename)), + "Error: --ri must be a valid .wav file name.\n"); + + ReportConditionalErrorAndExit( + settings.reverse_output_filename && + (!valid_wav_name(*settings.reverse_output_filename)), + "Error: --ro must be a valid .wav file name.\n"); + + ReportConditionalErrorAndExit( + settings.artificial_nearend_filename && + !valid_wav_name(*settings.artificial_nearend_filename), + "Error: --artifical_nearend must be a valid .wav file name.\n"); +} + +} // namespace + +int main(int argc, char* argv[]) { + if (rtc::FlagList::SetFlagsFromCommandLine(&argc, argv, true) || + FLAG_help || argc != 1) { + printf("%s", kUsageDescription); + if (FLAG_help) { + rtc::FlagList::Print(nullptr, false); + return 0; + } + return 1; + } + + SimulationSettings settings = CreateSettings(); + PerformBasicParameterSanityChecks(settings); + std::unique_ptr<AudioProcessingSimulator> processor; + + if (settings.aec_dump_input_filename) { + processor.reset(new AecDumpBasedSimulator(settings)); + } else { + processor.reset(new WavBasedSimulator(settings)); + } + + processor->Process(); + + if (settings.report_performance) { + const auto& proc_time = processor->proc_time(); + int64_t exec_time_us = proc_time.sum / rtc::kNumNanosecsPerMicrosec; + std::cout << std::endl + << "Execution time: " << exec_time_us * 1e-6 << " s, File time: " + << processor->get_num_process_stream_calls() * 1.f / + AudioProcessingSimulator::kChunksPerSecond + << std::endl + << "Time per fwd stream chunk (mean, max, min): " << std::endl + << exec_time_us * 1.f / processor->get_num_process_stream_calls() + << " us, " << 1.f * proc_time.max / rtc::kNumNanosecsPerMicrosec + << " us, " << 1.f * proc_time.min / rtc::kNumNanosecsPerMicrosec + << " us" << std::endl; + } + + if (settings.report_bitexactness && settings.aec_dump_input_filename) { + if (processor->OutputWasBitexact()) { + std::cout << "The processing was bitexact."; + } else { + std::cout << "The processing was not bitexact."; + } + } + + return 0; +} + +} // namespace test +} // namespace webrtc + +int main(int argc, char* argv[]) { + return webrtc::test::main(argc, argv); +} diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/bitexactness_tools.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/test/bitexactness_tools.cc new file mode 100644 index 0000000000..c49bb5099f --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/bitexactness_tools.cc @@ -0,0 +1,146 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. 
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/test/bitexactness_tools.h"
+
+#include <math.h>
+#include <algorithm>
+#include <string>
+#include <vector>
+
+#include "api/array_view.h"
+#include "test/testsupport/fileutils.h"
+
+namespace webrtc {
+namespace test {
+
+std::string GetApmRenderTestVectorFileName(int sample_rate_hz) {
+  switch (sample_rate_hz) {
+    case 8000:
+      return ResourcePath("far8_stereo", "pcm");
+    case 16000:
+      return ResourcePath("far16_stereo", "pcm");
+    case 32000:
+      return ResourcePath("far32_stereo", "pcm");
+    case 48000:
+      return ResourcePath("far48_stereo", "pcm");
+    default:
+      RTC_NOTREACHED();
+  }
+  return "";
+}
+
+std::string GetApmCaptureTestVectorFileName(int sample_rate_hz) {
+  switch (sample_rate_hz) {
+    case 8000:
+      return ResourcePath("near8_stereo", "pcm");
+    case 16000:
+      return ResourcePath("near16_stereo", "pcm");
+    case 32000:
+      return ResourcePath("near32_stereo", "pcm");
+    case 48000:
+      return ResourcePath("near48_stereo", "pcm");
+    default:
+      RTC_NOTREACHED();
+  }
+  return "";
+}
+
+void ReadFloatSamplesFromStereoFile(size_t samples_per_channel,
+                                    size_t num_channels,
+                                    InputAudioFile* stereo_pcm_file,
+                                    rtc::ArrayView<float> data) {
+  RTC_DCHECK_EQ(data.size(), samples_per_channel * num_channels);
+  std::vector<int16_t> read_samples(samples_per_channel * 2);
+  stereo_pcm_file->Read(samples_per_channel * 2, read_samples.data());
+
+  // Convert samples to float and discard any channels not needed.
+  for (size_t sample = 0; sample < samples_per_channel; ++sample) {
+    for (size_t channel = 0; channel < num_channels; ++channel) {
+      data[sample * num_channels + channel] =
+          read_samples[sample * 2 + channel] / 32768.0f;
+    }
+  }
+}
+
+::testing::AssertionResult VerifyDeinterleavedArray(
+    size_t samples_per_channel,
+    size_t num_channels,
+    rtc::ArrayView<const float> reference,
+    rtc::ArrayView<const float> output,
+    float element_error_bound) {
+  // Form vectors to compare the reference to. Only the first values of the
+  // output are compared, so that not all preceding frames have to be
+  // specified as test vectors.
+  const size_t reference_frame_length =
+      rtc::CheckedDivExact(reference.size(), num_channels);
+
+  std::vector<float> output_to_verify;
+  for (size_t channel_no = 0; channel_no < num_channels; ++channel_no) {
+    output_to_verify.insert(output_to_verify.end(),
+                            output.begin() + channel_no * samples_per_channel,
+                            output.begin() + channel_no * samples_per_channel +
+                                reference_frame_length);
+  }
+
+  return VerifyArray(reference, output_to_verify, element_error_bound);
+}
+
+::testing::AssertionResult VerifyArray(rtc::ArrayView<const float> reference,
+                                       rtc::ArrayView<const float> output,
+                                       float element_error_bound) {
+  // The vectors are deemed to be bitexact only if
+  // a) the output is at least as long as the reference, and
+  // b) the samples in the reference are bitexact with the corresponding
+  //    samples in the output.
+
+  bool equal = true;
+  if (output.size() < reference.size()) {
+    equal = false;
+  } else {
+    // Compare the first samples in the vectors.
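+    // Two samples are treated as equal when their absolute difference does
+    // not exceed element_error_bound; a bound of 0.f demands bitexactness.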
+ for (size_t k = 0; k < reference.size(); ++k) { + if (fabs(output[k] - reference[k]) > element_error_bound) { + equal = false; + break; + } + } + } + + if (equal) { + return ::testing::AssertionSuccess(); + } + + // Lambda function that produces a formatted string with the data in the + // vector. + auto print_vector_in_c_format = [](rtc::ArrayView<const float> v, + size_t num_values_to_print) { + std::string s = "{ "; + for (size_t k = 0; k < std::min(num_values_to_print, v.size()); ++k) { + s += std::to_string(v[k]) + "f"; + s += (k < (num_values_to_print - 1)) ? ", " : ""; + } + return s + " }"; + }; + + // If the vectors are deemed not to be similar, return a report of the + // difference. + return ::testing::AssertionFailure() + << std::endl + << " Actual values : " + << print_vector_in_c_format(output, + std::min(output.size(), reference.size())) + << std::endl + << " Expected values: " + << print_vector_in_c_format(reference, reference.size()) << std::endl; +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/bitexactness_tools.h b/third_party/libwebrtc/webrtc/modules/audio_processing/test/bitexactness_tools.h new file mode 100644 index 0000000000..eb5ee081c3 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/bitexactness_tools.h @@ -0,0 +1,56 @@ + +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_TEST_BITEXACTNESS_TOOLS_H_ +#define MODULES_AUDIO_PROCESSING_TEST_BITEXACTNESS_TOOLS_H_ + +#include <string> + +#include "api/array_view.h" +#include "modules/audio_coding/neteq/tools/input_audio_file.h" +#include "test/gtest.h" + +namespace webrtc { +namespace test { + +// Returns test vector to use for the render signal in an +// APM bitexactness test. +std::string GetApmRenderTestVectorFileName(int sample_rate_hz); + +// Returns test vector to use for the capture signal in an +// APM bitexactness test. +std::string GetApmCaptureTestVectorFileName(int sample_rate_hz); + +// Extract float samples from a pcm file. +void ReadFloatSamplesFromStereoFile(size_t samples_per_channel, + size_t num_channels, + InputAudioFile* stereo_pcm_file, + rtc::ArrayView<float> data); + +// Verifies a frame against a reference and returns the results as an +// AssertionResult. +::testing::AssertionResult VerifyDeinterleavedArray( + size_t samples_per_channel, + size_t num_channels, + rtc::ArrayView<const float> reference, + rtc::ArrayView<const float> output, + float element_error_bound); + +// Verifies a vector against a reference and returns the results as an +// AssertionResult. 
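+// Pass an element_error_bound of 0.f to require true bitexactness.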
+::testing::AssertionResult VerifyArray(rtc::ArrayView<const float> reference, + rtc::ArrayView<const float> output, + float element_error_bound); + +} // namespace test +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_TEST_BITEXACTNESS_TOOLS_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/conversational_speech/BUILD.gn b/third_party/libwebrtc/webrtc/modules/audio_processing/test/conversational_speech/BUILD.gn new file mode 100644 index 0000000000..c7263301b6 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/conversational_speech/BUILD.gn @@ -0,0 +1,75 @@ +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +import("../../../../webrtc.gni") + +group("conversational_speech") { + testonly = true + deps = [ + ":conversational_speech_generator", + ] +} + +rtc_executable("conversational_speech_generator") { + testonly = true + sources = [ + "generator.cc", + ] + deps = [ + ":lib", + "../../../../rtc_base:rtc_base_approved", + "../../../../test:test_support", + ] +} + +rtc_static_library("lib") { + testonly = true + sources = [ + "config.cc", + "config.h", + "multiend_call.cc", + "multiend_call.h", + "simulator.cc", + "simulator.h", + "timing.cc", + "timing.h", + "wavreader_abstract_factory.h", + "wavreader_factory.cc", + "wavreader_factory.h", + "wavreader_interface.h", + ] + deps = [ + "../../../..:webrtc_common", + "../../../../api:array_view", + "../../../../common_audio", + "../../../../rtc_base:rtc_base_approved", + ] + visibility = [ ":*" ] # Only targets in this file can depend on this. +} + +rtc_source_set("unittest") { + testonly = true + sources = [ + "generator_unittest.cc", + "mock_wavreader.cc", + "mock_wavreader.h", + "mock_wavreader_factory.cc", + "mock_wavreader_factory.h", + ] + deps = [ + ":lib", + "../../../..:webrtc_common", + "../../../../api:array_view", + "../../../../api:optional", + "../../../../common_audio", + "../../../../rtc_base:rtc_base_approved", + "../../../../test:test_support", + "//testing/gmock", + "//testing/gtest", + ] +} diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/conversational_speech/OWNERS b/third_party/libwebrtc/webrtc/modules/audio_processing/test/conversational_speech/OWNERS new file mode 100644 index 0000000000..0981733ba9 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/conversational_speech/OWNERS @@ -0,0 +1,6 @@ +alessiob@webrtc.org +henrik.lundin@webrtc.org +peah@webrtc.org + +per-file *.gn=* +per-file *.gni=* diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/conversational_speech/README.md b/third_party/libwebrtc/webrtc/modules/audio_processing/test/conversational_speech/README.md new file mode 100644 index 0000000000..bbb4112fc0 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/conversational_speech/README.md @@ -0,0 +1,72 @@ +# Conversational Speech generator tool + +Tool to generate multiple-end audio tracks to simulate conversational speech +with two or more participants. 
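+
+For a quick start, the sketch below shows how the generator is typically
+invoked; the flag names are illustrative assumptions, not verified against
+`generator.cc`, so check the flag definitions there for the actual interface:
+
+```
+# Hypothetical flags: -i = audio tracks dir, -t = timing file, -o = output dir.
+$ ./conversational_speech_generator -i audiotracks/ -t timing.txt -o output/
+```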
+
+The input to the tool is a directory containing a number of audio tracks and
+a text file indicating how to time the sequence of speech turns (see the
+Example section).
+
+Since the timing of the speaking turns is specified by the user, the generated
+tracks may not be suitable for testing scenarios in which there is
+unpredictable network delay (e.g., end-to-end RTC assessment).
+
+Instead, the generated pairs can be used when the delay is constant (including
+the case in which there is no delay).
+For instance, echo cancellation in the APM module can be evaluated using
+two-end audio tracks as input and reverse input.
+
+By using negative and positive time offsets, one can reproduce cross-talk
+(aka double-talk) and silence in the conversation.
+
+### Example
+
+For each end, there is a set of audio tracks, e.g., a1, a2, a3 and a4
+(speaker A) and b1, b2 (speaker B).
+The text file with the timing information may look like this:
+
+```
+A a1 0
+B b1 0
+A a2 100
+B b2 -200
+A a3 0
+A a4 0
+```
+
+The first column indicates the speaker name, the second contains the audio
+track file names, and the third the offsets (in milliseconds) used to
+concatenate the chunks.
+
+Assume that all the audio tracks in the example above are 1000 ms long.
+The tool will then generate two tracks (A and B) that look like this:
+
+**Track A**
+```
+  a1 (1000 ms)
+  silence (1100 ms)
+  a2 (1000 ms)
+  silence (800 ms)
+  a3 (1000 ms)
+  a4 (1000 ms)
+```
+
+**Track B**
+```
+  silence (1000 ms)
+  b1 (1000 ms)
+  silence (900 ms)
+  b2 (1000 ms)
+  silence (2000 ms)
+```
+
+The two tracks can also be visualized as follows (one character represents
+100 ms, "." is silence and "*" is speech).
+
+```
+t: 0         1         2         3         4         5         6 (s)
+A: **********...........**********........********************
+B: ..........**********.........**********....................
+                                ^ 200 ms cross-talk
+        100 ms silence ^
+```
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/conversational_speech/config.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/test/conversational_speech/config.cc
new file mode 100644
index 0000000000..76d3de8108
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/conversational_speech/config.cc
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */ + +#include "modules/audio_processing/test/conversational_speech/config.h" + +namespace webrtc { +namespace test { +namespace conversational_speech { + +const std::string& Config::audiotracks_path() const { + return audiotracks_path_; +} + +const std::string& Config::timing_filepath() const { + return timing_filepath_; +} + +const std::string& Config::output_path() const { + return output_path_; +} + +} // namespace conversational_speech +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/conversational_speech/config.h b/third_party/libwebrtc/webrtc/modules/audio_processing/test/conversational_speech/config.h new file mode 100644 index 0000000000..f219a65f60 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/conversational_speech/config.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_CONFIG_H_ +#define MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_CONFIG_H_ + +#include <string> + +namespace webrtc { +namespace test { +namespace conversational_speech { + +struct Config { + Config(const std::string& audiotracks_path, + const std::string& timing_filepath, + const std::string& output_path) + : audiotracks_path_(audiotracks_path), + timing_filepath_(timing_filepath), + output_path_(output_path) {} + + const std::string& audiotracks_path() const; + const std::string& timing_filepath() const; + const std::string& output_path() const; + + const std::string audiotracks_path_; + const std::string timing_filepath_; + const std::string output_path_; +}; + +} // namespace conversational_speech +} // namespace test +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_CONFIG_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/conversational_speech/generator.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/test/conversational_speech/generator.cc new file mode 100644 index 0000000000..ee3f81a9a2 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/conversational_speech/generator.cc @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include <iostream> + +#include "modules/audio_processing/test/conversational_speech/config.h" +#include "modules/audio_processing/test/conversational_speech/multiend_call.h" +#include "modules/audio_processing/test/conversational_speech/simulator.h" +#include "modules/audio_processing/test/conversational_speech/timing.h" +#include "modules/audio_processing/test/conversational_speech/wavreader_factory.h" +#include "rtc_base/flags.h" +#include "rtc_base/ptr_util.h" +#include "test/testsupport/fileutils.h" + +namespace webrtc { +namespace test { +namespace { + +const char kUsageDescription[] = + "Usage: conversational_speech_generator\n" + " -i <path/to/source/audiotracks>\n" + " -t <path/to/timing_file.txt>\n" + " -o <output/path>\n" + "\n\n" + "Command-line tool to generate multiple-end audio tracks to simulate " + "conversational speech with two or more participants.\n"; + +DEFINE_string(i, "", "Directory containing the speech turn wav files"); +DEFINE_string(t, "", "Path to the timing text file"); +DEFINE_string(o, "", "Output wav files destination path"); +DEFINE_bool(help, false, "Prints this message"); + +} // namespace + +int main(int argc, char* argv[]) { + if (rtc::FlagList::SetFlagsFromCommandLine(&argc, argv, true) || + FLAG_help || argc != 1) { + printf("%s", kUsageDescription); + if (FLAG_help) { + rtc::FlagList::Print(nullptr, false); + return 0; + } + return 1; + } + RTC_CHECK(DirExists(FLAG_i)); + RTC_CHECK(FileExists(FLAG_t)); + RTC_CHECK(DirExists(FLAG_o)); + + conversational_speech::Config config(FLAG_i, FLAG_t, FLAG_o); + + // Load timing. + std::vector<conversational_speech::Turn> timing = + conversational_speech::LoadTiming(config.timing_filepath()); + + // Parse timing and audio tracks. + auto wavreader_factory = rtc::MakeUnique< + conversational_speech::WavReaderFactory>(); + conversational_speech::MultiEndCall multiend_call( + timing, config.audiotracks_path(), std::move(wavreader_factory)); + + // Generate output audio tracks. + auto generated_audiotrack_pairs = conversational_speech::Simulate( + multiend_call, config.output_path()); + + // Show paths to created audio tracks. + std::cout << "Output files:" << std::endl; + for (const auto& output_paths_entry : *generated_audiotrack_pairs) { + std::cout << " speaker: " << output_paths_entry.first << std::endl; + std::cout << " near end: " << output_paths_entry.second.near_end + << std::endl; + std::cout << " far end: " << output_paths_entry.second.far_end + << std::endl; + } + + return 0; +} + +} // namespace test +} // namespace webrtc + +int main(int argc, char* argv[]) { + return webrtc::test::main(argc, argv); +} diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/conversational_speech/generator_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/test/conversational_speech/generator_unittest.cc new file mode 100644 index 0000000000..064e8c82d7 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/conversational_speech/generator_unittest.cc @@ -0,0 +1,688 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+// This file consists of unit tests for webrtc::test::conversational_speech
+// members. Some of them focus on accepting or rejecting different
+// conversational speech setups. A setup is defined by a set of audio tracks
+// and timing information.
+// The docstring at the beginning of each TEST(ConversationalSpeechTest,
+// MultiEndCallSetup*) function looks like the drawing below and indicates
+// which setup is tested.
+//
+//    Accept:
+//    A 0****.....
+//    B .....1****
+//
+// The drawing indicates the following:
+// - the illustrated setup should be accepted,
+// - there are two speakers (namely, A and B),
+// - A speaks first and B speaks second,
+// - each character after the speaker's letter indicates a time unit (e.g., 100
+//   ms),
+// - "*" indicates speaking, "." listening,
+// - numbers indicate the turn index in std::vector<Turn>.
+//
+// Note that the same speaker can appear on multiple lines in order to depict
+// cases in which there are wrong offsets leading to self cross-talk (which is
+// rejected).
+
+// MSVC++ requires this to be set before any other includes to get M_PI.
+#define _USE_MATH_DEFINES
+
+#include <stdio.h>
+#include <cmath>
+#include <map>
+#include <memory>
+#include <vector>
+
+#include "api/optional.h"
+#include "common_audio/wav_file.h"
+#include "modules/audio_processing/test/conversational_speech/config.h"
+#include "modules/audio_processing/test/conversational_speech/mock_wavreader_factory.h"
+#include "modules/audio_processing/test/conversational_speech/multiend_call.h"
+#include "modules/audio_processing/test/conversational_speech/simulator.h"
+#include "modules/audio_processing/test/conversational_speech/timing.h"
+#include "modules/audio_processing/test/conversational_speech/wavreader_factory.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/pathutils.h"
+#include "test/gmock.h"
+#include "test/gtest.h"
+#include "test/testsupport/fileutils.h"
+
+namespace webrtc {
+namespace test {
+namespace {
+
+using conversational_speech::LoadTiming;
+using conversational_speech::SaveTiming;
+using conversational_speech::MockWavReaderFactory;
+using conversational_speech::MultiEndCall;
+using conversational_speech::Turn;
+using conversational_speech::WavReaderFactory;
+
+const char* const audiotracks_path = "/path/to/audiotracks";
+const char* const timing_filepath = "/path/to/timing_file.txt";
+const char* const output_path = "/path/to/output_dir";
+
+const std::vector<Turn> expected_timing = {
+    {"A", "a1", 0},
+    {"B", "b1", 0},
+    {"A", "a2", 100},
+    {"B", "b2", -200},
+    {"A", "a3", 0},
+    {"A", "a3", 0},
+};
+const std::size_t kNumberOfTurns = expected_timing.size();
+
+// Default arguments for MockWavReaderFactory ctor.
+// Fake audio track parameters.
+constexpr int kDefaultSampleRate = 48000;
+const std::map<std::string, const MockWavReaderFactory::Params>
+    kDefaultMockWavReaderFactoryParamsMap = {
+        {"t300", {kDefaultSampleRate, 1u, 14400u}},   // Mono, 0.3 seconds.
+        {"t500", {kDefaultSampleRate, 1u, 24000u}},   // Mono, 0.5 seconds.
+        {"t1000", {kDefaultSampleRate, 1u, 48000u}},  // Mono, 1.0 seconds.
+        {"sr8000", {8000, 1u, 8000u}},     // 8kHz sample rate, mono, 1 second.
+        {"sr16000", {16000, 1u, 16000u}},  // 16kHz sample rate, mono, 1 second.
+        {"sr16000_stereo", {16000, 2u, 16000u}},  // Like sr16000, but stereo.
+};
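+// Editor's note: each Params entry above encodes its duration in samples,
+// e.g. {kDefaultSampleRate, 1u, 24000u} is 24000 samples at 48000 Hz, i.e.
+// the 0.5 s of mono audio that the "t500" key name refers to.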
+const MockWavReaderFactory::Params& kDefaultMockWavReaderFactoryParams =
+    kDefaultMockWavReaderFactoryParamsMap.at("t500");
+
+std::unique_ptr<MockWavReaderFactory> CreateMockWavReaderFactory() {
+  return std::unique_ptr<MockWavReaderFactory>(
+      new MockWavReaderFactory(kDefaultMockWavReaderFactoryParams,
+                               kDefaultMockWavReaderFactoryParamsMap));
+}
+
+void CreateSineWavFile(const std::string& filepath,
+                       const MockWavReaderFactory::Params& params,
+                       float frequency = 440.0f) {
+  // Create samples.
+  constexpr double two_pi = 2.0 * M_PI;
+  std::vector<int16_t> samples(params.num_samples);
+  for (std::size_t i = 0; i < params.num_samples; ++i) {
+    // TODO(alessiob): the produced tone is not pure, improve.
+    samples[i] = std::lround(32767.0f * std::sin(
+        two_pi * i * frequency / params.sample_rate));
+  }
+
+  // Write samples.
+  WavWriter wav_writer(filepath, params.sample_rate, params.num_channels);
+  wav_writer.WriteSamples(samples.data(), params.num_samples);
+}
+
+// Parameters to generate audio tracks with CreateSineWavFile.
+struct SineAudioTrackParams {
+  MockWavReaderFactory::Params params;
+  float frequency;
+};
+
+// Creates a temporary directory in which sine audio tracks are written.
+std::string CreateTemporarySineAudioTracks(
+    const std::map<std::string, SineAudioTrackParams>& sine_tracks_params) {
+  // Create temporary directory.
+  rtc::Pathname temp_directory(OutputPath());
+  temp_directory.AppendFolder("TempConversationalSpeechAudioTracks");
+  CreateDir(temp_directory.pathname());
+
+  // Create sine tracks.
+  for (const auto& it : sine_tracks_params) {
+    const rtc::Pathname temp_filepath(temp_directory.pathname(), it.first);
+    CreateSineWavFile(
+        temp_filepath.pathname(), it.second.params, it.second.frequency);
+  }
+
+  return temp_directory.pathname();
+}
+
+void CheckAudioTrackParams(const WavReaderFactory& wav_reader_factory,
+                           const std::string& filepath,
+                           const MockWavReaderFactory::Params& expected_params) {
+  auto wav_reader = wav_reader_factory.Create(filepath);
+  EXPECT_EQ(expected_params.sample_rate, wav_reader->SampleRate());
+  EXPECT_EQ(expected_params.num_channels, wav_reader->NumChannels());
+  EXPECT_EQ(expected_params.num_samples, wav_reader->NumSamples());
+}
+
+void DeleteFolderAndContents(const std::string& dir) {
+  if (!DirExists(dir)) { return; }
+  rtc::Optional<std::vector<std::string>> dir_content = ReadDirectory(dir);
+  EXPECT_TRUE(dir_content);
+  for (const auto& path : *dir_content) {
+    if (DirExists(path)) {
+      DeleteFolderAndContents(path);
+    } else if (FileExists(path)) {
+      // TODO(alessiob): Wrap with EXPECT_TRUE() once the webrtc:7769 bug is
+      // fixed.
+      RemoveFile(path);
+    } else {
+      FAIL();
+    }
+  }
+  // TODO(alessiob): Wrap with EXPECT_TRUE() once the webrtc:7769 bug is fixed.
+  RemoveDir(dir);
+}
+
+}  // namespace
+
+using testing::_;
+
+TEST(ConversationalSpeechTest, Settings) {
+  const conversational_speech::Config config(
+      audiotracks_path, timing_filepath, output_path);
+
+  // Test getters.
+  EXPECT_EQ(audiotracks_path, config.audiotracks_path());
+  EXPECT_EQ(timing_filepath, config.timing_filepath());
+  EXPECT_EQ(output_path, config.output_path());
+}
+
+TEST(ConversationalSpeechTest, TimingSaveLoad) {
+  // Save test timing.
+  const std::string temporary_filepath = TempFilename(
+      OutputPath(), "TempTimingTestFile");
+  SaveTiming(temporary_filepath, expected_timing);
+
+  // Create a std::vector<Turn> instance by loading from file.
+ std::vector<Turn> actual_timing = LoadTiming(temporary_filepath); + std::remove(temporary_filepath.c_str()); + + // Check size. + EXPECT_EQ(expected_timing.size(), actual_timing.size()); + + // Check Turn instances. + for (size_t index = 0; index < expected_timing.size(); ++index) { + EXPECT_EQ(expected_timing[index], actual_timing[index]) + << "turn #" << index << " not matching"; + } +} + +TEST(ConversationalSpeechTest, MultiEndCallCreate) { + auto mock_wavreader_factory = CreateMockWavReaderFactory(); + + // There are 5 unique audio tracks to read. + EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(5); + + // Inject the mock wav reader factory. + conversational_speech::MultiEndCall multiend_call( + expected_timing, audiotracks_path, std::move(mock_wavreader_factory)); + EXPECT_TRUE(multiend_call.valid()); + + // Test. + EXPECT_EQ(2u, multiend_call.speaker_names().size()); + EXPECT_EQ(5u, multiend_call.audiotrack_readers().size()); + EXPECT_EQ(6u, multiend_call.speaking_turns().size()); +} + +TEST(ConversationalSpeechTest, MultiEndCallSetupDifferentSampleRates) { + const std::vector<Turn> timing = { + {"A", "sr8000", 0}, + {"B", "sr16000", 0}, + }; + auto mock_wavreader_factory = CreateMockWavReaderFactory(); + + // There are two unique audio tracks to read. + EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(2); + + MultiEndCall multiend_call( + timing, audiotracks_path, std::move(mock_wavreader_factory)); + EXPECT_FALSE(multiend_call.valid()); +} + +TEST(ConversationalSpeechTest, MultiEndCallSetupMultipleChannels) { + const std::vector<Turn> timing = { + {"A", "sr16000_stereo", 0}, + {"B", "sr16000_stereo", 0}, + }; + auto mock_wavreader_factory = CreateMockWavReaderFactory(); + + // There is one unique audio track to read. + EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(1); + + MultiEndCall multiend_call( + timing, audiotracks_path, std::move(mock_wavreader_factory)); + EXPECT_FALSE(multiend_call.valid()); +} + +TEST(ConversationalSpeechTest, + MultiEndCallSetupDifferentSampleRatesAndMultipleNumChannels) { + const std::vector<Turn> timing = { + {"A", "sr8000", 0}, + {"B", "sr16000_stereo", 0}, + }; + auto mock_wavreader_factory = CreateMockWavReaderFactory(); + + // There are two unique audio tracks to read. + EXPECT_CALL(*mock_wavreader_factory, Create(testing::_)).Times(2); + + MultiEndCall multiend_call( + timing, audiotracks_path, std::move(mock_wavreader_factory)); + EXPECT_FALSE(multiend_call.valid()); +} + +TEST(ConversationalSpeechTest, MultiEndCallSetupFirstOffsetNegative) { + const std::vector<Turn> timing = { + {"A", "t500", -100}, + {"B", "t500", 0}, + }; + auto mock_wavreader_factory = CreateMockWavReaderFactory(); + + // There is one unique audio track to read. + EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1); + + conversational_speech::MultiEndCall multiend_call( + timing, audiotracks_path, std::move(mock_wavreader_factory)); + EXPECT_FALSE(multiend_call.valid()); +} + +TEST(ConversationalSpeechTest, MultiEndCallSetupSimple) { + // Accept: + // A 0****..... + // B .....1**** + constexpr std::size_t expected_duration = kDefaultSampleRate; + const std::vector<Turn> timing = { + {"A", "t500", 0}, + {"B", "t500", 0}, + }; + auto mock_wavreader_factory = CreateMockWavReaderFactory(); + + // There is one unique audio track to read. 
+ EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1); + + conversational_speech::MultiEndCall multiend_call( + timing, audiotracks_path, std::move(mock_wavreader_factory)); + EXPECT_TRUE(multiend_call.valid()); + + // Test. + EXPECT_EQ(2u, multiend_call.speaker_names().size()); + EXPECT_EQ(1u, multiend_call.audiotrack_readers().size()); + EXPECT_EQ(2u, multiend_call.speaking_turns().size()); + EXPECT_EQ(expected_duration, multiend_call.total_duration_samples()); +} + +TEST(ConversationalSpeechTest, MultiEndCallSetupPause) { + // Accept: + // A 0****....... + // B .......1**** + constexpr std::size_t expected_duration = kDefaultSampleRate * 1.2; + const std::vector<Turn> timing = { + {"A", "t500", 0}, + {"B", "t500", 200}, + }; + auto mock_wavreader_factory = CreateMockWavReaderFactory(); + + // There is one unique audio track to read. + EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1); + + conversational_speech::MultiEndCall multiend_call( + timing, audiotracks_path, std::move(mock_wavreader_factory)); + EXPECT_TRUE(multiend_call.valid()); + + // Test. + EXPECT_EQ(2u, multiend_call.speaker_names().size()); + EXPECT_EQ(1u, multiend_call.audiotrack_readers().size()); + EXPECT_EQ(2u, multiend_call.speaking_turns().size()); + EXPECT_EQ(expected_duration, multiend_call.total_duration_samples()); +} + +TEST(ConversationalSpeechTest, MultiEndCallSetupCrossTalk) { + // Accept: + // A 0****.... + // B ....1**** + constexpr std::size_t expected_duration = kDefaultSampleRate * 0.9; + const std::vector<Turn> timing = { + {"A", "t500", 0}, + {"B", "t500", -100}, + }; + auto mock_wavreader_factory = CreateMockWavReaderFactory(); + + // There is one unique audio track to read. + EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1); + + conversational_speech::MultiEndCall multiend_call( + timing, audiotracks_path, std::move(mock_wavreader_factory)); + EXPECT_TRUE(multiend_call.valid()); + + // Test. + EXPECT_EQ(2u, multiend_call.speaker_names().size()); + EXPECT_EQ(1u, multiend_call.audiotrack_readers().size()); + EXPECT_EQ(2u, multiend_call.speaking_turns().size()); + EXPECT_EQ(expected_duration, multiend_call.total_duration_samples()); +} + +TEST(ConversationalSpeechTest, MultiEndCallSetupInvalidOrder) { + // Reject: + // A ..0**** + // B .1****. The n-th turn cannot start before the (n-1)-th one. + const std::vector<Turn> timing = { + {"A", "t500", 200}, + {"B", "t500", -600}, + }; + auto mock_wavreader_factory = CreateMockWavReaderFactory(); + + // There is one unique audio track to read. + EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1); + + conversational_speech::MultiEndCall multiend_call( + timing, audiotracks_path, std::move(mock_wavreader_factory)); + EXPECT_FALSE(multiend_call.valid()); +} + +TEST(ConversationalSpeechTest, MultiEndCallSetupCrossTalkThree) { + // Accept: + // A 0****2****... + // B ...1********* + constexpr std::size_t expected_duration = kDefaultSampleRate * 1.3; + const std::vector<Turn> timing = { + {"A", "t500", 0}, + {"B", "t1000", -200}, + {"A", "t500", -800}, + }; + auto mock_wavreader_factory = CreateMockWavReaderFactory(); + + // There are two unique audio tracks to read. + EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2); + + conversational_speech::MultiEndCall multiend_call( + timing, audiotracks_path, std::move(mock_wavreader_factory)); + EXPECT_TRUE(multiend_call.valid()); + + // Test. 
+  EXPECT_EQ(2u, multiend_call.speaker_names().size());
+  EXPECT_EQ(2u, multiend_call.audiotrack_readers().size());
+  EXPECT_EQ(3u, multiend_call.speaking_turns().size());
+  EXPECT_EQ(expected_duration, multiend_call.total_duration_samples());
+}
+
+TEST(ConversationalSpeechTest, MultiEndCallSetupSelfCrossTalkNearInvalid) {
+  // Reject:
+  //    A 0****......
+  //    A ...1****...
+  //    B ......2****
+  //          ^ Turn #1 overlaps with #0 which is from the same speaker.
+  const std::vector<Turn> timing = {
+      {"A", "t500", 0},
+      {"A", "t500", -200},
+      {"B", "t500", -200},
+  };
+  auto mock_wavreader_factory = CreateMockWavReaderFactory();
+
+  // There is one unique audio track to read.
+  EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1);
+
+  conversational_speech::MultiEndCall multiend_call(
+      timing, audiotracks_path, std::move(mock_wavreader_factory));
+  EXPECT_FALSE(multiend_call.valid());
+}
+
+TEST(ConversationalSpeechTest, MultiEndCallSetupSelfCrossTalkFarInvalid) {
+  // Reject:
+  //    A 0*********
+  //    B 1**.......
+  //    C ...2**....
+  //    A ......3**.
+  //          ^ Turn #3 overlaps with #0 which is from the same speaker.
+  const std::vector<Turn> timing = {
+      {"A", "t1000", 0},
+      {"B", "t300", -1000},
+      {"C", "t300", 0},
+      {"A", "t300", 0},
+  };
+  auto mock_wavreader_factory = CreateMockWavReaderFactory();
+
+  // There are two unique audio tracks to read.
+  EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2);
+
+  conversational_speech::MultiEndCall multiend_call(
+      timing, audiotracks_path, std::move(mock_wavreader_factory));
+  EXPECT_FALSE(multiend_call.valid());
+}
+
+TEST(ConversationalSpeechTest, MultiEndCallSetupCrossTalkMiddleValid) {
+  // Accept:
+  //    A 0*********..
+  //    B ..1****.....
+  //    C .......2****
+  constexpr std::size_t expected_duration = kDefaultSampleRate * 1.2;
+  const std::vector<Turn> timing = {
+      {"A", "t1000", 0},
+      {"B", "t500", -800},
+      {"C", "t500", 0},
+  };
+  auto mock_wavreader_factory = CreateMockWavReaderFactory();
+
+  // There are two unique audio tracks to read.
+  EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2);
+
+  conversational_speech::MultiEndCall multiend_call(
+      timing, audiotracks_path, std::move(mock_wavreader_factory));
+  EXPECT_TRUE(multiend_call.valid());
+
+  // Test.
+  EXPECT_EQ(3u, multiend_call.speaker_names().size());
+  EXPECT_EQ(2u, multiend_call.audiotrack_readers().size());
+  EXPECT_EQ(3u, multiend_call.speaking_turns().size());
+  EXPECT_EQ(expected_duration, multiend_call.total_duration_samples());
+}
+
+TEST(ConversationalSpeechTest, MultiEndCallSetupCrossTalkMiddleInvalid) {
+  // Reject:
+  //    A 0*********
+  //    B ..1****...
+  //    C ....2****.
+  //        ^ Turn #2 overlaps both with #0 and #1 (cross-talk with 3+ speakers
+  //          not permitted).
+  const std::vector<Turn> timing = {
+      {"A", "t1000", 0},
+      {"B", "t500", -800},
+      {"C", "t500", -300},
+  };
+  auto mock_wavreader_factory = CreateMockWavReaderFactory();
+
+  // There are two unique audio tracks to read.
+  EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2);
+
+  conversational_speech::MultiEndCall multiend_call(
+      timing, audiotracks_path, std::move(mock_wavreader_factory));
+  EXPECT_FALSE(multiend_call.valid());
+}
+
+TEST(ConversationalSpeechTest, MultiEndCallSetupCrossTalkMiddleAndPause) {
+  // Accept:
+  //    A 0*********..
+  //    B .1****......
+  //    C .......2****
+  constexpr std::size_t expected_duration = kDefaultSampleRate * 1.2;
+  const std::vector<Turn> timing = {
+      {"A", "t1000", 0},
+      {"B", "t500", -900},
+      {"C", "t500", 100},
+  };
+  auto mock_wavreader_factory = CreateMockWavReaderFactory();
+
+  // There are two unique audio tracks to read.
+  EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2);
+
+  conversational_speech::MultiEndCall multiend_call(
+      timing, audiotracks_path, std::move(mock_wavreader_factory));
+  EXPECT_TRUE(multiend_call.valid());
+
+  // Test.
+  EXPECT_EQ(3u, multiend_call.speaker_names().size());
+  EXPECT_EQ(2u, multiend_call.audiotrack_readers().size());
+  EXPECT_EQ(3u, multiend_call.speaking_turns().size());
+  EXPECT_EQ(expected_duration, multiend_call.total_duration_samples());
+}
+
+TEST(ConversationalSpeechTest, MultiEndCallSetupCrossTalkFullOverlapValid) {
+  // Accept:
+  //    A 0****
+  //    B 1****
+  const std::vector<Turn> timing = {
+      {"A", "t500", 0},
+      {"B", "t500", -500},
+  };
+  auto mock_wavreader_factory = CreateMockWavReaderFactory();
+
+  // There is one unique audio track to read.
+  EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(1);
+
+  conversational_speech::MultiEndCall multiend_call(
+      timing, audiotracks_path, std::move(mock_wavreader_factory));
+  EXPECT_TRUE(multiend_call.valid());
+
+  // Test.
+  EXPECT_EQ(2u, multiend_call.speaker_names().size());
+  EXPECT_EQ(1u, multiend_call.audiotrack_readers().size());
+  EXPECT_EQ(2u, multiend_call.speaking_turns().size());
+}
+
+TEST(ConversationalSpeechTest, MultiEndCallSetupLongSequence) {
+  // Accept:
+  //    A 0****....3****.5**.
+  //    B .....1****...4**...
+  //    C ......2**.......6**
+  constexpr std::size_t expected_duration = kDefaultSampleRate * 1.9;
+  const std::vector<Turn> timing = {
+      {"A", "t500", 0},
+      {"B", "t500", 0},
+      {"C", "t300", -400},
+      {"A", "t500", 0},
+      {"B", "t300", -100},
+      {"A", "t300", -100},
+      {"C", "t300", -200},
+  };
+  auto mock_wavreader_factory = std::unique_ptr<MockWavReaderFactory>(
+      new MockWavReaderFactory(kDefaultMockWavReaderFactoryParams,
+                               kDefaultMockWavReaderFactoryParamsMap));
+
+  // There are two unique audio tracks to read.
+  EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2);
+
+  conversational_speech::MultiEndCall multiend_call(
+      timing, audiotracks_path, std::move(mock_wavreader_factory));
+  EXPECT_TRUE(multiend_call.valid());
+
+  // Test.
+  EXPECT_EQ(3u, multiend_call.speaker_names().size());
+  EXPECT_EQ(2u, multiend_call.audiotrack_readers().size());
+  EXPECT_EQ(7u, multiend_call.speaking_turns().size());
+  EXPECT_EQ(expected_duration, multiend_call.total_duration_samples());
+}
+
+TEST(ConversationalSpeechTest, MultiEndCallSetupLongSequenceInvalid) {
+  // Reject:
+  //    A 0****....3****5**.
+  //    B .....1****...4**..
+  //    C ......2**......6**
+  //                  ^ Turns #4, #5 and #6 overlapping (cross-talk with 3+
+  //                    speakers not permitted).
+  const std::vector<Turn> timing = {
+      {"A", "t500", 0},
+      {"B", "t500", 0},
+      {"C", "t300", -400},
+      {"A", "t500", 0},
+      {"B", "t300", -100},
+      {"A", "t300", -200},
+      {"C", "t300", -200},
+  };
+  auto mock_wavreader_factory = std::unique_ptr<MockWavReaderFactory>(
+      new MockWavReaderFactory(kDefaultMockWavReaderFactoryParams,
+                               kDefaultMockWavReaderFactoryParamsMap));
+
+  // There are two unique audio tracks to read.
+  EXPECT_CALL(*mock_wavreader_factory, Create(_)).Times(2);
+
+  conversational_speech::MultiEndCall multiend_call(
+      timing, audiotracks_path, std::move(mock_wavreader_factory));
+  EXPECT_FALSE(multiend_call.valid());
+}
+
+TEST(ConversationalSpeechTest, MultiEndCallWavReaderAdaptorSine) {
+  // Parameters with which wav files are created.
+  constexpr int duration_seconds = 5;
+  const int sample_rates[] = {8000, 11025, 16000, 22050, 32000, 44100, 48000};
+
+  for (int sample_rate : sample_rates) {
+    const rtc::Pathname temp_filename(
+        OutputPath(), "TempSineWavFile_" + std::to_string(sample_rate) +
+        ".wav");
+
+    // Write wav file.
+    const std::size_t num_samples = duration_seconds * sample_rate;
+    MockWavReaderFactory::Params params = {sample_rate, 1u, num_samples};
+    CreateSineWavFile(temp_filename.pathname(), params);
+
+    // Load wav file and check if params match.
+    WavReaderFactory wav_reader_factory;
+    MockWavReaderFactory::Params expected_params = {
+        sample_rate, 1u, num_samples};
+    CheckAudioTrackParams(
+        wav_reader_factory, temp_filename.pathname(), expected_params);
+
+    // Clean up.
+    remove(temp_filename.pathname().c_str());
+  }
+}
+
+TEST(ConversationalSpeechTest, DISABLED_MultiEndCallSimulator) {
+  // Simulated call (one character corresponding to 500 ms):
+  //    A 0*********...........2*********.....
+  //    B ...........1*********.....3*********
+  const std::vector<Turn> expected_timing = {
+      {"A", "t5000_440.wav", 0},
+      {"B", "t5000_880.wav", 500},
+      {"A", "t5000_440.wav", 0},
+      {"B", "t5000_880.wav", -2500},
+  };
+  const std::size_t expected_duration_seconds = 18;
+
+  // Create temporary audio track files.
+  const int sample_rate = 16000;
+  const std::map<std::string, SineAudioTrackParams> sine_tracks_params = {
+      {"t5000_440.wav", {{sample_rate, 1u, sample_rate * 5}, 440.0}},
+      {"t5000_880.wav", {{sample_rate, 1u, sample_rate * 5}, 880.0}},
+  };
+  const std::string audiotracks_path = CreateTemporarySineAudioTracks(
+      sine_tracks_params);
+
+  // Set up the multi-end call.
+  auto wavreader_factory = std::unique_ptr<WavReaderFactory>(
+      new WavReaderFactory());
+  MultiEndCall multiend_call(
+      expected_timing, audiotracks_path, std::move(wavreader_factory));
+
+  // Simulate the call.
+  rtc::Pathname output_path(audiotracks_path);
+  output_path.AppendFolder("output");
+  CreateDir(output_path.pathname());
+  RTC_LOG(LS_VERBOSE) << "simulator output path: " << output_path.pathname();
+  auto generated_audiotrack_pairs = conversational_speech::Simulate(
+      multiend_call, output_path.pathname());
+  EXPECT_EQ(2u, generated_audiotrack_pairs->size());
+
+  // Check the output.
+  WavReaderFactory wav_reader_factory;
+  const MockWavReaderFactory::Params expected_params = {
+      sample_rate, 1u, sample_rate * expected_duration_seconds};
+  for (const auto& it : *generated_audiotrack_pairs) {
+    RTC_LOG(LS_VERBOSE) << "checking far/near-end for <" << it.first << ">";
+    CheckAudioTrackParams(
+        wav_reader_factory, it.second.near_end, expected_params);
+    CheckAudioTrackParams(
+        wav_reader_factory, it.second.far_end, expected_params);
+  }
+
+  // Clean up.
+ EXPECT_NO_FATAL_FAILURE(DeleteFolderAndContents(audiotracks_path)); +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/conversational_speech/mock_wavreader.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/test/conversational_speech/mock_wavreader.cc new file mode 100644 index 0000000000..103483d83e --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/conversational_speech/mock_wavreader.cc @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/test/conversational_speech/mock_wavreader.h" + +namespace webrtc { +namespace test { +namespace conversational_speech { + +using testing::Return; + +MockWavReader::MockWavReader( + int sample_rate, size_t num_channels, size_t num_samples) + : sample_rate_(sample_rate), num_channels_(num_channels), + num_samples_(num_samples) { + ON_CALL(*this, SampleRate()).WillByDefault(Return(sample_rate_)); + ON_CALL(*this, NumChannels()).WillByDefault(Return(num_channels_)); + ON_CALL(*this, NumSamples()).WillByDefault(Return(num_samples_)); +} + +MockWavReader::~MockWavReader() = default; + +} // namespace conversational_speech +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/conversational_speech/mock_wavreader.h b/third_party/libwebrtc/webrtc/modules/audio_processing/test/conversational_speech/mock_wavreader.h new file mode 100644 index 0000000000..55d00b31ae --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/conversational_speech/mock_wavreader.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_MOCK_WAVREADER_H_ +#define MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_MOCK_WAVREADER_H_ + +#include <cstddef> +#include <string> + +#include "api/array_view.h" +#include "modules/audio_processing/test/conversational_speech/wavreader_interface.h" +#include "test/gmock.h" +#include "typedefs.h" // NOLINT(build/include) + +namespace webrtc { +namespace test { +namespace conversational_speech { + +class MockWavReader : public WavReaderInterface { + public: + MockWavReader(int sample_rate, size_t num_channels, size_t num_samples); + ~MockWavReader(); + + // TODO(alessiob): use ON_CALL to return random samples if needed. 
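+  // A possible shape for that TODO (editor's sketch, hypothetical, not
+  // checked-in code): the constructor could fill the destination view with
+  // zeros and report a full read, e.g.:
+  //
+  //   ON_CALL(*this, ReadInt16Samples(testing::_))
+  //       .WillByDefault(testing::Invoke([](rtc::ArrayView<int16_t> dst) {
+  //         std::fill(dst.begin(), dst.end(), 0);
+  //         return dst.size();
+  //       }));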
+ MOCK_METHOD1(ReadFloatSamples, size_t(rtc::ArrayView<float>)); + MOCK_METHOD1(ReadInt16Samples, size_t(rtc::ArrayView<int16_t>)); + + MOCK_CONST_METHOD0(SampleRate, int()); + MOCK_CONST_METHOD0(NumChannels, size_t()); + MOCK_CONST_METHOD0(NumSamples, size_t()); + + private: + const int sample_rate_; + const size_t num_channels_; + const size_t num_samples_; +}; + +} // namespace conversational_speech +} // namespace test +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_MOCK_WAVREADER_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/conversational_speech/mock_wavreader_factory.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/test/conversational_speech/mock_wavreader_factory.cc new file mode 100644 index 0000000000..76f5c80718 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/conversational_speech/mock_wavreader_factory.cc @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/test/conversational_speech/mock_wavreader_factory.h" + +#include "modules/audio_processing/test/conversational_speech/mock_wavreader.h" +#include "rtc_base/logging.h" +#include "rtc_base/pathutils.h" +#include "test/gmock.h" + +namespace webrtc { +namespace test { +namespace conversational_speech { + +using testing::_; +using testing::Invoke; + +MockWavReaderFactory::MockWavReaderFactory( + const Params& default_params, + const std::map<std::string, const Params>& params) + : default_params_(default_params), + audiotrack_names_params_(params) { + ON_CALL(*this, Create(_)).WillByDefault(Invoke( + this, &MockWavReaderFactory::CreateMock)); +} + +MockWavReaderFactory::MockWavReaderFactory(const Params& default_params) + : MockWavReaderFactory(default_params, + std::map<std::string, const Params>{}) {} + +MockWavReaderFactory::~MockWavReaderFactory() = default; + +std::unique_ptr<WavReaderInterface> MockWavReaderFactory::CreateMock( + const std::string& filepath) { + // Search the parameters corresponding to filepath. + const rtc::Pathname audiotrack_file_path(filepath); + const auto it = audiotrack_names_params_.find( + audiotrack_file_path.filename()); + + // If not found, use default parameters. + if (it == audiotrack_names_params_.end()) { + RTC_LOG(LS_VERBOSE) << "using default parameters for " << filepath; + return std::unique_ptr<WavReaderInterface>( + new MockWavReader(default_params_.sample_rate, + default_params_.num_channels, + default_params_.num_samples)); + } + + // Found, use the audiotrack-specific parameters. 
+ RTC_LOG(LS_VERBOSE) << "using ad-hoc parameters for " << filepath; + RTC_LOG(LS_VERBOSE) << "sample_rate " << it->second.sample_rate; + RTC_LOG(LS_VERBOSE) << "num_channels " << it->second.num_channels; + RTC_LOG(LS_VERBOSE) << "num_samples " << it->second.num_samples; + return std::unique_ptr<WavReaderInterface>( + new MockWavReader(it->second.sample_rate, + it->second.num_channels, + it->second.num_samples)); +} + +} // namespace conversational_speech +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/conversational_speech/mock_wavreader_factory.h b/third_party/libwebrtc/webrtc/modules/audio_processing/test/conversational_speech/mock_wavreader_factory.h new file mode 100644 index 0000000000..4c055cf3ae --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/conversational_speech/mock_wavreader_factory.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_MOCK_WAVREADER_FACTORY_H_ +#define MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_MOCK_WAVREADER_FACTORY_H_ + +#include <map> +#include <memory> +#include <string> + +#include "modules/audio_processing/test/conversational_speech/wavreader_abstract_factory.h" +#include "modules/audio_processing/test/conversational_speech/wavreader_interface.h" +#include "test/gmock.h" + +namespace webrtc { +namespace test { +namespace conversational_speech { + +class MockWavReaderFactory : public WavReaderAbstractFactory { + public: + struct Params{ + int sample_rate; + size_t num_channels; + size_t num_samples; + }; + + MockWavReaderFactory(const Params& default_params, + const std::map<std::string, const Params>& params); + explicit MockWavReaderFactory(const Params& default_params); + ~MockWavReaderFactory(); + + MOCK_CONST_METHOD1(Create, std::unique_ptr<WavReaderInterface>( + const std::string&)); + + private: + // Creates a MockWavReader instance using the parameters in + // audiotrack_names_params_ if the entry corresponding to filepath exists, + // otherwise creates a MockWavReader instance using the default parameters. + std::unique_ptr<WavReaderInterface> CreateMock(const std::string& filepath); + + const Params& default_params_; + std::map<std::string, const Params> audiotrack_names_params_; +}; + +} // namespace conversational_speech +} // namespace test +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_MOCK_WAVREADER_FACTORY_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/conversational_speech/multiend_call.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/test/conversational_speech/multiend_call.cc new file mode 100644 index 0000000000..76cf774afc --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/conversational_speech/multiend_call.cc @@ -0,0 +1,190 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/test/conversational_speech/multiend_call.h"
+
+#include <algorithm>
+#include <iterator>
+
+#include "rtc_base/logging.h"
+#include "rtc_base/pathutils.h"
+
+namespace webrtc {
+namespace test {
+namespace conversational_speech {
+
+MultiEndCall::MultiEndCall(
+    rtc::ArrayView<const Turn> timing, const std::string& audiotracks_path,
+    std::unique_ptr<WavReaderAbstractFactory> wavreader_abstract_factory)
+    : timing_(timing), audiotracks_path_(audiotracks_path),
+      wavreader_abstract_factory_(std::move(wavreader_abstract_factory)),
+      valid_(false) {
+  FindSpeakerNames();
+  if (CreateAudioTrackReaders())
+    valid_ = CheckTiming();
+}
+
+MultiEndCall::~MultiEndCall() = default;
+
+void MultiEndCall::FindSpeakerNames() {
+  RTC_DCHECK(speaker_names_.empty());
+  for (const Turn& turn : timing_) {
+    speaker_names_.emplace(turn.speaker_name);
+  }
+}
+
+bool MultiEndCall::CreateAudioTrackReaders() {
+  RTC_DCHECK(audiotrack_readers_.empty());
+  sample_rate_hz_ = 0;  // Sample rate will be set when reading the first track.
+  for (const Turn& turn : timing_) {
+    auto it = audiotrack_readers_.find(turn.audiotrack_file_name);
+    if (it != audiotrack_readers_.end())
+      continue;
+
+    // Instantiate a Pathname to retrieve the full path to the audiotrack file.
+    const rtc::Pathname audiotrack_file_path(
+        audiotracks_path_, turn.audiotrack_file_name);
+
+    // Map the audiotrack file name to a new instance of WavReaderInterface.
+    std::unique_ptr<WavReaderInterface> wavreader =
+        wavreader_abstract_factory_->Create(audiotrack_file_path.pathname());
+
+    if (sample_rate_hz_ == 0) {
+      sample_rate_hz_ = wavreader->SampleRate();
+    } else if (sample_rate_hz_ != wavreader->SampleRate()) {
+      RTC_LOG(LS_ERROR)
+          << "All the audio tracks should have the same sample rate.";
+      return false;
+    }
+
+    if (wavreader->NumChannels() != 1) {
+      RTC_LOG(LS_ERROR) << "Only mono audio tracks supported.";
+      return false;
+    }
+
+    audiotrack_readers_.emplace(
+        turn.audiotrack_file_name, std::move(wavreader));
+  }
+
+  return true;
+}
+
+bool MultiEndCall::CheckTiming() {
+  struct Interval {
+    size_t begin;
+    size_t end;
+  };
+  size_t number_of_turns = timing_.size();
+  auto millisecond_to_samples = [](int ms, int sr) -> int {
+    // Truncation may happen if the sampling rate is not an integer multiple
+    // of 1000 (e.g., 44100).
+    return ms * sr / 1000;
+  };
+  auto in_interval = [](size_t value, const Interval& interval) {
+    return interval.begin <= value && value < interval.end;
+  };
+  total_duration_samples_ = 0;
+  speaking_turns_.clear();
+
+  // Begin and end timestamps for the last two turns (unit: number of samples).
+  Interval second_last_turn = {0, 0};
+  Interval last_turn = {0, 0};
+
+  // Initialize map to store speaking turn indices of each speaker (used to
+  // detect self cross-talk).
+  std::map<std::string, std::vector<size_t>> speaking_turn_indices;
+  for (const std::string& speaker_name : speaker_names_) {
+    speaking_turn_indices.emplace(
+        std::piecewise_construct,
+        std::forward_as_tuple(speaker_name),
+        std::forward_as_tuple());
+  }
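+
+  // Worked example (editor's note): at 48 kHz, a 500 ms turn that follows a
+  // previous turn ending at sample 48000 and carries an offset of -200 ms
+  // yields, via millisecond_to_samples(-200, 48000) == -9600:
+  //   begin_timestamp = 48000 - 9600  = 38400
+  //   end_timestamp   = 38400 + 24000 = 62400
+  // i.e. 200 ms of cross-talk with the previous turn.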
+
+  // Parse turns.
+  for (size_t turn_index = 0; turn_index < number_of_turns; ++turn_index) {
+    const Turn& turn = timing_[turn_index];
+    auto it = audiotrack_readers_.find(turn.audiotrack_file_name);
+    RTC_CHECK(it != audiotrack_readers_.end())
+        << "Audio track reader not created";
+
+    // Begin and end timestamps for the current turn.
+    int offset_samples = millisecond_to_samples(
+        turn.offset, it->second->SampleRate());
+    std::size_t begin_timestamp = last_turn.end + offset_samples;
+    std::size_t end_timestamp = begin_timestamp + it->second->NumSamples();
+    RTC_LOG(LS_INFO) << "turn #" << turn_index << " " << begin_timestamp << "-"
+                     << end_timestamp << " samples";
+
+    // The order is invalid if the offset is negative and its absolute value is
+    // larger than the duration of the previous turn.
+    if (offset_samples < 0 && -offset_samples > static_cast<int>(
+        last_turn.end - last_turn.begin)) {
+      RTC_LOG(LS_ERROR) << "invalid order";
+      return false;
+    }
+
+    // Cross-talk with 3 or more speakers occurs when the beginning of the
+    // current interval falls in the last two turns.
+    if (turn_index > 1 && in_interval(begin_timestamp, last_turn)
+        && in_interval(begin_timestamp, second_last_turn)) {
+      RTC_LOG(LS_ERROR) << "cross-talk with 3+ speakers";
+      return false;
+    }
+
+    // Append turn.
+    speaking_turns_.emplace_back(
+        turn.speaker_name, turn.audiotrack_file_name,
+        begin_timestamp, end_timestamp);
+
+    // Save speaking turn index for self cross-talk detection.
+    RTC_DCHECK_EQ(speaking_turns_.size(), turn_index + 1);
+    speaking_turn_indices[turn.speaker_name].push_back(turn_index);
+
+    // Update total duration of the conversational speech.
+    if (total_duration_samples_ < end_timestamp)
+      total_duration_samples_ = end_timestamp;
+
+    // Update and continue with next turn.
+    second_last_turn = last_turn;
+    last_turn.begin = begin_timestamp;
+    last_turn.end = end_timestamp;
+  }
+
+  // Detect self cross-talk.
+  for (const std::string& speaker_name : speaker_names_) {
+    RTC_LOG(LS_INFO) << "checking self cross-talk for <" << speaker_name << ">";
+
+    // Copy all turns for this speaker to a new vector.
+    std::vector<SpeakingTurn> speaking_turns_for_name;
+    std::copy_if(speaking_turns_.begin(), speaking_turns_.end(),
+                 std::back_inserter(speaking_turns_for_name),
+                 [&speaker_name](const SpeakingTurn& st){
+                   return st.speaker_name == speaker_name; });
+
+    // Check for overlap between adjacent elements.
+    // This is a sufficient condition for self cross-talk since the intervals
+    // are sorted by begin timestamp.
+    auto overlap = std::adjacent_find(
+        speaking_turns_for_name.begin(), speaking_turns_for_name.end(),
+        [](const SpeakingTurn& a, const SpeakingTurn& b) {
+          return a.end > b.begin; });
+
+    if (overlap != speaking_turns_for_name.end()) {
+      RTC_LOG(LS_ERROR) << "Self cross-talk detected";
+      return false;
+    }
+  }
+
+  return true;
+}
+
+}  // namespace conversational_speech
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/conversational_speech/multiend_call.h b/third_party/libwebrtc/webrtc/modules/audio_processing/test/conversational_speech/multiend_call.h
new file mode 100644
index 0000000000..e4cee7ac96
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/conversational_speech/multiend_call.h
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_MULTIEND_CALL_H_ +#define MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_MULTIEND_CALL_H_ + +#include <stddef.h> +#include <map> +#include <memory> +#include <set> +#include <string> +#include <utility> +#include <vector> + +#include "api/array_view.h" +#include "modules/audio_processing/test/conversational_speech/timing.h" +#include "modules/audio_processing/test/conversational_speech/wavreader_abstract_factory.h" +#include "modules/audio_processing/test/conversational_speech/wavreader_interface.h" +#include "rtc_base/constructormagic.h" + +namespace webrtc { +namespace test { +namespace conversational_speech { + +class MultiEndCall { + public: + struct SpeakingTurn { + // Constructor required in order to use std::vector::emplace_back(). + SpeakingTurn(std::string new_speaker_name, + std::string new_audiotrack_file_name, + size_t new_begin, size_t new_end) + : speaker_name(std::move(new_speaker_name)), + audiotrack_file_name(std::move(new_audiotrack_file_name)), + begin(new_begin), end(new_end) {} + std::string speaker_name; + std::string audiotrack_file_name; + size_t begin; + size_t end; + }; + + MultiEndCall( + rtc::ArrayView<const Turn> timing, const std::string& audiotracks_path, + std::unique_ptr<WavReaderAbstractFactory> wavreader_abstract_factory); + ~MultiEndCall(); + + const std::set<std::string>& speaker_names() const { return speaker_names_; } + const std::map<std::string, std::unique_ptr<WavReaderInterface>>& + audiotrack_readers() const { return audiotrack_readers_; } + bool valid() const { return valid_; } + int sample_rate() const { return sample_rate_hz_; } + size_t total_duration_samples() const { return total_duration_samples_; } + const std::vector<SpeakingTurn>& speaking_turns() const { + return speaking_turns_; } + + private: + // Finds unique speaker names. + void FindSpeakerNames(); + + // Creates one WavReader instance for each unique audiotrack. It returns false + // if the audio tracks do not have the same sample rate or if they are not + // mono. + bool CreateAudioTrackReaders(); + + // Validates the speaking turns timing information. Accepts cross-talk, but + // only up to 2 speakers. Rejects unordered turns and self cross-talk. 
+  bool CheckTiming();
+
+  rtc::ArrayView<const Turn> timing_;
+  const std::string& audiotracks_path_;
+  std::unique_ptr<WavReaderAbstractFactory> wavreader_abstract_factory_;
+  std::set<std::string> speaker_names_;
+  std::map<std::string, std::unique_ptr<WavReaderInterface>>
+      audiotrack_readers_;
+  bool valid_;
+  int sample_rate_hz_;
+  size_t total_duration_samples_;
+  std::vector<SpeakingTurn> speaking_turns_;
+
+  RTC_DISALLOW_COPY_AND_ASSIGN(MultiEndCall);
+};
+
+}  // namespace conversational_speech
+}  // namespace test
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_MULTIEND_CALL_H_
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/conversational_speech/simulator.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/test/conversational_speech/simulator.cc
new file mode 100644
index 0000000000..84a9ef55c8
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/conversational_speech/simulator.cc
@@ -0,0 +1,221 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/test/conversational_speech/simulator.h"
+
+#include <set>
+#include <utility>
+#include <vector>
+
+#include "api/array_view.h"
+#include "common_audio/wav_file.h"
+#include "modules/audio_processing/test/conversational_speech/wavreader_interface.h"
+#include "rtc_base/constructormagic.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/pathutils.h"
+#include "rtc_base/ptr_util.h"
+
+namespace webrtc {
+namespace test {
+namespace {
+
+using conversational_speech::MultiEndCall;
+using conversational_speech::SpeakerOutputFilePaths;
+using conversational_speech::WavReaderInterface;
+
+// Combines the output path and the speaker names to define the output file
+// paths for the near-end and far-end audio tracks.
+std::unique_ptr<std::map<std::string, SpeakerOutputFilePaths>>
+    InitSpeakerOutputFilePaths(const std::set<std::string>& speaker_names,
+                               const std::string& output_path) {
+  // Create map.
+  auto speaker_output_file_paths_map = rtc::MakeUnique<
+      std::map<std::string, SpeakerOutputFilePaths>>();
+
+  // Add near-end and far-end output paths into the map.
+  for (const auto& speaker_name : speaker_names) {
+    const rtc::Pathname near_end_path(
+        output_path, "s_" + speaker_name + "-near_end.wav");
+    RTC_LOG(LS_VERBOSE) << "The near-end audio track will be created in "
+                        << near_end_path.pathname() << ".";
+
+    const rtc::Pathname far_end_path(
+        output_path, "s_" + speaker_name + "-far_end.wav");
+    RTC_LOG(LS_VERBOSE) << "The far-end audio track will be created in "
+                        << far_end_path.pathname() << ".";
+
+    // Add to map.
+    speaker_output_file_paths_map->emplace(
+        std::piecewise_construct,
+        std::forward_as_tuple(speaker_name),
+        std::forward_as_tuple(near_end_path.pathname(),
+                              far_end_path.pathname()));
+  }
+
+  return speaker_output_file_paths_map;
+}
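+
+// For instance (editor's note), with speakers "A" and "B" and output path
+// "/tmp/out" (a hypothetical path), the function above would create entries
+// for "/tmp/out/s_A-near_end.wav", "/tmp/out/s_A-far_end.wav",
+// "/tmp/out/s_B-near_end.wav" and "/tmp/out/s_B-far_end.wav".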
+
+// Class that provides one WavWriter for the near-end and one for the far-end
+// output track of a speaker.
+class SpeakerWavWriters {
+ public:
+  SpeakerWavWriters(
+      const SpeakerOutputFilePaths& output_file_paths, int sample_rate)
+          : near_end_wav_writer_(output_file_paths.near_end, sample_rate, 1u),
+            far_end_wav_writer_(output_file_paths.far_end, sample_rate, 1u) {}
+  WavWriter* near_end_wav_writer() {
+    return &near_end_wav_writer_;
+  }
+  WavWriter* far_end_wav_writer() {
+    return &far_end_wav_writer_;
+  }
+ private:
+  WavWriter near_end_wav_writer_;
+  WavWriter far_end_wav_writer_;
+};
+
+// Initializes one pair of WavWriter instances for each speaker, one for the
+// near-end and one for the far-end output track.
+std::unique_ptr<std::map<std::string, SpeakerWavWriters>>
+    InitSpeakersWavWriters(const std::map<std::string, SpeakerOutputFilePaths>&
+                           speaker_output_file_paths, int sample_rate) {
+  // Create map.
+  auto speaker_wav_writers_map = rtc::MakeUnique<
+      std::map<std::string, SpeakerWavWriters>>();
+
+  // Add a SpeakerWavWriters instance into the map for each speaker.
+  for (auto it = speaker_output_file_paths.begin();
+       it != speaker_output_file_paths.end(); ++it) {
+    speaker_wav_writers_map->emplace(
+        std::piecewise_construct,
+        std::forward_as_tuple(it->first),
+        std::forward_as_tuple(it->second, sample_rate));
+  }
+
+  return speaker_wav_writers_map;
+}
+
+// Reads all the samples for each audio track.
+std::unique_ptr<std::map<std::string, std::vector<int16_t>>> PreloadAudioTracks(
+    const std::map<std::string, std::unique_ptr<WavReaderInterface>>&
+        audiotrack_readers) {
+  // Create map.
+  auto audiotracks_map = rtc::MakeUnique<
+      std::map<std::string, std::vector<int16_t>>>();
+
+  // Add audio track vectors.
+  for (auto it = audiotrack_readers.begin(); it != audiotrack_readers.end();
+       ++it) {
+    // Add map entry.
+    audiotracks_map->emplace(
+        std::piecewise_construct,
+        std::forward_as_tuple(it->first),
+        std::forward_as_tuple(it->second->NumSamples()));
+
+    // Read samples.
+    it->second->ReadInt16Samples(audiotracks_map->at(it->first));
+  }
+
+  return audiotracks_map;
+}
+
+// Writes all the values in |source_samples| via |wav_writer|. If the number of
+// previously written samples in |wav_writer| is less than |interval_begin|, it
+// adds zeros as left padding. The padding corresponds to intervals during
+// which a speaker is not active.
+void PadLeftWriteChunk(rtc::ArrayView<const int16_t> source_samples,
+                       size_t interval_begin, WavWriter* wav_writer) {
+  // Add left padding.
+  RTC_CHECK(wav_writer);
+  RTC_CHECK_GE(interval_begin, wav_writer->num_samples());
+  size_t padding_size = interval_begin - wav_writer->num_samples();
+  if (padding_size != 0) {
+    const std::vector<int16_t> padding(padding_size, 0);
+    wav_writer->WriteSamples(padding.data(), padding_size);
+  }
+
+  // Write source samples.
+  wav_writer->WriteSamples(source_samples.data(), source_samples.size());
+}
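+
+// Worked example (editor's note): if a writer has already consumed 48000
+// samples and the next speaking turn begins at sample 60000,
+// PadLeftWriteChunk() first writes 12000 zeros (the silent gap) and then the
+// turn's samples, keeping every output track aligned on a common timeline.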
+void PadRightWrite(WavWriter* wav_writer, size_t pad_samples) {
+  RTC_CHECK(wav_writer);
+  RTC_CHECK_GE(pad_samples, wav_writer->num_samples());
+  size_t padding_size = pad_samples - wav_writer->num_samples();
+  if (padding_size != 0) {
+    const std::vector<int16_t> padding(padding_size, 0);
+    wav_writer->WriteSamples(padding.data(), padding_size);
+  }
+}
+
+}  // namespace
+
+namespace conversational_speech {
+
+std::unique_ptr<std::map<std::string, SpeakerOutputFilePaths>> Simulate(
+    const MultiEndCall& multiend_call, const std::string& output_path) {
+  // Set output file paths and initialize wav writers.
+  const auto& speaker_names = multiend_call.speaker_names();
+  auto speaker_output_file_paths = InitSpeakerOutputFilePaths(
+      speaker_names, output_path);
+  auto speakers_wav_writers = InitSpeakersWavWriters(
+      *speaker_output_file_paths, multiend_call.sample_rate());
+
+  // Preload all the input audio tracks.
+  const auto& audiotrack_readers = multiend_call.audiotrack_readers();
+  auto audiotracks = PreloadAudioTracks(audiotrack_readers);
+
+  // TODO(alessiob): When speaker_names.size() == 2, near-end and far-end
+  // across the 2 speakers are symmetric; hence, the code below could be
+  // replaced by only creating the near-end or the far-end. However, this
+  // would require splitting the unit tests and documenting the behavior in
+  // README.md. In practice, it should not be an issue since the files are
+  // not expected to be significant in size.
+
+  // Write near-end and far-end output tracks.
+  for (const auto& speaking_turn : multiend_call.speaking_turns()) {
+    const std::string& active_speaker_name = speaking_turn.speaker_name;
+    auto source_audiotrack = audiotracks->at(
+        speaking_turn.audiotrack_file_name);
+
+    // Write the active speaker's chunk to the active speaker's near-end.
+    PadLeftWriteChunk(source_audiotrack, speaking_turn.begin,
+                      speakers_wav_writers->at(
+                          active_speaker_name).near_end_wav_writer());
+
+    // Write the active speaker's chunk to the other participants' far-ends.
+    for (const std::string& speaker_name : speaker_names) {
+      if (speaker_name == active_speaker_name)
+        continue;
+      PadLeftWriteChunk(source_audiotrack, speaking_turn.begin,
+                        speakers_wav_writers->at(
+                            speaker_name).far_end_wav_writer());
+    }
+  }
+
+  // Finalize all the output tracks with right padding.
+  // This is required to make all the output tracks equal in duration.
+  size_t duration_samples = multiend_call.total_duration_samples();
+  for (const std::string& speaker_name : speaker_names) {
+    PadRightWrite(speakers_wav_writers->at(speaker_name).near_end_wav_writer(),
+                  duration_samples);
+    PadRightWrite(speakers_wav_writers->at(speaker_name).far_end_wav_writer(),
+                  duration_samples);
+  }
+
+  return speaker_output_file_paths;
+}
+
+}  // namespace conversational_speech
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/conversational_speech/simulator.h b/third_party/libwebrtc/webrtc/modules/audio_processing/test/conversational_speech/simulator.h
new file mode 100644
index 0000000000..671c3b9c73
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/conversational_speech/simulator.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS.
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_SIMULATOR_H_ +#define MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_SIMULATOR_H_ + +#include <map> +#include <memory> +#include <string> +#include <utility> + +#include "modules/audio_processing/test/conversational_speech/multiend_call.h" +#include "rtc_base/constructormagic.h" + +namespace webrtc { +namespace test { +namespace conversational_speech { + +struct SpeakerOutputFilePaths { + SpeakerOutputFilePaths(const std::string& new_near_end, + const std::string& new_far_end) + : near_end(new_near_end), + far_end(new_far_end) {} + // Paths to the near-end and far-end audio track files. + const std::string near_end; + const std::string far_end; +}; + +// Generates the near-end and far-end audio track pairs for each speaker. +std::unique_ptr<std::map<std::string, SpeakerOutputFilePaths>> + Simulate(const MultiEndCall& multiend_call, const std::string& output_path); + +} // namespace conversational_speech +} // namespace test +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_SIMULATOR_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/conversational_speech/timing.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/test/conversational_speech/timing.cc new file mode 100644 index 0000000000..773a42ebd7 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/conversational_speech/timing.cc @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/test/conversational_speech/timing.h" + +#include <fstream> +#include <iostream> + +#include "rtc_base/stringencode.h" + +namespace webrtc { +namespace test { +namespace conversational_speech { + +bool Turn::operator==(const Turn &b) const { + return b.speaker_name == speaker_name && + b.audiotrack_file_name == audiotrack_file_name && + b.offset == offset; +} + +std::vector<Turn> LoadTiming(const std::string& timing_filepath) { + // Line parser. + auto parse_line = [](const std::string& line) { + std::vector<std::string> fields; + rtc::split(line, ' ', &fields); + RTC_CHECK_EQ(fields.size(), 3); + return Turn(fields[0], fields[1], std::atol(fields[2].c_str())); + }; + + // Init. + std::vector<Turn> timing; + + // Parse lines. 
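+  // Each non-empty line must carry exactly three space-separated fields -
+  // speaker name, audio track file name and offset - e.g. (illustrative):
+  // "A a1.wav 100".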
+ std::string line; + std::ifstream infile(timing_filepath); + while (std::getline(infile, line)) { + if (line.empty()) + continue; + timing.push_back(parse_line(line)); + } + infile.close(); + + return timing; +} + +void SaveTiming(const std::string& timing_filepath, + rtc::ArrayView<const Turn> timing) { + std::ofstream outfile(timing_filepath); + RTC_CHECK(outfile.is_open()); + for (const Turn& turn : timing) { + outfile << turn.speaker_name << " " << turn.audiotrack_file_name + << " " << turn.offset << std::endl; + } + outfile.close(); +} + +} // namespace conversational_speech +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/conversational_speech/timing.h b/third_party/libwebrtc/webrtc/modules/audio_processing/test/conversational_speech/timing.h new file mode 100644 index 0000000000..dc43361815 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/conversational_speech/timing.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_TIMING_H_ +#define MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_TIMING_H_ + +#include <string> +#include <vector> + +#include "api/array_view.h" + +namespace webrtc { +namespace test { +namespace conversational_speech { + +struct Turn{ + Turn(std::string new_speaker_name, std::string new_audiotrack_file_name, + int new_offset) + : speaker_name(new_speaker_name), + audiotrack_file_name(new_audiotrack_file_name), + offset(new_offset) {} + bool operator==(const Turn &b) const; + std::string speaker_name; + std::string audiotrack_file_name; + int offset; +}; + +// Loads a list of turns from a file. +std::vector<Turn> LoadTiming(const std::string& timing_filepath); + +// Writes a list of turns into a file. +void SaveTiming(const std::string& timing_filepath, + rtc::ArrayView<const Turn> timing); + +} // namespace conversational_speech +} // namespace test +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_TIMING_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/conversational_speech/wavreader_abstract_factory.h b/third_party/libwebrtc/webrtc/modules/audio_processing/test/conversational_speech/wavreader_abstract_factory.h new file mode 100644 index 0000000000..83fda0d45c --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/conversational_speech/wavreader_abstract_factory.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_WAVREADER_ABSTRACT_FACTORY_H_ +#define MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_WAVREADER_ABSTRACT_FACTORY_H_ + +#include <memory> +#include <string> + +#include "modules/audio_processing/test/conversational_speech/wavreader_interface.h" + +namespace webrtc { +namespace test { +namespace conversational_speech { + +class WavReaderAbstractFactory { + public: + virtual ~WavReaderAbstractFactory() = default; + virtual std::unique_ptr<WavReaderInterface> Create( + const std::string& filepath) const = 0; +}; + +} // namespace conversational_speech +} // namespace test +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_WAVREADER_ABSTRACT_FACTORY_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/conversational_speech/wavreader_factory.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/test/conversational_speech/wavreader_factory.cc new file mode 100644 index 0000000000..8342d3fed8 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/conversational_speech/wavreader_factory.cc @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/test/conversational_speech/wavreader_factory.h" + +#include <cstddef> + +#include "api/array_view.h" +#include "common_audio/wav_file.h" +#include "rtc_base/checks.h" +#include "typedefs.h" // NOLINT(build/include) + +namespace webrtc { +namespace test { +namespace { + +using conversational_speech::WavReaderInterface; + +class WavReaderAdaptor final : public WavReaderInterface { + public: + explicit WavReaderAdaptor(const std::string& filepath) + : wav_reader_(filepath) {} + ~WavReaderAdaptor() override = default; + + size_t ReadFloatSamples(rtc::ArrayView<float> samples) override { + return wav_reader_.ReadSamples(samples.size(), samples.begin()); + } + + size_t ReadInt16Samples(rtc::ArrayView<int16_t> samples) override { + return wav_reader_.ReadSamples(samples.size(), samples.begin()); + } + + int SampleRate() const override { + return wav_reader_.sample_rate(); + } + + size_t NumChannels() const override { + return wav_reader_.num_channels(); + } + + size_t NumSamples() const override { + return wav_reader_.num_samples(); + } + + private: + WavReader wav_reader_; +}; + +} // namespace + +namespace conversational_speech { + +WavReaderFactory::WavReaderFactory() = default; + +WavReaderFactory::~WavReaderFactory() = default; + +std::unique_ptr<WavReaderInterface> WavReaderFactory::Create( + const std::string& filepath) const { + return std::unique_ptr<WavReaderAdaptor>(new WavReaderAdaptor(filepath)); +} + +} // namespace conversational_speech +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/conversational_speech/wavreader_factory.h b/third_party/libwebrtc/webrtc/modules/audio_processing/test/conversational_speech/wavreader_factory.h new file mode 100644 index 0000000000..e168d0d71f --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/conversational_speech/wavreader_factory.h @@ -0,0 +1,36 @@ +/* + * 
Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_WAVREADER_FACTORY_H_ +#define MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_WAVREADER_FACTORY_H_ + +#include <memory> +#include <string> + +#include "modules/audio_processing/test/conversational_speech/wavreader_abstract_factory.h" +#include "modules/audio_processing/test/conversational_speech/wavreader_interface.h" + +namespace webrtc { +namespace test { +namespace conversational_speech { + +class WavReaderFactory : public WavReaderAbstractFactory { + public: + WavReaderFactory(); + ~WavReaderFactory() override; + std::unique_ptr<WavReaderInterface> Create(const std::string& filepath) const + override; +}; + +} // namespace conversational_speech +} // namespace test +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_WAVREADER_FACTORY_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/conversational_speech/wavreader_interface.h b/third_party/libwebrtc/webrtc/modules/audio_processing/test/conversational_speech/wavreader_interface.h new file mode 100644 index 0000000000..3aea273ad2 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/conversational_speech/wavreader_interface.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_WAVREADER_INTERFACE_H_ +#define MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_WAVREADER_INTERFACE_H_ + +#include <stddef.h> + +#include "api/array_view.h" +#include "typedefs.h" // NOLINT(build/include) + +namespace webrtc { +namespace test { +namespace conversational_speech { + +class WavReaderInterface { + public: + virtual ~WavReaderInterface() = default; + + // Returns the number of samples read. + virtual size_t ReadFloatSamples(rtc::ArrayView<float> samples) = 0; + virtual size_t ReadInt16Samples(rtc::ArrayView<int16_t> samples) = 0; + + // Getters. + virtual int SampleRate() const = 0; + virtual size_t NumChannels() const = 0; + virtual size_t NumSamples() const = 0; +}; + +} // namespace conversational_speech +} // namespace test +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_TEST_CONVERSATIONAL_SPEECH_WAVREADER_INTERFACE_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/debug_dump_replayer.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/test/debug_dump_replayer.cc new file mode 100644 index 0000000000..a027d07a9f --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/debug_dump_replayer.cc @@ -0,0 +1,272 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. 
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/test/debug_dump_replayer.h"
+
+#include "modules/audio_processing/test/protobuf_utils.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+namespace test {
+
+namespace {
+
+void MaybeResetBuffer(std::unique_ptr<ChannelBuffer<float>>* buffer,
+                      const StreamConfig& config) {
+  auto& buffer_ref = *buffer;
+  if (!buffer_ref.get() || buffer_ref->num_frames() != config.num_frames() ||
+      buffer_ref->num_channels() != config.num_channels()) {
+    buffer_ref.reset(new ChannelBuffer<float>(config.num_frames(),
+                                              config.num_channels()));
+  }
+}
+
+}  // namespace
+
+DebugDumpReplayer::DebugDumpReplayer()
+    : input_(nullptr),  // will be created upon usage.
+      reverse_(nullptr),
+      output_(nullptr),
+      apm_(nullptr),
+      debug_file_(nullptr) {}
+
+DebugDumpReplayer::~DebugDumpReplayer() {
+  if (debug_file_)
+    fclose(debug_file_);
+}
+
+bool DebugDumpReplayer::SetDumpFile(const std::string& filename) {
+  debug_file_ = fopen(filename.c_str(), "rb");
+  LoadNextMessage();
+  return debug_file_;
+}
+
+// Returns the next event that has not been run yet.
+rtc::Optional<audioproc::Event> DebugDumpReplayer::GetNextEvent() const {
+  if (!has_next_event_)
+    return rtc::nullopt;
+  else
+    return next_event_;
+}
+
+// Runs the next event. Returns true on success.
+bool DebugDumpReplayer::RunNextEvent() {
+  if (!has_next_event_)
+    return false;
+  switch (next_event_.type()) {
+    case audioproc::Event::INIT:
+      OnInitEvent(next_event_.init());
+      break;
+    case audioproc::Event::STREAM:
+      OnStreamEvent(next_event_.stream());
+      break;
+    case audioproc::Event::REVERSE_STREAM:
+      OnReverseStreamEvent(next_event_.reverse_stream());
+      break;
+    case audioproc::Event::CONFIG:
+      OnConfigEvent(next_event_.config());
+      break;
+    case audioproc::Event::UNKNOWN_EVENT:
+      // We do not expect to receive an UNKNOWN event.
+      return false;
+  }
+  LoadNextMessage();
+  return true;
+}
+
+const ChannelBuffer<float>* DebugDumpReplayer::GetOutput() const {
+  return output_.get();
+}
+
+StreamConfig DebugDumpReplayer::GetOutputConfig() const {
+  return output_config_;
+}
+
+// OnInitEvent resets the input/output/reverse channel formats.
+void DebugDumpReplayer::OnInitEvent(const audioproc::Init& msg) {
+  RTC_CHECK(msg.has_num_input_channels());
+  RTC_CHECK(msg.has_output_sample_rate());
+  RTC_CHECK(msg.has_num_output_channels());
+  RTC_CHECK(msg.has_reverse_sample_rate());
+  RTC_CHECK(msg.has_num_reverse_channels());
+
+  input_config_ = StreamConfig(msg.sample_rate(), msg.num_input_channels());
+  output_config_ =
+      StreamConfig(msg.output_sample_rate(), msg.num_output_channels());
+  reverse_config_ =
+      StreamConfig(msg.reverse_sample_rate(), msg.num_reverse_channels());
+
+  MaybeResetBuffer(&input_, input_config_);
+  MaybeResetBuffer(&output_, output_config_);
+  MaybeResetBuffer(&reverse_, reverse_config_);
+}
+
+// OnStreamEvent replays an input signal and verifies the output.
+void DebugDumpReplayer::OnStreamEvent(const audioproc::Stream& msg) {
+  // APM should have been created.
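+  // (It is created lazily by OnConfigEvent() via MaybeRecreateApm(), so a
+  // CONFIG event is expected to precede the first STREAM event in the dump.)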
+ RTC_CHECK(apm_.get()); + + RTC_CHECK_EQ(AudioProcessing::kNoError, + apm_->gain_control()->set_stream_analog_level(msg.level())); + RTC_CHECK_EQ(AudioProcessing::kNoError, + apm_->set_stream_delay_ms(msg.delay())); + + apm_->echo_cancellation()->set_stream_drift_samples(msg.drift()); + if (msg.has_keypress()) { + apm_->set_stream_key_pressed(msg.keypress()); + } else { + apm_->set_stream_key_pressed(true); + } + + RTC_CHECK_EQ(input_config_.num_channels(), + static_cast<size_t>(msg.input_channel_size())); + RTC_CHECK_EQ(input_config_.num_frames() * sizeof(float), + msg.input_channel(0).size()); + + for (int i = 0; i < msg.input_channel_size(); ++i) { + memcpy(input_->channels()[i], msg.input_channel(i).data(), + msg.input_channel(i).size()); + } + + RTC_CHECK_EQ(AudioProcessing::kNoError, + apm_->ProcessStream(input_->channels(), input_config_, + output_config_, output_->channels())); +} + +void DebugDumpReplayer::OnReverseStreamEvent( + const audioproc::ReverseStream& msg) { + // APM should have been created. + RTC_CHECK(apm_.get()); + + RTC_CHECK_GT(msg.channel_size(), 0); + RTC_CHECK_EQ(reverse_config_.num_channels(), + static_cast<size_t>(msg.channel_size())); + RTC_CHECK_EQ(reverse_config_.num_frames() * sizeof(float), + msg.channel(0).size()); + + for (int i = 0; i < msg.channel_size(); ++i) { + memcpy(reverse_->channels()[i], msg.channel(i).data(), + msg.channel(i).size()); + } + + RTC_CHECK_EQ( + AudioProcessing::kNoError, + apm_->ProcessReverseStream(reverse_->channels(), reverse_config_, + reverse_config_, reverse_->channels())); +} + +void DebugDumpReplayer::OnConfigEvent(const audioproc::Config& msg) { + MaybeRecreateApm(msg); + ConfigureApm(msg); +} + +void DebugDumpReplayer::MaybeRecreateApm(const audioproc::Config& msg) { + // These configurations cannot be changed on the fly. + Config config; + RTC_CHECK(msg.has_aec_delay_agnostic_enabled()); + config.Set<DelayAgnostic>( + new DelayAgnostic(msg.aec_delay_agnostic_enabled())); + + RTC_CHECK(msg.has_noise_robust_agc_enabled()); + config.Set<ExperimentalAgc>( + new ExperimentalAgc(msg.noise_robust_agc_enabled())); + + RTC_CHECK(msg.has_transient_suppression_enabled()); + config.Set<ExperimentalNs>( + new ExperimentalNs(msg.transient_suppression_enabled())); + + RTC_CHECK(msg.has_aec_extended_filter_enabled()); + config.Set<ExtendedFilter>( + new ExtendedFilter(msg.aec_extended_filter_enabled())); + + RTC_CHECK(msg.has_intelligibility_enhancer_enabled()); + config.Set<Intelligibility>( + new Intelligibility(msg.intelligibility_enhancer_enabled())); + + // We only create APM once, since changes on these fields should not + // happen in current implementation. + if (!apm_.get()) { + apm_.reset(AudioProcessing::Create(config)); + } +} + +void DebugDumpReplayer::ConfigureApm(const audioproc::Config& msg) { + AudioProcessing::Config apm_config; + + // AEC configs. + RTC_CHECK(msg.has_aec_enabled()); + RTC_CHECK_EQ(AudioProcessing::kNoError, + apm_->echo_cancellation()->Enable(msg.aec_enabled())); + + RTC_CHECK(msg.has_aec_drift_compensation_enabled()); + RTC_CHECK_EQ(AudioProcessing::kNoError, + apm_->echo_cancellation()->enable_drift_compensation( + msg.aec_drift_compensation_enabled())); + + RTC_CHECK(msg.has_aec_suppression_level()); + RTC_CHECK_EQ(AudioProcessing::kNoError, + apm_->echo_cancellation()->set_suppression_level( + static_cast<EchoCancellation::SuppressionLevel>( + msg.aec_suppression_level()))); + + // AECM configs. 
+ RTC_CHECK(msg.has_aecm_enabled()); + RTC_CHECK_EQ(AudioProcessing::kNoError, + apm_->echo_control_mobile()->Enable(msg.aecm_enabled())); + + RTC_CHECK(msg.has_aecm_comfort_noise_enabled()); + RTC_CHECK_EQ(AudioProcessing::kNoError, + apm_->echo_control_mobile()->enable_comfort_noise( + msg.aecm_comfort_noise_enabled())); + + RTC_CHECK(msg.has_aecm_routing_mode()); + RTC_CHECK_EQ(AudioProcessing::kNoError, + apm_->echo_control_mobile()->set_routing_mode( + static_cast<EchoControlMobile::RoutingMode>( + msg.aecm_routing_mode()))); + + // AGC configs. + RTC_CHECK(msg.has_agc_enabled()); + RTC_CHECK_EQ(AudioProcessing::kNoError, + apm_->gain_control()->Enable(msg.agc_enabled())); + + RTC_CHECK(msg.has_agc_mode()); + RTC_CHECK_EQ(AudioProcessing::kNoError, + apm_->gain_control()->set_mode( + static_cast<GainControl::Mode>(msg.agc_mode()))); + + RTC_CHECK(msg.has_agc_limiter_enabled()); + RTC_CHECK_EQ(AudioProcessing::kNoError, + apm_->gain_control()->enable_limiter(msg.agc_limiter_enabled())); + + // HPF configs. + RTC_CHECK(msg.has_hpf_enabled()); + apm_config.high_pass_filter.enabled = msg.hpf_enabled(); + + // NS configs. + RTC_CHECK(msg.has_ns_enabled()); + RTC_CHECK_EQ(AudioProcessing::kNoError, + apm_->noise_suppression()->Enable(msg.ns_enabled())); + + RTC_CHECK(msg.has_ns_level()); + RTC_CHECK_EQ(AudioProcessing::kNoError, + apm_->noise_suppression()->set_level( + static_cast<NoiseSuppression::Level>(msg.ns_level()))); + + apm_->ApplyConfig(apm_config); +} + +void DebugDumpReplayer::LoadNextMessage() { + has_next_event_ = + debug_file_ && ReadMessageFromFile(debug_file_, &next_event_); +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/debug_dump_replayer.h b/third_party/libwebrtc/webrtc/modules/audio_processing/test/debug_dump_replayer.h new file mode 100644 index 0000000000..4cd961dd7f --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/debug_dump_replayer.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_TEST_DEBUG_DUMP_REPLAYER_H_ +#define MODULES_AUDIO_PROCESSING_TEST_DEBUG_DUMP_REPLAYER_H_ + +#include <memory> +#include <string> + +#include "common_audio/channel_buffer.h" +#include "modules/audio_processing/include/audio_processing.h" +#include "rtc_base/ignore_wundef.h" + +RTC_PUSH_IGNORING_WUNDEF() +#include "modules/audio_processing/debug.pb.h" +RTC_POP_IGNORING_WUNDEF() + +namespace webrtc { +namespace test { + +class DebugDumpReplayer { + public: + DebugDumpReplayer(); + ~DebugDumpReplayer(); + + // Set dump file + bool SetDumpFile(const std::string& filename); + + // Return next event. + rtc::Optional<audioproc::Event> GetNextEvent() const; + + // Run the next event. Returns true if succeeded. + bool RunNextEvent(); + + const ChannelBuffer<float>* GetOutput() const; + StreamConfig GetOutputConfig() const; + + private: + // Following functions are facilities for replaying debug dumps. 
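+  // Each handler below corresponds to one audioproc::Event type dispatched by
+  // RunNextEvent(): INIT, STREAM, REVERSE_STREAM and CONFIG.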
+  void OnInitEvent(const audioproc::Init& msg);
+  void OnStreamEvent(const audioproc::Stream& msg);
+  void OnReverseStreamEvent(const audioproc::ReverseStream& msg);
+  void OnConfigEvent(const audioproc::Config& msg);
+
+  void MaybeRecreateApm(const audioproc::Config& msg);
+  void ConfigureApm(const audioproc::Config& msg);
+
+  void LoadNextMessage();
+
+  // Buffer for APM input/output.
+  std::unique_ptr<ChannelBuffer<float>> input_;
+  std::unique_ptr<ChannelBuffer<float>> reverse_;
+  std::unique_ptr<ChannelBuffer<float>> output_;
+
+  std::unique_ptr<AudioProcessing> apm_;
+
+  FILE* debug_file_;
+
+  StreamConfig input_config_;
+  StreamConfig reverse_config_;
+  StreamConfig output_config_;
+
+  bool has_next_event_;
+  audioproc::Event next_event_;
+};
+
+}  // namespace test
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_TEST_DEBUG_DUMP_REPLAYER_H_
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/debug_dump_test.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/test/debug_dump_test.cc
new file mode 100644
index 0000000000..2c839d3bac
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/debug_dump_test.cc
@@ -0,0 +1,620 @@
+/*
+ * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stddef.h>  // size_t
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "modules/audio_coding/neteq/tools/resample_input_audio_file.h"
+#include "modules/audio_processing/aec_dump/aec_dump_factory.h"
+#include "modules/audio_processing/test/debug_dump_replayer.h"
+#include "modules/audio_processing/test/test_utils.h"
+#include "rtc_base/task_queue.h"
+#include "test/gtest.h"
+#include "test/testsupport/fileutils.h"
+
+namespace webrtc {
+namespace test {
+
+namespace {
+
+void MaybeResetBuffer(std::unique_ptr<ChannelBuffer<float>>* buffer,
+                      const StreamConfig& config) {
+  auto& buffer_ref = *buffer;
+  if (!buffer_ref.get() || buffer_ref->num_frames() != config.num_frames() ||
+      buffer_ref->num_channels() != config.num_channels()) {
+    buffer_ref.reset(new ChannelBuffer<float>(config.num_frames(),
+                                              config.num_channels()));
+  }
+}
+
+class DebugDumpGenerator {
+ public:
+  DebugDumpGenerator(const std::string& input_file_name,
+                     int input_rate_hz,
+                     int input_channels,
+                     const std::string& reverse_file_name,
+                     int reverse_rate_hz,
+                     int reverse_channels,
+                     const Config& config,
+                     const std::string& dump_file_name,
+                     bool enable_aec3);
+
+  // Constructor that uses default input files.
+  explicit DebugDumpGenerator(const Config& config,
+                              const AudioProcessing::Config& apm_config,
+                              bool enable_aec3);
+
+  explicit DebugDumpGenerator(const Config& config,
+                              const AudioProcessing::Config& apm_config);
+
+  ~DebugDumpGenerator();
+
+  // Changes the sample rate of the input audio to the APM.
+  void SetInputRate(int rate_hz);
+
+  // Sets whether the stereo input signal is converted to mono by discarding
+  // the other channels.
+  void ForceInputMono(bool mono);
+
+  // Changes the sample rate of the reverse audio to the APM.
+  void SetReverseRate(int rate_hz);
+
+  // Sets whether the stereo reverse signal is converted to mono by discarding
+  // the other channels.
+ void ForceReverseMono(bool mono); + + // Sets the required sample rate of the APM output. + void SetOutputRate(int rate_hz); + + // Sets the required channels of the APM output. + void SetOutputChannels(int channels); + + std::string dump_file_name() const { return dump_file_name_; } + + void StartRecording(); + void Process(size_t num_blocks); + void StopRecording(); + AudioProcessing* apm() const { return apm_.get(); } + + private: + static void ReadAndDeinterleave(ResampleInputAudioFile* audio, int channels, + const StreamConfig& config, + float* const* buffer); + + // APM input/output settings. + StreamConfig input_config_; + StreamConfig reverse_config_; + StreamConfig output_config_; + + // Input file format. + const std::string input_file_name_; + ResampleInputAudioFile input_audio_; + const int input_file_channels_; + + // Reverse file format. + const std::string reverse_file_name_; + ResampleInputAudioFile reverse_audio_; + const int reverse_file_channels_; + + // Buffer for APM input/output. + std::unique_ptr<ChannelBuffer<float>> input_; + std::unique_ptr<ChannelBuffer<float>> reverse_; + std::unique_ptr<ChannelBuffer<float>> output_; + + rtc::TaskQueue worker_queue_; + std::unique_ptr<AudioProcessing> apm_; + + const std::string dump_file_name_; +}; + +DebugDumpGenerator::DebugDumpGenerator(const std::string& input_file_name, + int input_rate_hz, + int input_channels, + const std::string& reverse_file_name, + int reverse_rate_hz, + int reverse_channels, + const Config& config, + const std::string& dump_file_name, + bool enable_aec3) + : input_config_(input_rate_hz, input_channels), + reverse_config_(reverse_rate_hz, reverse_channels), + output_config_(input_rate_hz, input_channels), + input_audio_(input_file_name, input_rate_hz, input_rate_hz), + input_file_channels_(input_channels), + reverse_audio_(reverse_file_name, reverse_rate_hz, reverse_rate_hz), + reverse_file_channels_(reverse_channels), + input_(new ChannelBuffer<float>(input_config_.num_frames(), + input_config_.num_channels())), + reverse_(new ChannelBuffer<float>(reverse_config_.num_frames(), + reverse_config_.num_channels())), + output_(new ChannelBuffer<float>(output_config_.num_frames(), + output_config_.num_channels())), + worker_queue_("debug_dump_generator_worker_queue"), + apm_(AudioProcessing::Create( + config, + nullptr, + (enable_aec3 ? std::unique_ptr<EchoControlFactory>( + new EchoCanceller3Factory()) + : nullptr), + nullptr)), + dump_file_name_(dump_file_name) {} + +DebugDumpGenerator::DebugDumpGenerator( + const Config& config, + const AudioProcessing::Config& apm_config, + bool enable_aec3) + : DebugDumpGenerator(ResourcePath("near32_stereo", "pcm"), + 32000, + 2, + ResourcePath("far32_stereo", "pcm"), + 32000, + 2, + config, + TempFilename(OutputPath(), "debug_aec"), + enable_aec3) { + apm_->ApplyConfig(apm_config); +} + +DebugDumpGenerator::DebugDumpGenerator( + const Config& config, + const AudioProcessing::Config& apm_config) + : DebugDumpGenerator(config, apm_config, false) { + apm_->ApplyConfig(apm_config); +} + +DebugDumpGenerator::~DebugDumpGenerator() { + remove(dump_file_name_.c_str()); +} + +void DebugDumpGenerator::SetInputRate(int rate_hz) { + input_audio_.set_output_rate_hz(rate_hz); + input_config_.set_sample_rate_hz(rate_hz); + MaybeResetBuffer(&input_, input_config_); +} + +void DebugDumpGenerator::ForceInputMono(bool mono) { + const int channels = mono ? 
1 : input_file_channels_; + input_config_.set_num_channels(channels); + MaybeResetBuffer(&input_, input_config_); +} + +void DebugDumpGenerator::SetReverseRate(int rate_hz) { + reverse_audio_.set_output_rate_hz(rate_hz); + reverse_config_.set_sample_rate_hz(rate_hz); + MaybeResetBuffer(&reverse_, reverse_config_); +} + +void DebugDumpGenerator::ForceReverseMono(bool mono) { + const int channels = mono ? 1 : reverse_file_channels_; + reverse_config_.set_num_channels(channels); + MaybeResetBuffer(&reverse_, reverse_config_); +} + +void DebugDumpGenerator::SetOutputRate(int rate_hz) { + output_config_.set_sample_rate_hz(rate_hz); + MaybeResetBuffer(&output_, output_config_); +} + +void DebugDumpGenerator::SetOutputChannels(int channels) { + output_config_.set_num_channels(channels); + MaybeResetBuffer(&output_, output_config_); +} + +void DebugDumpGenerator::StartRecording() { + apm_->AttachAecDump( + AecDumpFactory::Create(dump_file_name_.c_str(), -1, &worker_queue_)); +} + +void DebugDumpGenerator::Process(size_t num_blocks) { + for (size_t i = 0; i < num_blocks; ++i) { + ReadAndDeinterleave(&reverse_audio_, reverse_file_channels_, + reverse_config_, reverse_->channels()); + ReadAndDeinterleave(&input_audio_, input_file_channels_, input_config_, + input_->channels()); + RTC_CHECK_EQ(AudioProcessing::kNoError, apm_->set_stream_delay_ms(100)); + apm_->set_stream_key_pressed(i % 10 == 9); + RTC_CHECK_EQ(AudioProcessing::kNoError, + apm_->ProcessStream(input_->channels(), input_config_, + output_config_, output_->channels())); + + RTC_CHECK_EQ(AudioProcessing::kNoError, + apm_->ProcessReverseStream(reverse_->channels(), + reverse_config_, + reverse_config_, + reverse_->channels())); + } +} + +void DebugDumpGenerator::StopRecording() { + apm_->DetachAecDump(); +} + +void DebugDumpGenerator::ReadAndDeinterleave(ResampleInputAudioFile* audio, + int channels, + const StreamConfig& config, + float* const* buffer) { + const size_t num_frames = config.num_frames(); + const int out_channels = config.num_channels(); + + std::vector<int16_t> signal(channels * num_frames); + + audio->Read(num_frames * channels, &signal[0]); + + // We only allow reducing number of channels by discarding some channels. + RTC_CHECK_LE(out_channels, channels); + for (int channel = 0; channel < out_channels; ++channel) { + for (size_t i = 0; i < num_frames; ++i) { + buffer[channel][i] = S16ToFloat(signal[i * channels + channel]); + } + } +} + +} // namespace + +class DebugDumpTest : public ::testing::Test { + public: + // VerifyDebugDump replays a debug dump using APM and verifies that the result + // is bit-exact-identical to the output channel in the dump. This is only + // guaranteed if the debug dump is started on the first frame. + void VerifyDebugDump(const std::string& in_filename); + + private: + DebugDumpReplayer debug_dump_replayer_; +}; + +void DebugDumpTest::VerifyDebugDump(const std::string& in_filename) { + ASSERT_TRUE(debug_dump_replayer_.SetDumpFile(in_filename)); + + while (const rtc::Optional<audioproc::Event> event = + debug_dump_replayer_.GetNextEvent()) { + debug_dump_replayer_.RunNextEvent(); + if (event->type() == audioproc::Event::STREAM) { + const audioproc::Stream* msg = &event->stream(); + const StreamConfig output_config = debug_dump_replayer_.GetOutputConfig(); + const ChannelBuffer<float>* output = debug_dump_replayer_.GetOutput(); + // Check that output of APM is bit-exact to the output in the dump. 
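+      // (The replayed APM consumes the same input, delay, drift and key-press
+      // state as the recorded one, so any divergence surfaces here as a
+      // memcmp mismatch.)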
+ ASSERT_EQ(output_config.num_channels(), + static_cast<size_t>(msg->output_channel_size())); + ASSERT_EQ(output_config.num_frames() * sizeof(float), + msg->output_channel(0).size()); + for (int i = 0; i < msg->output_channel_size(); ++i) { + ASSERT_EQ(0, memcmp(output->channels()[i], + msg->output_channel(i).data(), + msg->output_channel(i).size())); + } + } + } +} + +TEST_F(DebugDumpTest, SimpleCase) { + Config config; + DebugDumpGenerator generator(config, AudioProcessing::Config()); + generator.StartRecording(); + generator.Process(100); + generator.StopRecording(); + VerifyDebugDump(generator.dump_file_name()); +} + +TEST_F(DebugDumpTest, ChangeInputFormat) { + Config config; + DebugDumpGenerator generator(config, AudioProcessing::Config()); + + generator.StartRecording(); + generator.Process(100); + generator.SetInputRate(48000); + + generator.ForceInputMono(true); + // Number of output channel should not be larger than that of input. APM will + // fail otherwise. + generator.SetOutputChannels(1); + + generator.Process(100); + generator.StopRecording(); + VerifyDebugDump(generator.dump_file_name()); +} + +TEST_F(DebugDumpTest, ChangeReverseFormat) { + Config config; + DebugDumpGenerator generator(config, AudioProcessing::Config()); + generator.StartRecording(); + generator.Process(100); + generator.SetReverseRate(48000); + generator.ForceReverseMono(true); + generator.Process(100); + generator.StopRecording(); + VerifyDebugDump(generator.dump_file_name()); +} + +TEST_F(DebugDumpTest, ChangeOutputFormat) { + Config config; + DebugDumpGenerator generator(config, AudioProcessing::Config()); + generator.StartRecording(); + generator.Process(100); + generator.SetOutputRate(48000); + generator.SetOutputChannels(1); + generator.Process(100); + generator.StopRecording(); + VerifyDebugDump(generator.dump_file_name()); +} + +TEST_F(DebugDumpTest, ToggleAec) { + Config config; + DebugDumpGenerator generator(config, AudioProcessing::Config()); + generator.StartRecording(); + generator.Process(100); + + EchoCancellation* aec = generator.apm()->echo_cancellation(); + EXPECT_EQ(AudioProcessing::kNoError, aec->Enable(!aec->is_enabled())); + + generator.Process(100); + generator.StopRecording(); + VerifyDebugDump(generator.dump_file_name()); +} + +TEST_F(DebugDumpTest, ToggleDelayAgnosticAec) { + Config config; + config.Set<DelayAgnostic>(new DelayAgnostic(true)); + DebugDumpGenerator generator(config, AudioProcessing::Config()); + generator.StartRecording(); + generator.Process(100); + + EchoCancellation* aec = generator.apm()->echo_cancellation(); + EXPECT_EQ(AudioProcessing::kNoError, aec->Enable(!aec->is_enabled())); + + generator.Process(100); + generator.StopRecording(); + VerifyDebugDump(generator.dump_file_name()); +} + +TEST_F(DebugDumpTest, VerifyRefinedAdaptiveFilterExperimentalString) { + Config config; + config.Set<RefinedAdaptiveFilter>(new RefinedAdaptiveFilter(true)); + DebugDumpGenerator generator(config, AudioProcessing::Config()); + generator.StartRecording(); + generator.Process(100); + generator.StopRecording(); + + DebugDumpReplayer debug_dump_replayer_; + + ASSERT_TRUE(debug_dump_replayer_.SetDumpFile(generator.dump_file_name())); + + while (const rtc::Optional<audioproc::Event> event = + debug_dump_replayer_.GetNextEvent()) { + debug_dump_replayer_.RunNextEvent(); + if (event->type() == audioproc::Event::CONFIG) { + const audioproc::Config* msg = &event->config(); + ASSERT_TRUE(msg->has_experiments_description()); + EXPECT_PRED_FORMAT2(testing::IsSubstring, 
"RefinedAdaptiveFilter", + msg->experiments_description().c_str()); + } + } +} + +TEST_F(DebugDumpTest, VerifyCombinedExperimentalStringInclusive) { + Config config; + AudioProcessing::Config apm_config; + config.Set<RefinedAdaptiveFilter>(new RefinedAdaptiveFilter(true)); + // Arbitrarily set clipping gain to 17, which will never be the default. + config.Set<ExperimentalAgc>(new ExperimentalAgc(true, 0, 17)); + bool enable_aec3 = true; + DebugDumpGenerator generator(config, apm_config, enable_aec3); + generator.StartRecording(); + generator.Process(100); + generator.StopRecording(); + + DebugDumpReplayer debug_dump_replayer_; + + ASSERT_TRUE(debug_dump_replayer_.SetDumpFile(generator.dump_file_name())); + + while (const rtc::Optional<audioproc::Event> event = + debug_dump_replayer_.GetNextEvent()) { + debug_dump_replayer_.RunNextEvent(); + if (event->type() == audioproc::Event::CONFIG) { + const audioproc::Config* msg = &event->config(); + ASSERT_TRUE(msg->has_experiments_description()); + EXPECT_PRED_FORMAT2(testing::IsSubstring, "RefinedAdaptiveFilter", + msg->experiments_description().c_str()); + EXPECT_PRED_FORMAT2(testing::IsSubstring, "EchoController", + msg->experiments_description().c_str()); + EXPECT_PRED_FORMAT2(testing::IsSubstring, "AgcClippingLevelExperiment", + msg->experiments_description().c_str()); + } + } +} + +TEST_F(DebugDumpTest, VerifyCombinedExperimentalStringExclusive) { + Config config; + config.Set<RefinedAdaptiveFilter>(new RefinedAdaptiveFilter(true)); + DebugDumpGenerator generator(config, AudioProcessing::Config()); + generator.StartRecording(); + generator.Process(100); + generator.StopRecording(); + + DebugDumpReplayer debug_dump_replayer_; + + ASSERT_TRUE(debug_dump_replayer_.SetDumpFile(generator.dump_file_name())); + + while (const rtc::Optional<audioproc::Event> event = + debug_dump_replayer_.GetNextEvent()) { + debug_dump_replayer_.RunNextEvent(); + if (event->type() == audioproc::Event::CONFIG) { + const audioproc::Config* msg = &event->config(); + ASSERT_TRUE(msg->has_experiments_description()); + EXPECT_PRED_FORMAT2(testing::IsSubstring, "RefinedAdaptiveFilter", + msg->experiments_description().c_str()); + EXPECT_PRED_FORMAT2(testing::IsNotSubstring, "AEC3", + msg->experiments_description().c_str()); + EXPECT_PRED_FORMAT2(testing::IsNotSubstring, "AgcClippingLevelExperiment", + msg->experiments_description().c_str()); + } + } +} + +TEST_F(DebugDumpTest, VerifyAec3ExperimentalString) { + Config config; + AudioProcessing::Config apm_config; + DebugDumpGenerator generator(config, apm_config, true); + generator.StartRecording(); + generator.Process(100); + generator.StopRecording(); + + DebugDumpReplayer debug_dump_replayer_; + + ASSERT_TRUE(debug_dump_replayer_.SetDumpFile(generator.dump_file_name())); + + while (const rtc::Optional<audioproc::Event> event = + debug_dump_replayer_.GetNextEvent()) { + debug_dump_replayer_.RunNextEvent(); + if (event->type() == audioproc::Event::CONFIG) { + const audioproc::Config* msg = &event->config(); + ASSERT_TRUE(msg->has_experiments_description()); + EXPECT_PRED_FORMAT2(testing::IsSubstring, "EchoController", + msg->experiments_description().c_str()); + } + } +} + +TEST_F(DebugDumpTest, VerifyLevelControllerExperimentalString) { + Config config; + AudioProcessing::Config apm_config; + apm_config.level_controller.enabled = true; + DebugDumpGenerator generator(config, apm_config); + generator.StartRecording(); + generator.Process(100); + generator.StopRecording(); + + DebugDumpReplayer debug_dump_replayer_; + + 
ASSERT_TRUE(debug_dump_replayer_.SetDumpFile(generator.dump_file_name())); + + while (const rtc::Optional<audioproc::Event> event = + debug_dump_replayer_.GetNextEvent()) { + debug_dump_replayer_.RunNextEvent(); + if (event->type() == audioproc::Event::CONFIG) { + const audioproc::Config* msg = &event->config(); + ASSERT_TRUE(msg->has_experiments_description()); + EXPECT_PRED_FORMAT2(testing::IsSubstring, "LevelController", + msg->experiments_description().c_str()); + } + } +} + +TEST_F(DebugDumpTest, VerifyAgcClippingLevelExperimentalString) { + Config config; + // Arbitrarily set clipping gain to 17, which will never be the default. + config.Set<ExperimentalAgc>(new ExperimentalAgc(true, 0, 17)); + DebugDumpGenerator generator(config, AudioProcessing::Config()); + generator.StartRecording(); + generator.Process(100); + generator.StopRecording(); + + DebugDumpReplayer debug_dump_replayer_; + + ASSERT_TRUE(debug_dump_replayer_.SetDumpFile(generator.dump_file_name())); + + while (const rtc::Optional<audioproc::Event> event = + debug_dump_replayer_.GetNextEvent()) { + debug_dump_replayer_.RunNextEvent(); + if (event->type() == audioproc::Event::CONFIG) { + const audioproc::Config* msg = &event->config(); + ASSERT_TRUE(msg->has_experiments_description()); + EXPECT_PRED_FORMAT2(testing::IsSubstring, "AgcClippingLevelExperiment", + msg->experiments_description().c_str()); + } + } +} + +TEST_F(DebugDumpTest, VerifyEmptyExperimentalString) { + Config config; + DebugDumpGenerator generator(config, AudioProcessing::Config()); + generator.StartRecording(); + generator.Process(100); + generator.StopRecording(); + + DebugDumpReplayer debug_dump_replayer_; + + ASSERT_TRUE(debug_dump_replayer_.SetDumpFile(generator.dump_file_name())); + + while (const rtc::Optional<audioproc::Event> event = + debug_dump_replayer_.GetNextEvent()) { + debug_dump_replayer_.RunNextEvent(); + if (event->type() == audioproc::Event::CONFIG) { + const audioproc::Config* msg = &event->config(); + ASSERT_TRUE(msg->has_experiments_description()); + EXPECT_EQ(0u, msg->experiments_description().size()); + } + } +} + +TEST_F(DebugDumpTest, ToggleAecLevel) { + Config config; + DebugDumpGenerator generator(config, AudioProcessing::Config()); + EchoCancellation* aec = generator.apm()->echo_cancellation(); + EXPECT_EQ(AudioProcessing::kNoError, aec->Enable(true)); + EXPECT_EQ(AudioProcessing::kNoError, + aec->set_suppression_level(EchoCancellation::kLowSuppression)); + generator.StartRecording(); + generator.Process(100); + + EXPECT_EQ(AudioProcessing::kNoError, + aec->set_suppression_level(EchoCancellation::kHighSuppression)); + generator.Process(100); + generator.StopRecording(); + VerifyDebugDump(generator.dump_file_name()); +} + +// AGC is not supported on Android or iOS. 
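+// The MAYBE_ indirection below keeps the test compiled on every platform
+// while disabling it, via the gtest DISABLED_ prefix, where AGC is
+// unavailable.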
+#if defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS) +#define MAYBE_ToggleAgc DISABLED_ToggleAgc +#else +#define MAYBE_ToggleAgc ToggleAgc +#endif +TEST_F(DebugDumpTest, MAYBE_ToggleAgc) { + Config config; + DebugDumpGenerator generator(config, AudioProcessing::Config()); + generator.StartRecording(); + generator.Process(100); + + GainControl* agc = generator.apm()->gain_control(); + EXPECT_EQ(AudioProcessing::kNoError, agc->Enable(!agc->is_enabled())); + + generator.Process(100); + generator.StopRecording(); + VerifyDebugDump(generator.dump_file_name()); +} + +TEST_F(DebugDumpTest, ToggleNs) { + Config config; + DebugDumpGenerator generator(config, AudioProcessing::Config()); + generator.StartRecording(); + generator.Process(100); + + NoiseSuppression* ns = generator.apm()->noise_suppression(); + EXPECT_EQ(AudioProcessing::kNoError, ns->Enable(!ns->is_enabled())); + + generator.Process(100); + generator.StopRecording(); + VerifyDebugDump(generator.dump_file_name()); +} + +TEST_F(DebugDumpTest, TransientSuppressionOn) { + Config config; + config.Set<ExperimentalNs>(new ExperimentalNs(true)); + DebugDumpGenerator generator(config, AudioProcessing::Config()); + generator.StartRecording(); + generator.Process(100); + generator.StopRecording(); + VerifyDebugDump(generator.dump_file_name()); +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/echo_canceller_test_tools.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/test/echo_canceller_test_tools.cc new file mode 100644 index 0000000000..eba852d4b5 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/echo_canceller_test_tools.cc @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/test/echo_canceller_test_tools.h" + +#include "rtc_base/checks.h" + +namespace webrtc { + +void RandomizeSampleVector(Random* random_generator, rtc::ArrayView<float> v) { + for (auto& v_k : v) { + v_k = 2 * 32767.f * random_generator->Rand<float>() - 32767.f; + } +} + +template <typename T> +void DelayBuffer<T>::Delay(rtc::ArrayView<const T> x, + rtc::ArrayView<T> x_delayed) { + RTC_DCHECK_EQ(x.size(), x_delayed.size()); + if (buffer_.empty()) { + std::copy(x.begin(), x.end(), x_delayed.begin()); + } else { + for (size_t k = 0; k < x.size(); ++k) { + x_delayed[k] = buffer_[next_insert_index_]; + buffer_[next_insert_index_] = x[k]; + next_insert_index_ = (next_insert_index_ + 1) % buffer_.size(); + } + } +} + +template class DelayBuffer<float>; +template class DelayBuffer<int>; +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/echo_canceller_test_tools.h b/third_party/libwebrtc/webrtc/modules/audio_processing/test/echo_canceller_test_tools.h new file mode 100644 index 0000000000..1cb5b7e762 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/echo_canceller_test_tools.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_TEST_ECHO_CANCELLER_TEST_TOOLS_H_ +#define MODULES_AUDIO_PROCESSING_TEST_ECHO_CANCELLER_TEST_TOOLS_H_ + +#include <algorithm> +#include <vector> + +#include "api/array_view.h" +#include "rtc_base/constructormagic.h" +#include "rtc_base/random.h" + +namespace webrtc { + +// Randomizes the elements in a vector with values -32767.f:32767.f. +void RandomizeSampleVector(Random* random_generator, rtc::ArrayView<float> v); + +// Class for delaying a signal a fixed number of samples. +template <typename T> +class DelayBuffer { + public: + explicit DelayBuffer(size_t delay) : buffer_(delay) {} + ~DelayBuffer() = default; + + // Produces a delayed signal copy of x. + void Delay(rtc::ArrayView<const T> x, rtc::ArrayView<T> x_delayed); + + private: + std::vector<T> buffer_; + size_t next_insert_index_ = 0; + RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(DelayBuffer); +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_TEST_ECHO_CANCELLER_TEST_TOOLS_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/echo_canceller_test_tools_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/test/echo_canceller_test_tools_unittest.cc new file mode 100644 index 0000000000..4d5a4fb34b --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/echo_canceller_test_tools_unittest.cc @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/test/echo_canceller_test_tools.h" + +#include <vector> + +#include "api/array_view.h" +#include "rtc_base/checks.h" +#include "rtc_base/random.h" +#include "test/gtest.h" + +namespace webrtc { + +TEST(EchoCancellerTestTools, FloatDelayBuffer) { + constexpr size_t kDelay = 10; + DelayBuffer<float> delay_buffer(kDelay); + std::vector<float> v(1000, 0.f); + for (size_t k = 0; k < v.size(); ++k) { + v[k] = k; + } + std::vector<float> v_delayed = v; + constexpr size_t kBlockSize = 50; + for (size_t k = 0; k < rtc::CheckedDivExact(v.size(), kBlockSize); ++k) { + delay_buffer.Delay( + rtc::ArrayView<const float>(&v[k * kBlockSize], kBlockSize), + rtc::ArrayView<float>(&v_delayed[k * kBlockSize], kBlockSize)); + } + for (size_t k = kDelay; k < v.size(); ++k) { + EXPECT_EQ(v[k - kDelay], v_delayed[k]); + } +} + +TEST(EchoCancellerTestTools, IntDelayBuffer) { + constexpr size_t kDelay = 10; + DelayBuffer<int> delay_buffer(kDelay); + std::vector<int> v(1000, 0); + for (size_t k = 0; k < v.size(); ++k) { + v[k] = k; + } + std::vector<int> v_delayed = v; + const size_t kBlockSize = 50; + for (size_t k = 0; k < rtc::CheckedDivExact(v.size(), kBlockSize); ++k) { + delay_buffer.Delay( + rtc::ArrayView<const int>(&v[k * kBlockSize], kBlockSize), + rtc::ArrayView<int>(&v_delayed[k * kBlockSize], kBlockSize)); + } + for (size_t k = kDelay; k < v.size(); ++k) { + EXPECT_EQ(v[k - kDelay], v_delayed[k]); + } +} + +TEST(EchoCancellerTestTools, RandomizeSampleVector) { + Random random_generator(42U); + std::vector<float> v(50, 0.f); + std::vector<float> v_ref = v; + RandomizeSampleVector(&random_generator, v); + EXPECT_NE(v, v_ref); + v_ref = v; + RandomizeSampleVector(&random_generator, v); + EXPECT_NE(v, v_ref); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/fake_recording_device.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/test/fake_recording_device.cc new file mode 100644 index 0000000000..aee3dcef61 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/fake_recording_device.cc @@ -0,0 +1,152 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/test/fake_recording_device.h" + +#include <algorithm> + +#include "rtc_base/logging.h" +#include "rtc_base/ptr_util.h" + +namespace webrtc { +namespace test { + +namespace { + +constexpr int16_t kInt16SampleMin = -32768; +constexpr int16_t kInt16SampleMax = 32767; +constexpr float kFloatSampleMin = -32768.f; +constexpr float kFloatSampleMax = 32767.0f; + +} // namespace + +// Abstract class for the different fake recording devices. 
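+// Each subclass implements one simulated gain curve; the concrete worker is
+// selected through the |device_kind| argument of FakeRecordingDevice (0 for
+// the identity device, 1 for the linear one).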
+class FakeRecordingDeviceWorker { + public: + explicit FakeRecordingDeviceWorker(const int initial_mic_level) + : mic_level_(initial_mic_level) {} + int mic_level() const { return mic_level_; } + void set_mic_level(const int level) { mic_level_ = level; } + void set_undo_mic_level(const int level) { undo_mic_level_ = level; } + virtual ~FakeRecordingDeviceWorker() = default; + virtual void ModifyBufferInt16(AudioFrame* buffer) = 0; + virtual void ModifyBufferFloat(ChannelBuffer<float>* buffer) = 0; + + protected: + // Mic level to simulate. + int mic_level_; + // Optional mic level to undo. + rtc::Optional<int> undo_mic_level_; +}; + +namespace { + +// Identity fake recording device. The samples are not modified, which is +// equivalent to a constant gain curve at 1.0 - only used for testing. +class FakeRecordingDeviceIdentity final : public FakeRecordingDeviceWorker { + public: + explicit FakeRecordingDeviceIdentity(const int initial_mic_level) + : FakeRecordingDeviceWorker(initial_mic_level) {} + ~FakeRecordingDeviceIdentity() override = default; + void ModifyBufferInt16(AudioFrame* buffer) override {} + void ModifyBufferFloat(ChannelBuffer<float>* buffer) override {} +}; + +// Linear fake recording device. The gain curve is a linear function mapping the +// mic levels range [0, 255] to [0.0, 1.0]. +class FakeRecordingDeviceLinear final : public FakeRecordingDeviceWorker { + public: + explicit FakeRecordingDeviceLinear(const int initial_mic_level) + : FakeRecordingDeviceWorker(initial_mic_level) {} + ~FakeRecordingDeviceLinear() override = default; + void ModifyBufferInt16(AudioFrame* buffer) override { + const size_t number_of_samples = + buffer->samples_per_channel_ * buffer->num_channels_; + int16_t* data = buffer->mutable_data(); + // If an undo level is specified, virtually restore the unmodified + // microphone level; otherwise simulate the mic gain only. + const float divisor = + (undo_mic_level_ && *undo_mic_level_ > 0) ? *undo_mic_level_ : 255.f; + for (size_t i = 0; i < number_of_samples; ++i) { + data[i] = + std::max(kInt16SampleMin, + std::min(kInt16SampleMax, + static_cast<int16_t>(static_cast<float>(data[i]) * + mic_level_ / divisor))); + } + } + void ModifyBufferFloat(ChannelBuffer<float>* buffer) override { + // If an undo level is specified, virtually restore the unmodified + // microphone level; otherwise simulate the mic gain only. + const float divisor = + (undo_mic_level_ && *undo_mic_level_ > 0) ? 
*undo_mic_level_ : 255.f; + for (size_t c = 0; c < buffer->num_channels(); ++c) { + for (size_t i = 0; i < buffer->num_frames(); ++i) { + buffer->channels()[c][i] = + std::max(kFloatSampleMin, + std::min(kFloatSampleMax, + buffer->channels()[c][i] * mic_level_ / divisor)); + } + } + } +}; + +} // namespace + +FakeRecordingDevice::FakeRecordingDevice(int initial_mic_level, + int device_kind) { + switch (device_kind) { + case 0: + worker_ = rtc::MakeUnique<FakeRecordingDeviceIdentity>(initial_mic_level); + break; + case 1: + worker_ = rtc::MakeUnique<FakeRecordingDeviceLinear>(initial_mic_level); + break; + default: + RTC_NOTREACHED(); + break; + } +} + +FakeRecordingDevice::~FakeRecordingDevice() = default; + +int FakeRecordingDevice::MicLevel() const { + RTC_CHECK(worker_); + return worker_->mic_level(); +} + +void FakeRecordingDevice::SetMicLevel(const int level) { + RTC_CHECK(worker_); + if (level != worker_->mic_level()) + RTC_LOG(LS_INFO) << "Simulate mic level update: " << level; + worker_->set_mic_level(level); +} + +void FakeRecordingDevice::SetUndoMicLevel(const int level) { + RTC_DCHECK(worker_); + // TODO(alessiob): The behavior with undo level equal to zero is not clear yet + // and will be defined in future CLs once more FakeRecordingDeviceWorker + // implementations need to be added. + RTC_CHECK(level > 0) << "Zero undo mic level is unsupported"; + worker_->set_undo_mic_level(level); +} + +void FakeRecordingDevice::SimulateAnalogGain(AudioFrame* buffer) { + RTC_DCHECK(worker_); + worker_->ModifyBufferInt16(buffer); +} + +void FakeRecordingDevice::SimulateAnalogGain(ChannelBuffer<float>* buffer) { + RTC_DCHECK(worker_); + worker_->ModifyBufferFloat(buffer); +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/fake_recording_device.h b/third_party/libwebrtc/webrtc/modules/audio_processing/test/fake_recording_device.h new file mode 100644 index 0000000000..b1e37a331d --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/fake_recording_device.h @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_TEST_FAKE_RECORDING_DEVICE_H_ +#define MODULES_AUDIO_PROCESSING_TEST_FAKE_RECORDING_DEVICE_H_ + +#include <algorithm> +#include <memory> +#include <vector> + +#include "api/array_view.h" +#include "common_audio/channel_buffer.h" +#include "modules/include/module_common_types.h" +#include "rtc_base/checks.h" + +namespace webrtc { +namespace test { + +class FakeRecordingDeviceWorker; + +// Class for simulating a microphone with analog gain. +// +// The intended modes of operation are the following: +// +// FakeRecordingDevice fake_mic(255, 1); +// +// fake_mic.SetMicLevel(170); +// fake_mic.SimulateAnalogGain(buffer); +// +// When the mic level to undo is known: +// +// fake_mic.SetMicLevel(170); +// fake_mic.SetUndoMicLevel(30); +// fake_mic.SimulateAnalogGain(buffer); +// +// The second option virtually restores the unmodified microphone level. Calling +// SimulateAnalogGain() will first "undo" the gain applied by the real +// microphone (e.g., 30). 
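FakeRecordingDeviceLinear above scales each sample by mic_level / divisor, where divisor is the undo level when one is set and 255 otherwise. A hedged Python sketch of that gain rule (the function is illustrative, not part of the patch):

```
def linear_fake_mic(sample, mic_level, undo_level=None):
    # Undo the gain of the real device (if known), then simulate `mic_level`.
    divisor = float(undo_level) if undo_level and undo_level > 0 else 255.0
    out = sample * mic_level / divisor
    # Clamp to the int16 sample range, as the int16 code path does.
    return max(-32768.0, min(32767.0, out))

assert linear_fake_mic(1000.0, 255) == 1000.0  # Level 255 is transparent.
assert linear_fake_mic(1000.0, 170, undo_level=170) == 1000.0  # Undo == level.
```

Setting the undo level equal to the simulated level yields unity gain, which is how the device virtually restores the unmodified signal before applying a new level.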
+class FakeRecordingDevice final { + public: + FakeRecordingDevice(int initial_mic_level, int device_kind); + ~FakeRecordingDevice(); + + int MicLevel() const; + void SetMicLevel(const int level); + void SetUndoMicLevel(const int level); + + // Simulates the analog gain. + // If an undo mic level has been set via SetUndoMicLevel(), the unmodified + // mic signal is first virtually restored; otherwise only the simulated + // gain is applied. + void SimulateAnalogGain(AudioFrame* buffer); + + // Simulates the analog gain. + // If an undo mic level has been set via SetUndoMicLevel(), the unmodified + // mic signal is first virtually restored; otherwise only the simulated + // gain is applied. + void SimulateAnalogGain(ChannelBuffer<float>* buffer); + + private: + // Fake recording device worker. + std::unique_ptr<FakeRecordingDeviceWorker> worker_; +}; + +} // namespace test +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_TEST_FAKE_RECORDING_DEVICE_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/fake_recording_device_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/test/fake_recording_device_unittest.cc new file mode 100644 index 0000000000..504459a3c6 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/fake_recording_device_unittest.cc @@ -0,0 +1,231 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <cmath> +#include <memory> +#include <sstream> +#include <string> +#include <vector> + +#include "api/array_view.h" +#include "modules/audio_processing/test/fake_recording_device.h" +#include "rtc_base/ptr_util.h" +#include "test/gtest.h" + +namespace webrtc { +namespace test { +namespace { + +constexpr int kInitialMicLevel = 100; + +// TODO(alessiob): Add new fake recording device kind values here as they are +// added in FakeRecordingDevice::FakeRecordingDevice. +const std::vector<int> kFakeRecDeviceKinds = {0, 1}; + +const std::vector<std::vector<float>> kTestMultiChannelSamples{ + std::vector<float>{-10.f, -1.f, -0.1f, 0.f, 0.1f, 1.f, 10.f}}; + +// Writes samples into ChannelBuffer<float>. +void WritesDataIntoChannelBuffer(const std::vector<std::vector<float>>& data, + ChannelBuffer<float>* buff) { + EXPECT_EQ(data.size(), buff->num_channels()); + EXPECT_EQ(data[0].size(), buff->num_frames()); + for (size_t c = 0; c < buff->num_channels(); ++c) { + for (size_t f = 0; f < buff->num_frames(); ++f) { + buff->channels()[c][f] = data[c][f]; + } + } +} + +std::unique_ptr<ChannelBuffer<float>> CreateChannelBufferWithData( + const std::vector<std::vector<float>>& data) { + auto buff = + rtc::MakeUnique<ChannelBuffer<float>>(data[0].size(), data.size()); + WritesDataIntoChannelBuffer(data, buff.get()); + return buff; +} + +// Checks that the samples modified using monotonically increasing level values +// also have monotonically increasing moduli.
+void CheckIfMonotoneSamplesModules(const ChannelBuffer<float>* prev, + const ChannelBuffer<float>* curr) { + RTC_DCHECK_EQ(prev->num_channels(), curr->num_channels()); + RTC_DCHECK_EQ(prev->num_frames(), curr->num_frames()); + bool valid = true; + for (size_t i = 0; i < prev->num_channels(); ++i) { + for (size_t j = 0; j < prev->num_frames(); ++j) { + valid = std::fabs(prev->channels()[i][j]) <= + std::fabs(curr->channels()[i][j]); + if (!valid) { + break; + } + } + if (!valid) { + break; + } + } + EXPECT_TRUE(valid); +} + +// Checks that the samples in each pair have the same sign unless the sample in +// |dst| is zero (because of zero gain). +void CheckSameSign(const ChannelBuffer<float>* src, + const ChannelBuffer<float>* dst) { + RTC_DCHECK_EQ(src->num_channels(), dst->num_channels()); + RTC_DCHECK_EQ(src->num_frames(), dst->num_frames()); + const auto fsgn = [](float x) { return ((x < 0) ? -1 : (x > 0) ? 1 : 0); }; + bool valid = true; + for (size_t i = 0; i < src->num_channels(); ++i) { + for (size_t j = 0; j < src->num_frames(); ++j) { + valid = dst->channels()[i][j] == 0.0f || + fsgn(src->channels()[i][j]) == fsgn(dst->channels()[i][j]); + if (!valid) { + break; + } + } + if (!valid) { + break; + } + } + EXPECT_TRUE(valid); +} + +std::string FakeRecordingDeviceKindToString(int fake_rec_device_kind) { + std::ostringstream ss; + ss << "fake recording device: " << fake_rec_device_kind; + return ss.str(); +} + +std::string AnalogLevelToString(int level) { + std::ostringstream ss; + ss << "analog level: " << level; + return ss.str(); +} + +} // namespace + +TEST(FakeRecordingDevice, CheckHelperFunctions) { + constexpr size_t kC = 0; // Channel index. + constexpr size_t kS = 1; // Sample index. + + // Check read. + auto buff = CreateChannelBufferWithData(kTestMultiChannelSamples); + for (size_t c = 0; c < kTestMultiChannelSamples.size(); ++c) { + for (size_t s = 0; s < kTestMultiChannelSamples[0].size(); ++s) { + EXPECT_EQ(kTestMultiChannelSamples[c][s], buff->channels()[c][s]); + } + } + + // Check write. + buff->channels()[kC][kS] = -5.0f; + RTC_DCHECK_NE(buff->channels()[kC][kS], kTestMultiChannelSamples[kC][kS]); + + // Check reset. + WritesDataIntoChannelBuffer(kTestMultiChannelSamples, buff.get()); + EXPECT_EQ(buff->channels()[kC][kS], kTestMultiChannelSamples[kC][kS]); +} + +// Implicitly checks that changes to the mic and undo levels injected via +// FakeRecordingDevice are visible to the FakeRecordingDeviceWorker +// implementation.
+TEST(FakeRecordingDevice, TestWorkerAbstractClass) { + FakeRecordingDevice fake_recording_device(kInitialMicLevel, 1); + + auto buff1 = CreateChannelBufferWithData(kTestMultiChannelSamples); + fake_recording_device.SetMicLevel(100); + fake_recording_device.SimulateAnalogGain(buff1.get()); + + auto buff2 = CreateChannelBufferWithData(kTestMultiChannelSamples); + fake_recording_device.SetMicLevel(200); + fake_recording_device.SimulateAnalogGain(buff2.get()); + + for (size_t c = 0; c < kTestMultiChannelSamples.size(); ++c) { + for (size_t s = 0; s < kTestMultiChannelSamples[0].size(); ++s) { + EXPECT_LE(std::abs(buff1->channels()[c][s]), + std::abs(buff2->channels()[c][s])); + } + } + + auto buff3 = CreateChannelBufferWithData(kTestMultiChannelSamples); + fake_recording_device.SetMicLevel(200); + fake_recording_device.SetUndoMicLevel(100); + fake_recording_device.SimulateAnalogGain(buff3.get()); + + for (size_t c = 0; c < kTestMultiChannelSamples.size(); ++c) { + for (size_t s = 0; s < kTestMultiChannelSamples[0].size(); ++s) { + EXPECT_LE(std::abs(buff1->channels()[c][s]), + std::abs(buff3->channels()[c][s])); + EXPECT_LE(std::abs(buff2->channels()[c][s]), + std::abs(buff3->channels()[c][s])); + } + } +} + +TEST(FakeRecordingDevice, GainCurveShouldBeMonotone) { + // Create input-output buffers. + auto buff_prev = CreateChannelBufferWithData(kTestMultiChannelSamples); + auto buff_curr = CreateChannelBufferWithData(kTestMultiChannelSamples); + + // Test different mappings. + for (auto fake_rec_device_kind : kFakeRecDeviceKinds) { + SCOPED_TRACE(FakeRecordingDeviceKindToString(fake_rec_device_kind)); + FakeRecordingDevice fake_recording_device(kInitialMicLevel, + fake_rec_device_kind); + // TODO(alessiob): The test below is designed for state-less recording + // devices. If, for instance, a device has memory, the test might need + // to be redesigned (e.g., re-initialize fake recording device). + + // Apply lowest analog level. + WritesDataIntoChannelBuffer(kTestMultiChannelSamples, buff_prev.get()); + fake_recording_device.SetMicLevel(0); + fake_recording_device.SimulateAnalogGain(buff_prev.get()); + + // Increment analog level to check monotonicity. + for (int i = 1; i <= 255; ++i) { + SCOPED_TRACE(AnalogLevelToString(i)); + WritesDataIntoChannelBuffer(kTestMultiChannelSamples, buff_curr.get()); + fake_recording_device.SetMicLevel(i); + fake_recording_device.SimulateAnalogGain(buff_curr.get()); + CheckIfMonotoneSamplesModules(buff_prev.get(), buff_curr.get()); + + // Update prev. + buff_prev.swap(buff_curr); + } + } +} + +TEST(FakeRecordingDevice, GainCurveShouldNotChangeSign) { + // Create view on original samples. + std::unique_ptr<const ChannelBuffer<float>> buff_orig = + CreateChannelBufferWithData(kTestMultiChannelSamples); + + // Create output buffer. + auto buff = CreateChannelBufferWithData(kTestMultiChannelSamples); + + // Test different mappings. + for (auto fake_rec_device_kind : kFakeRecDeviceKinds) { + SCOPED_TRACE(FakeRecordingDeviceKindToString(fake_rec_device_kind)); + FakeRecordingDevice fake_recording_device(kInitialMicLevel, + fake_rec_device_kind); + + // TODO(alessiob): The test below is designed for state-less recording + // devices. If, for instance, a device has memory, the test might need + // to be redesigned (e.g., re-initialize fake recording device). 
+ for (int i = 0; i <= 255; ++i) { + SCOPED_TRACE(AnalogLevelToString(i)); + WritesDataIntoChannelBuffer(kTestMultiChannelSamples, buff.get()); + fake_recording_device.SetMicLevel(i); + fake_recording_device.SimulateAnalogGain(buff.get()); + CheckSameSign(buff_orig.get(), buff.get()); + } + } +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/performance_timer.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/test/performance_timer.cc new file mode 100644 index 0000000000..1a82258903 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/performance_timer.cc @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/test/performance_timer.h" + +#include <math.h> + +#include <numeric> + +#include "rtc_base/checks.h" + +namespace webrtc { +namespace test { + +PerformanceTimer::PerformanceTimer(int num_frames_to_process) + : clock_(webrtc::Clock::GetRealTimeClock()) { + timestamps_us_.reserve(num_frames_to_process); +} + +PerformanceTimer::~PerformanceTimer() = default; + +void PerformanceTimer::StartTimer() { + start_timestamp_us_ = clock_->TimeInMicroseconds(); +} + +void PerformanceTimer::StopTimer() { + RTC_DCHECK(start_timestamp_us_); + timestamps_us_.push_back(clock_->TimeInMicroseconds() - *start_timestamp_us_); +} + +double PerformanceTimer::GetDurationAverage() const { + return GetDurationAverage(0); +} + +double PerformanceTimer::GetDurationStandardDeviation() const { + return GetDurationStandardDeviation(0); +} + +double PerformanceTimer::GetDurationAverage( + size_t number_of_warmup_samples) const { + RTC_DCHECK_GT(timestamps_us_.size(), number_of_warmup_samples); + const size_t number_of_samples = + timestamps_us_.size() - number_of_warmup_samples; + return static_cast<double>( + std::accumulate(timestamps_us_.begin() + number_of_warmup_samples, + timestamps_us_.end(), static_cast<int64_t>(0))) / + number_of_samples; +} + +double PerformanceTimer::GetDurationStandardDeviation( + size_t number_of_warmup_samples) const { + RTC_DCHECK_GT(timestamps_us_.size(), number_of_warmup_samples); + const size_t number_of_samples = + timestamps_us_.size() - number_of_warmup_samples; + RTC_DCHECK_GT(number_of_samples, 0); + double average_duration = GetDurationAverage(number_of_warmup_samples); + + double variance = std::accumulate( + timestamps_us_.begin() + number_of_warmup_samples, timestamps_us_.end(), + 0.0, [average_duration](const double& a, const int64_t& b) { + return a + (b - average_duration) * (b - average_duration); + }); + + return sqrt(variance / number_of_samples); +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/performance_timer.h b/third_party/libwebrtc/webrtc/modules/audio_processing/test/performance_timer.h new file mode 100644 index 0000000000..1c862dc78e --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/performance_timer.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_TEST_PERFORMANCE_TIMER_H_ +#define MODULES_AUDIO_PROCESSING_TEST_PERFORMANCE_TIMER_H_ + +#include <vector> + +#include "api/optional.h" +#include "system_wrappers/include/clock.h" + +namespace webrtc { +namespace test { + +class PerformanceTimer { + public: + explicit PerformanceTimer(int num_frames_to_process); + ~PerformanceTimer(); + + void StartTimer(); + void StopTimer(); + + double GetDurationAverage() const; + double GetDurationStandardDeviation() const; + + // These methods are the same as those above, but they ignore the first + // |number_of_warmup_samples| measurements. + double GetDurationAverage(size_t number_of_warmup_samples) const; + double GetDurationStandardDeviation(size_t number_of_warmup_samples) const; + + private: + webrtc::Clock* clock_; + rtc::Optional<int64_t> start_timestamp_us_; + std::vector<int64_t> timestamps_us_; +}; + +} // namespace test +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_TEST_PERFORMANCE_TIMER_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/protobuf_utils.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/test/protobuf_utils.cc new file mode 100644 index 0000000000..3b623b915c --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/protobuf_utils.cc @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/test/protobuf_utils.h" +#include "typedefs.h" // NOLINT(build/include) + +namespace webrtc { + +size_t ReadMessageBytesFromFile(FILE* file, std::unique_ptr<uint8_t[]>* bytes) { + // The "wire format" for the size is little-endian. Assume we're running on + // a little-endian machine. +#ifndef WEBRTC_ARCH_LITTLE_ENDIAN +#error "Need to convert message from little-endian." +#endif + int32_t size = 0; + if (fread(&size, sizeof(size), 1, file) != 1) + return 0; + if (size <= 0) + return 0; + + bytes->reset(new uint8_t[size]); + return fread(bytes->get(), sizeof((*bytes)[0]), size, file); +} + +// Returns true on success, false on error or end-of-file. +bool ReadMessageFromFile(FILE* file, MessageLite* msg) { + std::unique_ptr<uint8_t[]> bytes; + size_t size = ReadMessageBytesFromFile(file, &bytes); + if (!size) + return false; + + msg->Clear(); + return msg->ParseFromArray(bytes.get(), size); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/protobuf_utils.h b/third_party/libwebrtc/webrtc/modules/audio_processing/test/protobuf_utils.h new file mode 100644 index 0000000000..dded9b4ca7 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/protobuf_utils.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_TEST_PROTOBUF_UTILS_H_ +#define MODULES_AUDIO_PROCESSING_TEST_PROTOBUF_UTILS_H_ + +#include <memory> + +#include "rtc_base/ignore_wundef.h" +#include "rtc_base/protobuf_utils.h" + +RTC_PUSH_IGNORING_WUNDEF() +#include "modules/audio_processing/debug.pb.h" +RTC_POP_IGNORING_WUNDEF() + +namespace webrtc { + +// Allocates new memory in the unique_ptr to fit the raw message and returns the +// number of bytes read. +size_t ReadMessageBytesFromFile(FILE* file, std::unique_ptr<uint8_t[]>* bytes); + +// Returns true on success, false on error or end-of-file. +bool ReadMessageFromFile(FILE* file, MessageLite* msg); + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_TEST_PROTOBUF_UTILS_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/BUILD.gn b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/BUILD.gn new file mode 100644 index 0000000000..64e3a30bd3 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/BUILD.gn @@ -0,0 +1,174 @@ +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +import("../../../../webrtc.gni") + +group("py_quality_assessment") { + testonly = true + deps = [ + ":scripts", + ":unit_tests", + ] +} + +copy("scripts") { + testonly = true + sources = [ + "README.md", + "apm_quality_assessment.py", + "apm_quality_assessment.sh", + "apm_quality_assessment_boxplot.py", + "apm_quality_assessment_export.py", + "apm_quality_assessment_gencfgs.py", + "apm_quality_assessment_optimize.py", + ] + outputs = [ + "$root_build_dir/py_quality_assessment/{{source_file_part}}", + ] + deps = [ + ":apm_configs", + ":lib", + ":output", + "../..:audioproc_f", + "../../../../resources/audio_processing/test/py_quality_assessment:probing_signals", + ] +} + +copy("apm_configs") { + testonly = true + sources = [ + "apm_configs/default.json", + ] + visibility = [ ":*" ] # Only targets in this file can depend on this. 
+ outputs = [ + "$root_build_dir/py_quality_assessment/apm_configs/{{source_file_part}}", + ] +} # apm_configs + +copy("lib") { + testonly = true + sources = [ + "quality_assessment/__init__.py", + "quality_assessment/annotations.py", + "quality_assessment/audioproc_wrapper.py", + "quality_assessment/collect_data.py", + "quality_assessment/data_access.py", + "quality_assessment/echo_path_simulation.py", + "quality_assessment/echo_path_simulation_factory.py", + "quality_assessment/eval_scores.py", + "quality_assessment/eval_scores_factory.py", + "quality_assessment/evaluation.py", + "quality_assessment/exceptions.py", + "quality_assessment/export.py", + "quality_assessment/export_unittest.py", + "quality_assessment/external_vad.py", + "quality_assessment/input_mixer.py", + "quality_assessment/input_signal_creator.py", + "quality_assessment/results.css", + "quality_assessment/results.js", + "quality_assessment/signal_processing.py", + "quality_assessment/simulation.py", + "quality_assessment/test_data_generation.py", + "quality_assessment/test_data_generation_factory.py", + ] + visibility = [ ":*" ] # Only targets in this file can depend on this. + outputs = [ + "$root_build_dir/py_quality_assessment/quality_assessment/{{source_file_part}}", + ] + deps = [ + "../../../../resources/audio_processing/test/py_quality_assessment:noise_tracks", + ] +} + +copy("output") { + testonly = true + sources = [ + "output/README.md", + ] + visibility = [ ":*" ] # Only targets in this file can depend on this. + outputs = [ + "$root_build_dir/py_quality_assessment/output/{{source_file_part}}", + ] +} + +group("unit_tests") { + testonly = true + visibility = [ ":*" ] # Only targets in this file can depend on this. + deps = [ + ":apm_vad", + ":fake_polqa", + ":lib_unit_tests", + ":scripts_unit_tests", + ":vad", + ] +} + +rtc_executable("fake_polqa") { + testonly = true + sources = [ + "quality_assessment/fake_polqa.cc", + ] + visibility = [ ":*" ] # Only targets in this file can depend on this. + output_name = "py_quality_assessment/quality_assessment/fake_polqa" + deps = [ + "../../../..:webrtc_common", + "../../../../rtc_base:rtc_base_approved", + ] +} + +rtc_executable("vad") { + sources = [ + "quality_assessment/vad.cc", + ] + deps = [ + "../../../..:webrtc_common", + "../../../../common_audio", + "../../../../rtc_base:rtc_base_approved", + ] +} + +rtc_executable("apm_vad") { + sources = [ + "quality_assessment/apm_vad.cc", + ] + deps = [ + "../..", + "../../../..:webrtc_common", + "../../../../common_audio", + "../../../../rtc_base:rtc_base_approved", + ] +} + +copy("lib_unit_tests") { + testonly = true + sources = [ + "quality_assessment/annotations_unittest.py", + "quality_assessment/echo_path_simulation_unittest.py", + "quality_assessment/eval_scores_unittest.py", + "quality_assessment/fake_external_vad.py", + "quality_assessment/input_mixer_unittest.py", + "quality_assessment/signal_processing_unittest.py", + "quality_assessment/simulation_unittest.py", + "quality_assessment/test_data_generation_unittest.py", + ] + visibility = [ ":*" ] # Only targets in this file can depend on this. + outputs = [ + "$root_build_dir/py_quality_assessment/quality_assessment/{{source_file_part}}", + ] +} + +copy("scripts_unit_tests") { + testonly = true + sources = [ + "apm_quality_assessment_unittest.py", + ] + visibility = [ ":*" ] # Only targets in this file can depend on this. 
+ outputs = [ + "$root_build_dir/py_quality_assessment/{{source_file_part}}", + ] +} diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/OWNERS b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/OWNERS new file mode 100644 index 0000000000..4dc254dcee --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/OWNERS @@ -0,0 +1,8 @@ +aleloi@webrtc.org +alessiob@webrtc.org +henrik.lundin@webrtc.org +ivoc@webrtc.org +peah@webrtc.org + +per-file *.gn=* +per-file *.gni=* diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/README.md b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/README.md new file mode 100644 index 0000000000..6fa0b7004c --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/README.md @@ -0,0 +1,125 @@ +# APM Quality Assessment tool + +Python wrapper of APM simulators (e.g., `audioproc_f`) with which quality +assessment can be automated. The tool can simulate different noise +conditions, input signals and APM configurations, and it computes different +scores. Once the scores are computed, the results can be exported to an HTML +page on which one can listen to the APM input and output signals as well as the +reference signal used for evaluation. + +## Dependencies + - OS: Linux + - Python 2.7 + - Python libraries: enum34, numpy, scipy, pydub (0.17.0+), pandas (0.20.1+), + pyquery (1.2+), jsmin (2.2+), csscompressor (0.9.4) + - It is recommended that a dedicated Python environment is used + - install `virtualenv` + - `$ sudo apt-get install python-virtualenv` + - set up a new Python environment (e.g., `my_env`) + - `$ cd ~ && virtualenv my_env` + - activate the new Python environment + - `$ source ~/my_env/bin/activate` + - add dependencies via `pip` + - `(my_env)$ pip install enum34 numpy pydub scipy pandas pyquery jsmin \` + `csscompressor` + - PolqaOem64 (see http://www.polqa.info/) + - Tested with POLQA Library v1.180 / P863 v2.400 + - Aachen Impulse Response (AIR) Database + - Download https://www2.iks.rwth-aachen.de/air/air_database_release_1_4.zip + - Input probing signals and noise tracks (you can make your own dataset - *1) + +## Build + - Compile WebRTC + - Go to `out/Default/py_quality_assessment` and check that + `apm_quality_assessment.py` exists + +## Unit tests + - Compile WebRTC + - Go to `out/Default/py_quality_assessment` + - Run `python -m unittest discover -p "*_unittest.py"` + +## First time setup + - Deploy PolqaOem64 and set the `POLQA_PATH` environment variable + - e.g., `$ export POLQA_PATH=/var/opt/PolqaOem64` + - Deploy the AIR Database and set the `AECHEN_IR_DATABASE_PATH` environment + variable + - e.g., `$ export AECHEN_IR_DATABASE_PATH=/var/opt/AIR_1_4` + - Deploy probing signal tracks into + - `out/Default/py_quality_assessment/probing_signals` (*1) + - Deploy noise tracks into + - `out/Default/py_quality_assessment/noise_tracks` (*1, *2) + +(*1) You can use custom files as long as they are mono tracks sampled at 48kHz +encoded in the 16 bit signed format (it is recommended that the tracks are +converted and exported with Audacity).
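For example, a track can be converted to the expected format (mono, 48 kHz, 16 bit signed) with `pydub`, which is already among the dependencies listed above; the file names below are placeholders:

```
from pydub import AudioSegment

track = AudioSegment.from_file('my_track.wav')
track = track.set_channels(1).set_frame_rate(48000).set_sample_width(2)
track.export('probing_signals/my_track.wav', format='wav')
```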
+ +## Usage (scores computation) + - Go to `out/Default/py_quality_assessment` + - Use `apm_quality_assessment.sh` as an example script for parallelizing the + experiments + - Adjust the script according to your preferences (e.g., output path) + - Run `apm_quality_assessment.sh` + - The script will end by opening the browser and showing ALL the computed + scores + +## Usage (export reports) +Showing all the results at once can be confusing. You may therefore want to +export separate reports. In this case, you can use the +`apm_quality_assessment_export.py` script as follows: + + - Set `--output_dir, -o` to the same value used in `apm_quality_assessment.sh` + - Use regular expressions to select/filter out scores by + - APM configurations: `--config_names, -c` + - capture signals: `--capture_names, -i` + - render signals: `--render_names, -r` + - echo simulator: `--echo_simulator_names, -e` + - test data generators: `--test_data_generators, -t` + - scores: `--eval_scores, -s` + - Assign a suffix to the report name using `-f <suffix>` + +For instance: + +``` +$ ./apm_quality_assessment_export.py \ + -o output/ \ + -c "(^default$)|(.*AE.*)" \ + -t \(white_noise\) \ + -s \(polqa\) \ + -f echo +``` + +## Usage (boxplot) +After generating stats, it can help to visualize how a score depends on a +certain APM simulator parameter. The `apm_quality_assessment_boxplot.py` script +helps with that, producing plots similar to [this +one](https://matplotlib.org/mpl_examples/pylab_examples/boxplot_demo_06.png). + +Suppose some scores come from running the APM simulator `audioproc_f` with +or without the intelligibility enhancer: `--ie=1` or `--ie=0`. Then two boxplots +side by side can be generated with + +``` +$ ./apm_quality_assessment_boxplot.py \ + -o /path/to/output \ + -v <score_name> \ + -n /path/to/dir/with/apm_configs \ + -z ie +``` + +## Troubleshooting +The input wav file must be: + - sampled at a sample rate that is a multiple of 100 (required by POLQA) + - in the 16 bit format (required by `audioproc_f`) + - encoded in the Microsoft WAV signed 16 bit PCM format (Audacity default + when exporting) + +Depending on the license, the POLQA tool may take “breaks” as a way to limit the +throughput. When this happens, the APM Quality Assessment tool is slowed down. +For more details about this limitation, check Section 10.9.1 in the POLQA manual +v.1.18. + +In case of issues with the POLQA score computation, check +`py_quality_assessment/eval_scores.py` and adapt +`PolqaScore._parse_output_file()`. +The code can also be fixed directly in the build directory (namely, +`out/Default/py_quality_assessment/eval_scores.py`).
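Since the selection flags above take regular expressions, it can be worth dry-running a pattern in Python before launching a long export. The sketch below assumes the tool matches config names with a regex search; the candidate names are made up:

```
import re

config_filter = re.compile(r"(^default$)|(.*AE.*)")
candidates = ['default', 'default-no_AEC', 'default-with_AECM', 'default-no_vad']
print([name for name in candidates if config_filter.search(name)])
# ['default', 'default-no_AEC', 'default-with_AECM']
```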
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/apm_configs/default.json b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/apm_configs/default.json new file mode 100644 index 0000000000..5c3277bac0 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/apm_configs/default.json @@ -0,0 +1 @@ +{"-all_default": null} diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment.py b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment.py new file mode 100755 index 0000000000..a4cc5f037f --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment.py @@ -0,0 +1,178 @@ +#!/usr/bin/env python +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +"""Perform APM module quality assessment on one or more input files using one or + more APM simulator configuration files and one or more test data generators. + +Usage: apm_quality_assessment.py -i audio1.wav [audio2.wav ...] + -c cfg1.json [cfg2.json ...] + -n white [echo ...] + -e audio_level [polqa ...] + -o /path/to/output +""" + +import argparse +import logging +import os +import sys + +import quality_assessment.audioproc_wrapper as audioproc_wrapper +import quality_assessment.echo_path_simulation as echo_path_simulation +import quality_assessment.eval_scores as eval_scores +import quality_assessment.evaluation as evaluation +import quality_assessment.eval_scores_factory as eval_scores_factory +import quality_assessment.external_vad as external_vad +import quality_assessment.test_data_generation as test_data_generation +import quality_assessment.test_data_generation_factory as \ + test_data_generation_factory +import quality_assessment.simulation as simulation + +_ECHO_PATH_SIMULATOR_NAMES = ( + echo_path_simulation.EchoPathSimulator.REGISTERED_CLASSES) +_TEST_DATA_GENERATOR_CLASSES = ( + test_data_generation.TestDataGenerator.REGISTERED_CLASSES) +_TEST_DATA_GENERATORS_NAMES = _TEST_DATA_GENERATOR_CLASSES.keys() +_EVAL_SCORE_WORKER_CLASSES = eval_scores.EvaluationScore.REGISTERED_CLASSES +_EVAL_SCORE_WORKER_NAMES = _EVAL_SCORE_WORKER_CLASSES.keys() + +_DEFAULT_CONFIG_FILE = 'apm_configs/default.json' + +_POLQA_BIN_NAME = 'PolqaOem64' + + +def _InstanceArgumentsParser(): + """Arguments parser factory. 
+ """ + parser = argparse.ArgumentParser(description=( + 'Perform APM module quality assessment on one or more input files using ' + 'one or more APM simulator configuration files and one or more ' + 'test data generators.')) + + parser.add_argument('-c', '--config_files', nargs='+', required=False, + help=('path to the configuration files defining the ' + 'arguments with which the APM simulator tool is ' + 'called'), + default=[_DEFAULT_CONFIG_FILE]) + + parser.add_argument('-i', '--capture_input_files', nargs='+', required=True, + help='path to the capture input wav files (one or more)') + + parser.add_argument('-r', '--render_input_files', nargs='+', required=False, + help=('path to the render input wav files; either ' + 'omitted or one file for each file in ' + '--capture_input_files (files will be paired by ' + 'index)'), default=None) + + parser.add_argument('-p', '--echo_path_simulator', required=False, + help=('custom echo path simulator name; required if ' + '--render_input_files is specified'), + choices=_ECHO_PATH_SIMULATOR_NAMES, + default=echo_path_simulation.NoEchoPathSimulator.NAME) + + parser.add_argument('-t', '--test_data_generators', nargs='+', required=False, + help='custom list of test data generators to use', + choices=_TEST_DATA_GENERATORS_NAMES, + default=_TEST_DATA_GENERATORS_NAMES) + + parser.add_argument('--additive_noise_tracks_path', required=False, + help='path to the wav files for the additive', + default=test_data_generation. \ + AdditiveNoiseTestDataGenerator. \ + DEFAULT_NOISE_TRACKS_PATH) + + parser.add_argument('-e', '--eval_scores', nargs='+', required=False, + help='custom list of evaluation scores to use', + choices=_EVAL_SCORE_WORKER_NAMES, + default=_EVAL_SCORE_WORKER_NAMES) + + parser.add_argument('-o', '--output_dir', required=False, + help=('base path to the output directory in which the ' + 'output wav files and the evaluation outcomes ' + 'are saved'), + default='output') + + parser.add_argument('--polqa_path', required=True, + help='path to the POLQA tool') + + parser.add_argument('--air_db_path', required=True, + help='path to the Aechen IR database') + + parser.add_argument('--apm_sim_path', required=False, + help='path to the APM simulator tool', + default=audioproc_wrapper. \ + AudioProcWrapper. \ + DEFAULT_APM_SIMULATOR_BIN_PATH) + + parser.add_argument('--copy_with_identity_generator', required=False, + help=('If true, the identity test data generator makes a ' + 'copy of the clean speech input file.'), + default=False) + + parser.add_argument('--external_vad_paths', nargs='+', required=False, + help=('Paths to external VAD programs. Each must take' + '\'-i <wav file> -o <output>\' inputs'), default=[]) + + parser.add_argument('--external_vad_names', nargs='+', required=False, + help=('Keys to the vad paths. 
Must be different and ' + 'as many as the paths.'), default=[]) + + return parser + + +def _ValidateArguments(args, parser): + if args.capture_input_files and args.render_input_files and ( + len(args.capture_input_files) != len(args.render_input_files)): + parser.error('--render_input_files and --capture_input_files must be lists ' + 'having the same length') + sys.exit(1) + + if args.render_input_files and not args.echo_path_simulator: + parser.error('when --render_input_files is set, --echo_path_simulator is ' + 'also required') + sys.exit(1) + + if len(args.external_vad_names) != len(args.external_vad_paths): + parser.error('If provided, --external_vad_paths and ' + '--external_vad_names must ' + 'have the same number of arguments.') + sys.exit(1) + + +def main(): + # TODO(alessiob): level = logging.INFO once debugged. + logging.basicConfig(level=logging.DEBUG) + parser = _InstanceArgumentsParser() + args = parser.parse_args() + _ValidateArguments(args, parser) + + simulator = simulation.ApmModuleSimulator( + test_data_generator_factory=( + test_data_generation_factory.TestDataGeneratorFactory( + aechen_ir_database_path=args.air_db_path, + noise_tracks_path=args.additive_noise_tracks_path, + copy_with_identity=args.copy_with_identity_generator)), + evaluation_score_factory=eval_scores_factory.EvaluationScoreWorkerFactory( + polqa_tool_bin_path=os.path.join(args.polqa_path, _POLQA_BIN_NAME)), + ap_wrapper=audioproc_wrapper.AudioProcWrapper(args.apm_sim_path), + evaluator=evaluation.ApmModuleEvaluator(), + external_vads=external_vad.ExternalVad.ConstructVadDict( + args.external_vad_paths, args.external_vad_names)) + simulator.Run( + config_filepaths=args.config_files, + capture_input_filepaths=args.capture_input_files, + render_input_filepaths=args.render_input_files, + echo_path_simulator_name=args.echo_path_simulator, + test_data_generator_names=args.test_data_generators, + eval_score_names=args.eval_scores, + output_dir=args.output_dir) + sys.exit(0) + + +if __name__ == '__main__': + main() diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment.sh b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment.sh new file mode 100755 index 0000000000..aa563ee26b --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment.sh @@ -0,0 +1,91 @@ +#!/bin/bash +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +# Path to the POLQA tool. +if [ -z ${POLQA_PATH} ]; then # Check if defined. + # Default location. + export POLQA_PATH='/var/opt/PolqaOem64' +fi +if [ -d "${POLQA_PATH}" ]; then + echo "POLQA found in ${POLQA_PATH}" +else + echo "POLQA not found in ${POLQA_PATH}" + exit 1 +fi + +# Path to the Aechen IR database. +if [ -z ${AECHEN_IR_DATABASE_PATH} ]; then # Check if defined. + # Default location. 
+ export AECHEN_IR_DATABASE_PATH='/var/opt/AIR_1_4' +fi +if [ -d "${AECHEN_IR_DATABASE_PATH}" ]; then + echo "AIR database found in ${AECHEN_IR_DATABASE_PATH}" +else + echo "AIR database not found in ${AECHEN_IR_DATABASE_PATH}" + exit 1 +fi + +# Customize probing signals, test data generators and scores if needed. +CAPTURE_SIGNALS=(probing_signals/*.wav) +TEST_DATA_GENERATORS=( \ + "identity" \ + "white_noise" \ + # "environmental_noise" \ + # "reverberation" \ +) +SCORES=( \ + # "polqa" \ + "audio_level_peak" \ + "audio_level_mean" \ +) +OUTPUT_PATH=output + +# Generate standard APM config files. +chmod +x apm_quality_assessment_gencfgs.py +./apm_quality_assessment_gencfgs.py + +# Customize APM configurations if needed. +APM_CONFIGS=(apm_configs/*.json) + +# Add output path if missing. +if [ ! -d ${OUTPUT_PATH} ]; then + mkdir ${OUTPUT_PATH} +fi + +# Start one process for each "probing signal"-"test data source" pair. +chmod +x apm_quality_assessment.py +for capture_signal_filepath in "${CAPTURE_SIGNALS[@]}" ; do + probing_signal_name="$(basename $capture_signal_filepath)" + probing_signal_name="${probing_signal_name%.*}" + for test_data_gen_name in "${TEST_DATA_GENERATORS[@]}" ; do + LOG_FILE="${OUTPUT_PATH}/apm_qa-${probing_signal_name}-"` + `"${test_data_gen_name}.log" + echo "Starting ${probing_signal_name} ${test_data_gen_name} "` + `"(see ${LOG_FILE})" + ./apm_quality_assessment.py \ + --polqa_path ${POLQA_PATH}\ + --air_db_path ${AECHEN_IR_DATABASE_PATH}\ + -i ${capture_signal_filepath} \ + -o ${OUTPUT_PATH} \ + -t ${test_data_gen_name} \ + -c "${APM_CONFIGS[@]}" \ + -e "${SCORES[@]}" > $LOG_FILE 2>&1 & + done +done + +# Join Python processes running apm_quality_assessment.py. +wait + +# Export results. +chmod +x ./apm_quality_assessment_export.py +./apm_quality_assessment_export.py -o ${OUTPUT_PATH} + +# Show results in the browser. +RESULTS_FILE="$(realpath ${OUTPUT_PATH}/results.html)" +sensible-browser "file://${RESULTS_FILE}" > /dev/null 2>&1 & diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_boxplot.py b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_boxplot.py new file mode 100644 index 0000000000..ec87970d75 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_boxplot.py @@ -0,0 +1,146 @@ +#!/usr/bin/env python +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +"""Shows boxplots of given score for different values of selected +parameters. Can be used to compare scores by audioproc_f flag. + +Usage: apm_quality_assessment_boxplot.py -o /path/to/output + -v polqa + -n /path/to/dir/with/apm_configs + -z audioproc_f_arg1 [arg2 ...] + +Arguments --config_names, --render_names, --echo_simulator_names, +--test_data_generators, --eval_scores can be used to filter the data +used for plotting. 
+""" + +import collections +import logging +import matplotlib.pyplot as plt +import os + +import quality_assessment.data_access as data_access +import quality_assessment.collect_data as collect_data + + +def InstanceArgumentsParser(): + """Arguments parser factory. + """ + parser = collect_data.InstanceArgumentsParser() + parser.description = ( + 'Shows boxplot of given score for different values of selected' + 'parameters. Can be used to compare scores by audioproc_f flag') + + parser.add_argument('-v', '--eval_score', required=True, + help=('Score name for constructing boxplots')) + + parser.add_argument('-n', '--config_dir', required=False, + help=('path to the folder with the configuration files'), + default='apm_configs') + + parser.add_argument('-z', '--params_to_plot', required=True, + nargs='+', help=('audioproc_f parameter values' + 'by which to group scores (no leading dash)')) + + return parser + + +def FilterScoresByParams(data_frame, filter_params, score_name, config_dir): + """Filters data on the values of one or more parameters. + + Args: + data_frame: pandas.DataFrame of all used input data. + + filter_params: each config of the input data is assumed to have + exactly one parameter from `filter_params` defined. Every value + of the parameters in `filter_params` is a key in the returned + dict; the associated value is all cells of the data with that + value of the parameter. + + score_name: Name of score which value is boxplotted. Currently cannot do + more than one value. + + config_dir: path to dir with APM configs. + + Returns: dictionary, key is a param value, result is all scores for + that param value (see `filter_params` for explanation). + """ + results = collections.defaultdict(dict) + config_names = data_frame['apm_config'].drop_duplicates().values.tolist() + + for config_name in config_names: + config_json = data_access.AudioProcConfigFile.Load( + os.path.join(config_dir, config_name + '.json')) + data_with_config = data_frame[data_frame.apm_config == config_name] + data_cell_scores = data_with_config[data_with_config.eval_score_name == + score_name] + + # Exactly one of |params_to_plot| must match: + (matching_param, ) = [x for x in filter_params if '-' + x in config_json] + + # Add scores for every track to the result. + for capture_name in data_cell_scores.capture: + result_score = float(data_cell_scores[data_cell_scores.capture == + capture_name].score) + config_dict = results[config_json['-' + matching_param]] + if capture_name not in config_dict: + config_dict[capture_name] = {} + + config_dict[capture_name][matching_param] = result_score + + return results + + +def _FlattenToScoresList(config_param_score_dict): + """Extracts a list of scores from input data structure. + + Args: + config_param_score_dict: of the form {'capture_name': + {'param_name' : score_value,.. } ..} + + Returns: Plain list of all score value present in input data + structure + """ + result = [] + for capture_name in config_param_score_dict: + result += list(config_param_score_dict[capture_name].values()) + return result + + +def main(): + # Init. + # TODO(alessiob): INFO once debugged. + logging.basicConfig(level=logging.DEBUG) + parser = InstanceArgumentsParser() + args = parser.parse_args() + + # Get the scores. 
+ src_path = collect_data.ConstructSrcPath(args) + logging.debug(src_path) + scores_data_frame = collect_data.FindScores(src_path, args) + + # Filter the data by `args.params_to_plot` + scores_filtered = FilterScoresByParams(scores_data_frame, + args.params_to_plot, + args.eval_score, + args.config_dir) + + data_list = sorted(scores_filtered.items()) + data_values = [_FlattenToScoresList(x) for (_, x) in data_list] + data_labels = [x for (x, _) in data_list] + + _, axes = plt.subplots(nrows=1, ncols=1, figsize=(6, 6)) + axes.boxplot(data_values, labels=data_labels) + axes.set_ylabel(args.eval_score) + axes.set_xlabel('/'.join(args.params_to_plot)) + plt.show() + + +if __name__ == "__main__": + main() diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_export.py b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_export.py new file mode 100755 index 0000000000..5c8a52bf16 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_export.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +"""Export the scores computed by the apm_quality_assessment.py script into an + HTML file. +""" + +import logging +import os +import sys + +import quality_assessment.collect_data as collect_data +import quality_assessment.export as export + + +def _BuildOutputFilename(filename_suffix): + """Builds the filename for the exported file. + + Args: + filename_suffix: suffix for the output file name. + + Returns: + A string. + """ + if filename_suffix is None: + return 'results.html' + return 'results-{}.html'.format(filename_suffix) + +def main(): + # Init. + logging.basicConfig(level=logging.DEBUG) # TODO(alessio): INFO once debugged. + parser = collect_data.InstanceArgumentsParser() + parser.add_argument('-f', '--filename_suffix', + help=('suffix of the exported file')) + parser.description = ('Exports pre-computed APM module quality assessment ' + 'results into HTML tables') + args = parser.parse_args() + + # Get the scores. + src_path = collect_data.ConstructSrcPath(args) + logging.debug(src_path) + scores_data_frame = collect_data.FindScores(src_path, args) + + # Export. + output_filepath = os.path.join(args.output_dir, _BuildOutputFilename( + args.filename_suffix)) + exporter = export.HtmlExport(output_filepath) + exporter.Export(scores_data_frame) + + logging.info('output file successfully written in %s', output_filepath) + sys.exit(0) + + +if __name__ == '__main__': + main() diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_gencfgs.py b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_gencfgs.py new file mode 100755 index 0000000000..4017747cc2 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_gencfgs.py @@ -0,0 +1,100 @@ +#!/usr/bin/env python +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. 
+# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +"""Generate .json files with which the APM module can be tested using the + apm_quality_assessment.py script and audioproc_f as APM simulator. +""" + +import logging +import os + +import quality_assessment.data_access as data_access + +OUTPUT_PATH = os.path.abspath('apm_configs') + + +def _GenerateDefaultOverridden(config_override): + """Generates one or more APM overridden configurations. + + For each item in config_override, it overrides the default configuration and + writes a new APM configuration file. + + The default settings are loaded via "-all_default". + Check "src/modules/audio_processing/test/audioproc_float.cc" and search + for "if (FLAG_all_default) {". + + For instance, in 55eb6d621489730084927868fed195d3645a9ec9 the default is this: + settings.use_aec = rtc::Optional<bool>(true); + settings.use_aecm = rtc::Optional<bool>(false); + settings.use_agc = rtc::Optional<bool>(true); + settings.use_bf = rtc::Optional<bool>(false); + settings.use_ed = rtc::Optional<bool>(false); + settings.use_hpf = rtc::Optional<bool>(true); + settings.use_ie = rtc::Optional<bool>(false); + settings.use_le = rtc::Optional<bool>(true); + settings.use_ns = rtc::Optional<bool>(true); + settings.use_ts = rtc::Optional<bool>(true); + settings.use_vad = rtc::Optional<bool>(true); + + Args: + config_override: dict of APM configuration file names as keys; the values + are dict instances encoding the audioproc_f flags. + """ + for config_filename in config_override: + config = config_override[config_filename] + config['-all_default'] = None + + config_filepath = os.path.join(OUTPUT_PATH, 'default-{}.json'.format( + config_filename)) + logging.debug('config file <%s> | %s', config_filepath, config) + + data_access.AudioProcConfigFile.Save(config_filepath, config) + logging.info('config file created: <%s>', config_filepath) + + +def _GenerateAllDefaultButOne(): + """Disables the flags enabled by default one-by-one. + """ + config_sets = { + 'no_AEC': {'-aec': 0,}, + 'no_AGC': {'-agc': 0,}, + 'no_HP_filter': {'-hpf': 0,}, + 'no_level_estimator': {'-le': 0,}, + 'no_noise_suppressor': {'-ns': 0,}, + 'no_transient_suppressor': {'-ts': 0,}, + 'no_vad': {'-vad': 0,}, + } + _GenerateDefaultOverridden(config_sets) + + +def _GenerateAllDefaultPlusOne(): + """Enables the flags disabled by default one-by-one. + """ + config_sets = { + 'with_AECM': {'-aec': 0, '-aecm': 1,}, # AEC and AECM are exclusive.
+ 'with_AGC_limiter': {'-agc_limiter': 1,}, + 'with_AEC_delay_agnostic': {'-delay_agnostic': 1,}, + 'with_drift_compensation': {'-drift_compensation': 1,}, + 'with_residual_echo_detector': {'-ed': 1,}, + 'with_AEC_extended_filter': {'-extended_filter': 1,}, + 'with_intelligibility_enhancer': {'-ie': 1,}, + 'with_LC': {'-lc': 1,}, + 'with_refined_adaptive_filter': {'-refined_adaptive_filter': 1,}, + } + _GenerateDefaultOverridden(config_sets) + + +def main(): + logging.basicConfig(level=logging.INFO) + _GenerateAllDefaultPlusOne() + _GenerateAllDefaultButOne() + + +if __name__ == '__main__': + main() diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_optimize.py b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_optimize.py new file mode 100644 index 0000000000..7946fe2aeb --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_optimize.py @@ -0,0 +1,179 @@ +#!/usr/bin/env python +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +"""Finds the APM configuration that maximizes a provided metric by +parsing the output generated by apm_quality_assessment.py. +""" + +from __future__ import division + +import collections +import logging +import os + +import quality_assessment.data_access as data_access +import quality_assessment.collect_data as collect_data + +def _InstanceArgumentsParser(): + """Arguments parser factory. Extends the arguments from 'collect_data' + with a few extra for selecting what parameters to optimize for. + """ + parser = collect_data.InstanceArgumentsParser() + parser.description = ( + 'Rudimentary optimization of a function over different parameter ' + 'combinations.') + + parser.add_argument('-n', '--config_dir', required=False, + help=('path to the folder with the configuration files'), + default='apm_configs') + + parser.add_argument('-p', '--params', required=True, nargs='+', + help=('parameters to parse from the config files in ' + 'config_dir')) + + parser.add_argument('-z', '--params_not_to_optimize', required=False, + nargs='+', default=[], + help=('parameters from `params` not to be optimized for')) + + return parser + + +def _ConfigurationAndScores(data_frame, params, + params_not_to_optimize, config_dir): + """Returns a list of all configurations and scores. + + Args: + data_frame: A pandas data frame with the scores and config name + returned by _FindScores. + params: The parameter names to parse from the configs in the config + directory. + + params_not_to_optimize: The parameter names which shouldn't affect + the optimal parameter + selection. E.g., fixed settings and + non-tunable parameters. + + config_dir: Path to folder with config files. + + Returns: + Dictionary of the form + {param_combination: [{params: {param1: value1, ...}, + scores: {score1: value1, ...}}]}. + + The key `param_combination` runs over all parameter combinations + of the parameters in `params` and not in + `params_not_to_optimize`. A corresponding value is a list of all + param combinations for params in `params_not_to_optimize` and + their scores.
+ """ + results = collections.defaultdict(list) + config_names = data_frame['apm_config'].drop_duplicates().values.tolist() + score_names = data_frame['eval_score_name'].drop_duplicates().values.tolist() + + # Normalize the scores + normalization_constants = {} + for score_name in score_names: + scores = data_frame[data_frame.eval_score_name == score_name].score + normalization_constants[score_name] = max(scores) + + params_to_optimize = [p for p in params if p not in params_not_to_optimize] + param_combination = collections.namedtuple("ParamCombination", + params_to_optimize) + + for config_name in config_names: + config_json = data_access.AudioProcConfigFile.Load( + os.path.join(config_dir, config_name + ".json")) + scores = {} + data_cell = data_frame[data_frame.apm_config == config_name] + for score_name in score_names: + data_cell_scores = data_cell[data_cell.eval_score_name == + score_name].score + scores[score_name] = sum(data_cell_scores) / len(data_cell_scores) + scores[score_name] /= normalization_constants[score_name] + + result = {'scores': scores, 'params': {}} + config_optimize_params = {} + for param in params: + if param in params_to_optimize: + config_optimize_params[param] = config_json['-' + param] + else: + result['params'][param] = config_json['-' + param] + + current_param_combination = param_combination( # pylint: disable=star-args + **config_optimize_params) + results[current_param_combination].append(result) + return results + + +def _FindOptimalParameter(configs_and_scores, score_weighting): + """Finds the config producing the maximal score. + + Args: + configs_and_scores: structure of the form returned by + _ConfigurationAndScores + + score_weighting: a function to weight together all score values of + the form [{params: {param1: value1, ...}, scores: + {score1: value1, ...}}] into a numeric + value + Returns: + the config that has the largest values of |score_weighting| applied + to its scores. + """ + + min_score = float('+inf') + best_params = None + for config in configs_and_scores: + scores_and_params = configs_and_scores[config] + current_score = score_weighting(scores_and_params) + if current_score < min_score: + min_score = current_score + best_params = config + logging.debug("Score: %f", current_score) + logging.debug("Config: %s", str(config)) + return best_params + + +def _ExampleWeighting(scores_and_configs): + """Example argument to `_FindOptimalParameter` + Args: + scores_and_configs: a list of configs and scores, in the form + described in _FindOptimalParameter + Returns: + numeric value, the sum of all scores + """ + res = 0 + for score_config in scores_and_configs: + res += sum(score_config['scores'].values()) + return res + + +def main(): + # Init. + # TODO(alessiob): INFO once debugged. + logging.basicConfig(level=logging.DEBUG) + parser = _InstanceArgumentsParser() + args = parser.parse_args() + + # Get the scores. 
+ src_path = collect_data.ConstructSrcPath(args) + logging.debug('Src path <%s>', src_path) + scores_data_frame = collect_data.FindScores(src_path, args) + all_scores = _ConfigurationAndScores(scores_data_frame, + args.params, + args.params_not_to_optimize, + args.config_dir) + + opt_param = _FindOptimalParameter(all_scores, _ExampleWeighting) + + logging.info('Optimal parameter combination: <%s>', opt_param) + logging.info('Its score values: <%s>', all_scores[opt_param]) + +if __name__ == "__main__": + main() diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_unittest.py b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_unittest.py new file mode 100644 index 0000000000..f5240f8696 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_unittest.py @@ -0,0 +1,33 @@ +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +"""Unit tests for the apm_quality_assessment module. +""" + +import os +import sys +import unittest + +SRC = os.path.abspath(os.path.join( + os.path.dirname(__file__), os.pardir, os.pardir, os.pardir)) +sys.path.append(os.path.join(SRC, 'third_party', 'pymock')) + +import mock + +import apm_quality_assessment + +class TestSimulationScript(unittest.TestCase): + """Unit tests for the apm_quality_assessment module. + """ + + def testMain(self): + # Exit with error code if no arguments are passed. + with self.assertRaises(SystemExit) as cm, mock.patch.object( + sys, 'argv', ['apm_quality_assessment.py']): + apm_quality_assessment.main() + self.assertGreater(cm.exception.code, 0) diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/output/README.md b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/output/README.md new file mode 100644 index 0000000000..66e2a1c848 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/output/README.md @@ -0,0 +1 @@ +You can use this folder for the output generated by the apm_quality_assessment scripts. diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/__init__.py b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/__init__.py new file mode 100644 index 0000000000..b870dfaef3 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/__init__.py @@ -0,0 +1,7 @@ +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree.
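As a usage sketch (not part of the imported sources): _FindOptimalParameter accepts any weighting function in place of _ExampleWeighting. The 'polqa' score name and the factor of 2 below are hypothetical, chosen only to illustrate the expected [{'params': ..., 'scores': ...}] input shape:

    # Hypothetical weighting function for _FindOptimalParameter: counts the
    # normalized POLQA score twice as much as every other score.
    def WeightedSum(scores_and_params):
      total = 0.0
      for entry in scores_and_params:
        for name, value in entry['scores'].items():
          total += 2.0 * value if name == 'polqa' else value
      return total

    # E.g., in main(): opt_param = _FindOptimalParameter(all_scores, WeightedSum)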
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/annotations.py b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/annotations.py new file mode 100644 index 0000000000..5c8c8bacbc --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/annotations.py @@ -0,0 +1,293 @@ +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +"""Extraction of annotations from audio files. +""" + +from __future__ import division +import logging +import os +import shutil +import struct +import subprocess +import sys +import tempfile + +try: + import numpy as np +except ImportError: + logging.critical('Cannot import the third-party Python package numpy') + sys.exit(1) + +from . import external_vad +from . import exceptions +from . import signal_processing + + +class AudioAnnotationsExtractor(object): + """Extracts annotations from audio files. + """ + + # TODO(aleloi): change to enum.IntEnum when py 3.6 is available. + class VadType(object): + ENERGY_THRESHOLD = 1 # TODO(alessiob): Consider switching to P56 standard. + WEBRTC_COMMON_AUDIO = 2 # common_audio/vad/include/vad.h + WEBRTC_APM = 4 # modules/audio_processing/vad/vad.h + + def __init__(self, value): + if (not isinstance(value, int)) or not 0 <= value <= 7: + raise exceptions.InitializationException( + 'Invalid vad type: ' + value) + self._value = value + + def Contains(self, vad_type): + return self._value | vad_type == self._value + + def __str__(self): + vads = [] + if self.Contains(self.ENERGY_THRESHOLD): + vads.append("energy") + if self.Contains(self.WEBRTC_COMMON_AUDIO): + vads.append("common_audio") + if self.Contains(self.WEBRTC_APM): + vads.append("apm") + return "VadType({})".format(", ".join(vads)) + + _OUTPUT_FILENAME_TEMPLATE = '{}annotations.npz' + + # Level estimation params. + _ONE_DB_REDUCTION = np.power(10.0, -1.0 / 20.0) + _LEVEL_FRAME_SIZE_MS = 1.0 + # The time constants in ms indicate the time it takes for the level estimate + # to go down/up by 1 db if the signal is zero. + _LEVEL_ATTACK_MS = 5.0 + _LEVEL_DECAY_MS = 20.0 + + # VAD params. 
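+ # _VAD_THRESHOLD below is the percentile of the frame levels used as the + # energy-VAD threshold; the two paths point to the vad and apm_vad helper + # binaries (the latter is built from apm_vad.cc in this directory).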
+ _VAD_THRESHOLD = 1 + _VAD_WEBRTC_PATH = os.path.join(os.path.dirname( + os.path.abspath(__file__)), os.pardir, os.pardir) + _VAD_WEBRTC_COMMON_AUDIO_PATH = os.path.join(_VAD_WEBRTC_PATH, 'vad') + + _VAD_WEBRTC_APM_PATH = os.path.join( + _VAD_WEBRTC_PATH, 'apm_vad') + + def __init__(self, vad_type, external_vads=None): + self._signal = None + self._level = None + self._level_frame_size = None + self._common_audio_vad = None + self._energy_vad = None + self._apm_vad_probs = None + self._apm_vad_rms = None + self._vad_frame_size = None + self._vad_frame_size_ms = None + self._c_attack = None + self._c_decay = None + + self._vad_type = self.VadType(vad_type) + logging.info('VADs used for annotations: ' + str(self._vad_type)) + + if external_vads is None: + external_vads = {} + self._external_vads = external_vads + + assert len(self._external_vads) == len(external_vads), ( + 'The external VAD names must be unique.') + for vad in external_vads.values(): + if not isinstance(vad, external_vad.ExternalVad): + raise exceptions.InitializationException( + 'Invalid vad type: ' + str(type(vad))) + logging.info('External VAD used for annotation: ' + + str(vad.name)) + + assert os.path.exists(self._VAD_WEBRTC_COMMON_AUDIO_PATH), \ + self._VAD_WEBRTC_COMMON_AUDIO_PATH + assert os.path.exists(self._VAD_WEBRTC_APM_PATH), \ + self._VAD_WEBRTC_APM_PATH + + @classmethod + def GetOutputFileNameTemplate(cls): + return cls._OUTPUT_FILENAME_TEMPLATE + + def GetLevel(self): + return self._level + + def GetLevelFrameSize(self): + return self._level_frame_size + + @classmethod + def GetLevelFrameSizeMs(cls): + return cls._LEVEL_FRAME_SIZE_MS + + def GetVadOutput(self, vad_type): + if vad_type == self.VadType.ENERGY_THRESHOLD: + return self._energy_vad + elif vad_type == self.VadType.WEBRTC_COMMON_AUDIO: + return self._common_audio_vad + elif vad_type == self.VadType.WEBRTC_APM: + return (self._apm_vad_probs, self._apm_vad_rms) + else: + raise exceptions.InitializationException( + 'Invalid vad type: ' + vad_type) + + def GetVadFrameSize(self): + return self._vad_frame_size + + def GetVadFrameSizeMs(self): + return self._vad_frame_size_ms + + def Extract(self, filepath): + # Load signal. + self._signal = signal_processing.SignalProcessingUtils.LoadWav(filepath) + if self._signal.channels != 1: + raise NotImplementedError('Multiple-channel annotations not implemented') + + # Level estimation params. + self._level_frame_size = int(self._signal.frame_rate / 1000 * ( + self._LEVEL_FRAME_SIZE_MS)) + self._c_attack = 0.0 if self._LEVEL_ATTACK_MS == 0 else ( + self._ONE_DB_REDUCTION ** ( + self._LEVEL_FRAME_SIZE_MS / self._LEVEL_ATTACK_MS)) + self._c_decay = 0.0 if self._LEVEL_DECAY_MS == 0 else ( + self._ONE_DB_REDUCTION ** ( + self._LEVEL_FRAME_SIZE_MS / self._LEVEL_DECAY_MS)) + + # Compute level. + self._LevelEstimation() + + # Ideal VAD output, it requires clean speech with high SNR as input. + if self._vad_type.Contains(self.VadType.ENERGY_THRESHOLD): + # Naive VAD based on level thresholding. + vad_threshold = np.percentile(self._level, self._VAD_THRESHOLD) + self._energy_vad = np.uint8(self._level > vad_threshold) + self._vad_frame_size = self._level_frame_size + self._vad_frame_size_ms = self._LEVEL_FRAME_SIZE_MS + if self._vad_type.Contains(self.VadType.WEBRTC_COMMON_AUDIO): + # WebRTC common_audio/ VAD. + self._RunWebRtcCommonAudioVad(filepath, self._signal.frame_rate) + if self._vad_type.Contains(self.VadType.WEBRTC_APM): + # WebRTC modules/audio_processing/ VAD. 
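+ # Calls the apm_vad binary via _RunWebRtcApmVad() below, which produces + # chunk-wise voice probabilities and RMS values.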
+ self._RunWebRtcApmVad(filepath) + for extvad_name in self._external_vads: + self._external_vads[extvad_name].Run(filepath) + + def Save(self, output_path, annotation_name=""): + ext_kwargs = {'extvad_conf-' + ext_vad: + self._external_vads[ext_vad].GetVadOutput() + for ext_vad in self._external_vads} + # pylint: disable=star-args + np.savez_compressed( + file=os.path.join( + output_path, + self.GetOutputFileNameTemplate().format(annotation_name)), + level=self._level, + level_frame_size=self._level_frame_size, + level_frame_size_ms=self._LEVEL_FRAME_SIZE_MS, + vad_output=self._common_audio_vad, + vad_energy_output=self._energy_vad, + vad_frame_size=self._vad_frame_size, + vad_frame_size_ms=self._vad_frame_size_ms, + vad_probs=self._apm_vad_probs, + vad_rms=self._apm_vad_rms, + **ext_kwargs + ) + + def _LevelEstimation(self): + # Read samples. + samples = signal_processing.SignalProcessingUtils.AudioSegmentToRawData( + self._signal).astype(np.float32) / 32768.0 + num_frames = len(samples) // self._level_frame_size + num_samples = num_frames * self._level_frame_size + + # Envelope. + self._level = np.max(np.reshape(np.abs(samples[:num_samples]), ( + num_frames, self._level_frame_size)), axis=1) + assert len(self._level) == num_frames + + # Envelope smoothing. + smooth = lambda curr, prev, k: (1 - k) * curr + k * prev + self._level[0] = smooth(self._level[0], 0.0, self._c_attack) + for i in range(1, num_frames): + self._level[i] = smooth( + self._level[i], self._level[i - 1], self._c_attack if ( + self._level[i] > self._level[i - 1]) else self._c_decay) + + def _RunWebRtcCommonAudioVad(self, wav_file_path, sample_rate): + self._common_audio_vad = None + self._vad_frame_size = None + + # Create temporary output path. + tmp_path = tempfile.mkdtemp() + output_file_path = os.path.join( + tmp_path, os.path.split(wav_file_path)[1] + '_vad.tmp') + + # Call WebRTC VAD. + try: + subprocess.call([ + self._VAD_WEBRTC_COMMON_AUDIO_PATH, + '-i', wav_file_path, + '-o', output_file_path + ], cwd=self._VAD_WEBRTC_PATH) + + # Read bytes. + with open(output_file_path, 'rb') as f: + raw_data = f.read() + + # Parse side information. + self._vad_frame_size_ms = struct.unpack('B', raw_data[0])[0] + self._vad_frame_size = self._vad_frame_size_ms * sample_rate / 1000 + assert self._vad_frame_size_ms in [10, 20, 30] + extra_bits = struct.unpack('B', raw_data[-1])[0] + assert 0 <= extra_bits <= 8 + + # Init VAD vector. + num_bytes = len(raw_data) + num_frames = 8 * (num_bytes - 2) - extra_bits # 8 frames for each byte. + self._common_audio_vad = np.zeros(num_frames, np.uint8) + + # Read VAD decisions. + for i, byte in enumerate(raw_data[1:-1]): + byte = struct.unpack('B', byte)[0] + for j in range(8 if i < num_bytes - 3 else (8 - extra_bits)): + self._common_audio_vad[i * 8 + j] = int(byte & 1) + byte = byte >> 1 + except Exception as e: + logging.error('Error while running the WebRTC VAD (' + e.message + ')') + finally: + if os.path.exists(tmp_path): + shutil.rmtree(tmp_path) + + def _RunWebRtcApmVad(self, wav_file_path): + # Create temporary output path. + tmp_path = tempfile.mkdtemp() + output_file_path_probs = os.path.join( + tmp_path, os.path.split(wav_file_path)[1] + '_vad_probs.tmp') + output_file_path_rms = os.path.join( + tmp_path, os.path.split(wav_file_path)[1] + '_vad_rms.tmp') + + # Call WebRTC VAD. + try: + subprocess.call([ + self._VAD_WEBRTC_APM_PATH, + '-i', wav_file_path, + '-o_probs', output_file_path_probs, + '-o_rms', output_file_path_rms + ], cwd=self._VAD_WEBRTC_PATH) + + # Parse annotations. 
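+ # apm_vad.cc writes one raw 8-byte double per 10 ms frame to each output + # file, so np.fromfile with np.double recovers the two sequences directly.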
+ self._apm_vad_probs = np.fromfile(output_file_path_probs, np.double) + self._apm_vad_rms = np.fromfile(output_file_path_rms, np.double) + assert len(self._apm_vad_rms) == len(self._apm_vad_probs) + + except Exception as e: + logging.error('Error while running the WebRTC APM VAD (' + + e.message + ')') + finally: + if os.path.exists(tmp_path): + shutil.rmtree(tmp_path) diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/annotations_unittest.py b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/annotations_unittest.py new file mode 100644 index 0000000000..5a908e7a44 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/annotations_unittest.py @@ -0,0 +1,157 @@ +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +"""Unit tests for the annotations module. +""" + +from __future__ import division +import logging +import os +import shutil +import tempfile +import unittest + +import numpy as np + +from . import annotations +from . import external_vad +from . import input_signal_creator +from . import signal_processing + + +class TestAnnotationsExtraction(unittest.TestCase): + """Unit tests for the annotations module. + """ + + _CLEAN_TMP_OUTPUT = True + _DEBUG_PLOT_VAD = False + _VAD_TYPE_CLASS = annotations.AudioAnnotationsExtractor.VadType + _ALL_VAD_TYPES = (_VAD_TYPE_CLASS.ENERGY_THRESHOLD | + _VAD_TYPE_CLASS.WEBRTC_COMMON_AUDIO | + _VAD_TYPE_CLASS.WEBRTC_APM) + + + def setUp(self): + """Create temporary folder.""" + self._tmp_path = tempfile.mkdtemp() + self._wav_file_path = os.path.join(self._tmp_path, 'tone.wav') + pure_tone, _ = input_signal_creator.InputSignalCreator.Create( + 'pure_tone', [440, 1000]) + signal_processing.SignalProcessingUtils.SaveWav( + self._wav_file_path, pure_tone) + self._sample_rate = pure_tone.frame_rate + + def tearDown(self): + """Recursively delete temporary folder.""" + if self._CLEAN_TMP_OUTPUT: + shutil.rmtree(self._tmp_path) + else: + logging.warning(self.id() + ' did not clean the temporary path ' + ( + self._tmp_path)) + + def testFrameSizes(self): + e = annotations.AudioAnnotationsExtractor(self._ALL_VAD_TYPES) + e.Extract(self._wav_file_path) + samples_to_ms = lambda n, sr: 1000 * n // sr + self.assertEqual(samples_to_ms(e.GetLevelFrameSize(), self._sample_rate), + e.GetLevelFrameSizeMs()) + self.assertEqual(samples_to_ms(e.GetVadFrameSize(), self._sample_rate), + e.GetVadFrameSizeMs()) + + def testVoiceActivityDetectors(self): + for vad_type_value in range(0, self._ALL_VAD_TYPES+1): + vad_type = self._VAD_TYPE_CLASS(vad_type_value) + e = annotations.AudioAnnotationsExtractor(vad_type=vad_type_value) + e.Extract(self._wav_file_path) + if vad_type.Contains(self._VAD_TYPE_CLASS.ENERGY_THRESHOLD): + # pylint: disable=unpacking-non-sequence + vad_output = e.GetVadOutput(self._VAD_TYPE_CLASS.ENERGY_THRESHOLD) + self.assertGreater(len(vad_output), 0) + self.assertGreaterEqual(float(np.sum(vad_output)) / len(vad_output), + 0.95) + + if vad_type.Contains(self._VAD_TYPE_CLASS.WEBRTC_COMMON_AUDIO): + # pylint: disable=unpacking-non-sequence + 
vad_output = e.GetVadOutput(self._VAD_TYPE_CLASS.WEBRTC_COMMON_AUDIO) + self.assertGreater(len(vad_output), 0) + self.assertGreaterEqual(float(np.sum(vad_output)) / len(vad_output), + 0.95) + + if vad_type.Contains(self._VAD_TYPE_CLASS.WEBRTC_APM): + # pylint: disable=unpacking-non-sequence + (vad_probs, vad_rms) = e.GetVadOutput(self._VAD_TYPE_CLASS.WEBRTC_APM) + self.assertGreater(len(vad_probs), 0) + self.assertGreater(len(vad_rms), 0) + self.assertGreaterEqual(float(np.sum(vad_probs)) / len(vad_probs), + 0.5) + self.assertGreaterEqual(float(np.sum(vad_rms)) / len(vad_rms), 20000) + + if self._DEBUG_PLOT_VAD: + frame_times_s = lambda num_frames, frame_size_ms: np.arange( + num_frames).astype(np.float32) * frame_size_ms / 1000.0 + level = e.GetLevel() + t_level = frame_times_s( + num_frames=len(level), + frame_size_ms=e.GetLevelFrameSizeMs()) + t_vad = frame_times_s( + num_frames=len(vad_output), + frame_size_ms=e.GetVadFrameSizeMs()) + import matplotlib.pyplot as plt + plt.figure() + plt.hold(True) + plt.plot(t_level, level) + plt.plot(t_vad, vad_output * np.max(level), '.') + plt.show() + + def testSaveLoad(self): + e = annotations.AudioAnnotationsExtractor(self._ALL_VAD_TYPES) + e.Extract(self._wav_file_path) + e.Save(self._tmp_path, "fake-annotation") + + data = np.load(os.path.join( + self._tmp_path, + e.GetOutputFileNameTemplate().format("fake-annotation"))) + np.testing.assert_array_equal(e.GetLevel(), data['level']) + self.assertEqual(np.float32, data['level'].dtype) + np.testing.assert_array_equal( + e.GetVadOutput(self._VAD_TYPE_CLASS.ENERGY_THRESHOLD), + data['vad_energy_output']) + np.testing.assert_array_equal( + e.GetVadOutput(self._VAD_TYPE_CLASS.WEBRTC_COMMON_AUDIO), + data['vad_output']) + np.testing.assert_array_equal( + e.GetVadOutput(self._VAD_TYPE_CLASS.WEBRTC_APM)[0], data['vad_probs']) + np.testing.assert_array_equal( + e.GetVadOutput(self._VAD_TYPE_CLASS.WEBRTC_APM)[1], data['vad_rms']) + self.assertEqual(np.uint8, data['vad_energy_output'].dtype) + self.assertEqual(np.float64, data['vad_probs'].dtype) + self.assertEqual(np.float64, data['vad_rms'].dtype) + + def testEmptyExternalShouldNotCrash(self): + for vad_type_value in range(0, self._ALL_VAD_TYPES+1): + annotations.AudioAnnotationsExtractor(vad_type_value, {}) + + def testFakeExternalSaveLoad(self): + def FakeExternalFactory(): + return external_vad.ExternalVad( + os.path.join( + os.path.dirname(os.path.abspath(__file__)), 'fake_external_vad.py'), + 'fake' + ) + for vad_type_value in range(0, self._ALL_VAD_TYPES+1): + e = annotations.AudioAnnotationsExtractor( + vad_type_value, + {'fake': FakeExternalFactory()}) + e.Extract(self._wav_file_path) + e.Save(self._tmp_path, annotation_name="fake-annotation") + data = np.load(os.path.join( + self._tmp_path, + e.GetOutputFileNameTemplate().format("fake-annotation"))) + self.assertEqual(np.float32, data['extvad_conf-fake'].dtype) + np.testing.assert_almost_equal(np.arange(100, dtype=np.float32), + data['extvad_conf-fake']) diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/apm_configs/default.json b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/apm_configs/default.json new file mode 100644 index 0000000000..5c3277bac0 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/apm_configs/default.json @@ -0,0 +1 @@ +{"-all_default": null} diff --git 
a/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/apm_vad.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/apm_vad.cc new file mode 100644 index 0000000000..a6184b5f9f --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/apm_vad.cc @@ -0,0 +1,94 @@ +// Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the LICENSE file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. + +#include <array> +#include <fstream> +#include <memory> + +#include "common_audio/wav_file.h" +#include "modules/audio_processing/vad/voice_activity_detector.h" +#include "rtc_base/flags.h" +#include "rtc_base/logging.h" + +namespace webrtc { +namespace test { +namespace { + +constexpr uint8_t kAudioFrameLengthMilliseconds = 10; +constexpr int kMaxSampleRate = 48000; +constexpr size_t kMaxFrameLen = + kAudioFrameLengthMilliseconds * kMaxSampleRate / 1000; + +DEFINE_string(i, "", "Input wav file"); +DEFINE_string(o_probs, "", "VAD probabilities output file"); +DEFINE_string(o_rms, "", "VAD output file"); + +int main(int argc, char* argv[]) { + if (rtc::FlagList::SetFlagsFromCommandLine(&argc, argv, true)) + return 1; + + // Open wav input file and check properties. + WavReader wav_reader(FLAG_i); + if (wav_reader.num_channels() != 1) { + RTC_LOG(LS_ERROR) << "Only mono wav files supported"; + return 1; + } + if (wav_reader.sample_rate() > kMaxSampleRate) { + RTC_LOG(LS_ERROR) << "Beyond maximum sample rate (" << kMaxSampleRate + << ")"; + return 1; + } + const size_t audio_frame_len = rtc::CheckedDivExact( + kAudioFrameLengthMilliseconds * wav_reader.sample_rate(), 1000); + if (audio_frame_len > kMaxFrameLen) { + RTC_LOG(LS_ERROR) << "The frame size and/or the sample rate are too large."; + return 1; + } + + // Create output file and write header. + std::ofstream out_probs_file(FLAG_o_probs, std::ofstream::binary); + std::ofstream out_rms_file(FLAG_o_rms, std::ofstream::binary); + + // Run VAD and write decisions. + VoiceActivityDetector vad; + std::array<int16_t, kMaxFrameLen> samples; + + while (true) { + // Process frame. + const auto read_samples = + wav_reader.ReadSamples(audio_frame_len, samples.data()); + if (read_samples < audio_frame_len) { + break; + } + vad.ProcessChunk(samples.data(), audio_frame_len, wav_reader.sample_rate()); + // Write output. 
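+ // Each processed chunk yields one voice probability and one RMS value; + // both sequences are appended to the output files as raw 8-byte doubles.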
+ auto probs = vad.chunkwise_voice_probabilities(); + auto rms = vad.chunkwise_rms(); + RTC_CHECK_EQ(probs.size(), rms.size()); + RTC_CHECK_EQ(sizeof(double), 8); + + for (const auto& p : probs) { + out_probs_file.write(reinterpret_cast<const char*>(&p), 8); + } + for (const auto& r : rms) { + out_rms_file.write(reinterpret_cast<const char*>(&r), 8); + } + } + + out_probs_file.close(); + out_rms_file.close(); + return 0; +} + +} // namespace +} // namespace test +} // namespace webrtc + +int main(int argc, char* argv[]) { + return webrtc::test::main(argc, argv); +} diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/audioproc_wrapper.py b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/audioproc_wrapper.py new file mode 100644 index 0000000000..399e3864dc --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/audioproc_wrapper.py @@ -0,0 +1,96 @@ +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +"""Class implementing a wrapper for APM simulators. +""" + +import cProfile +import logging +import os +import subprocess + +from . import data_access +from . import exceptions + + +class AudioProcWrapper(object): + """Wrapper for APM simulators. + """ + + DEFAULT_APM_SIMULATOR_BIN_PATH = os.path.abspath(os.path.join( + os.pardir, 'audioproc_f')) + OUTPUT_FILENAME = 'output.wav' + + def __init__(self, simulator_bin_path): + """Ctor. + + Args: + simulator_bin_path: path to the APM simulator binary. + """ + self._simulator_bin_path = simulator_bin_path + self._config = None + self._output_signal_filepath = None + + # Profiler instance to measure running time. + self._profiler = cProfile.Profile() + + @property + def output_filepath(self): + return self._output_signal_filepath + + def Run(self, config_filepath, capture_input_filepath, output_path, + render_input_filepath=None): + """Runs APM simulator. + + Args: + config_filepath: path to the configuration file specifying the arguments + for the APM simulator. + capture_input_filepath: path to the capture audio track input file (aka + forward or near-end). + output_path: path of the audio track output file. + render_input_filepath: path to the render audio track input file (aka + reverse or far-end). + """ + # Init. + self._output_signal_filepath = os.path.join( + output_path, self.OUTPUT_FILENAME) + profiling_stats_filepath = os.path.join(output_path, 'profiling.stats') + + # Skip if the output has already been generated. + if os.path.exists(self._output_signal_filepath) and os.path.exists( + profiling_stats_filepath): + return + + # Load configuration. + self._config = data_access.AudioProcConfigFile.Load(config_filepath) + + # Set remaining parameters. 
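+ # The simulator flags set below are -i (capture input), -o (processed + # output) and, when a render track is given, -ri (render input).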
+ if not os.path.exists(capture_input_filepath): + raise exceptions.FileNotFoundError('cannot find capture input file') + self._config['-i'] = capture_input_filepath + self._config['-o'] = self._output_signal_filepath + if render_input_filepath is not None: + if not os.path.exists(render_input_filepath): + raise exceptions.FileNotFoundError('cannot find render input file') + self._config['-ri'] = render_input_filepath + + # Build arguments list. + args = [self._simulator_bin_path] + for param_name in self._config: + args.append(param_name) + if self._config[param_name] is not None: + args.append(str(self._config[param_name])) + logging.debug(' '.join(args)) + + # Run. + self._profiler.enable() + subprocess.call(args) + self._profiler.disable() + + # Save profiling stats. + self._profiler.dump_stats(profiling_stats_filepath) diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/collect_data.py b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/collect_data.py new file mode 100644 index 0000000000..fc1f44bcf8 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/collect_data.py @@ -0,0 +1,242 @@ +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +"""Imports a filtered subset of the scores and configurations computed +by apm_quality_assessment.py into a pandas data frame. +""" + +import argparse +import glob +import logging +import os +import re +import sys + +try: + import pandas as pd +except ImportError: + logging.critical('Cannot import the third-party Python package pandas') + sys.exit(1) + +from . import data_access as data_access +from . import simulation as sim + +# Compiled regular expressions used to extract score descriptors. +RE_CONFIG_NAME = re.compile( + sim.ApmModuleSimulator.GetPrefixApmConfig() + r'(.+)') +RE_CAPTURE_NAME = re.compile( + sim.ApmModuleSimulator.GetPrefixCapture() + r'(.+)') +RE_RENDER_NAME = re.compile( + sim.ApmModuleSimulator.GetPrefixRender() + r'(.+)') +RE_ECHO_SIM_NAME = re.compile( + sim.ApmModuleSimulator.GetPrefixEchoSimulator() + r'(.+)') +RE_TEST_DATA_GEN_NAME = re.compile( + sim.ApmModuleSimulator.GetPrefixTestDataGenerator() + r'(.+)') +RE_TEST_DATA_GEN_PARAMS = re.compile( + sim.ApmModuleSimulator.GetPrefixTestDataGeneratorParameters() + r'(.+)') +RE_SCORE_NAME = re.compile( + sim.ApmModuleSimulator.GetPrefixScore() + r'(.+)(\..+)') + + +def InstanceArgumentsParser(): + """Arguments parser factory. 
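+ + The returned parser only defines the options shared by the data + collection scripts; user scripts extend it (see, e.g., + apm_quality_assessment_optimize.py).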
+ """ + parser = argparse.ArgumentParser(description=( + 'Override this description in a user script by changing' + ' `parser.description` of the returned parser.')) + + parser.add_argument('-o', '--output_dir', required=True, + help=('the same base path used with the ' + 'apm_quality_assessment tool')) + + parser.add_argument('-c', '--config_names', type=re.compile, + help=('regular expression to filter the APM configuration' + ' names')) + + parser.add_argument('-i', '--capture_names', type=re.compile, + help=('regular expression to filter the capture signal ' + 'names')) + + parser.add_argument('-r', '--render_names', type=re.compile, + help=('regular expression to filter the render signal ' + 'names')) + + parser.add_argument('-e', '--echo_simulator_names', type=re.compile, + help=('regular expression to filter the echo simulator ' + 'names')) + + parser.add_argument('-t', '--test_data_generators', type=re.compile, + help=('regular expression to filter the test data ' + 'generator names')) + + parser.add_argument('-s', '--eval_scores', type=re.compile, + help=('regular expression to filter the evaluation score ' + 'names')) + + return parser + + +def _GetScoreDescriptors(score_filepath): + """Extracts a score descriptor from the given score file path. + + Args: + score_filepath: path to the score file. + + Returns: + A tuple of strings (APM configuration name, capture audio track name, + render audio track name, echo simulator name, test data generator name, + test data generator parameters as string, evaluation score name). + """ + fields = score_filepath.split(os.sep)[-7:] + extract_name = lambda index, reg_expr: ( + reg_expr.match(fields[index]).groups(0)[0]) + return ( + extract_name(0, RE_CONFIG_NAME), + extract_name(1, RE_CAPTURE_NAME), + extract_name(2, RE_RENDER_NAME), + extract_name(3, RE_ECHO_SIM_NAME), + extract_name(4, RE_TEST_DATA_GEN_NAME), + extract_name(5, RE_TEST_DATA_GEN_PARAMS), + extract_name(6, RE_SCORE_NAME), + ) + + +def _ExcludeScore(config_name, capture_name, render_name, echo_simulator_name, + test_data_gen_name, score_name, args): + """Decides whether excluding a score. + + A set of optional regular expressions in args is used to determine if the + score should be excluded (depending on its |*_name| descriptors). + + Args: + config_name: APM configuration name. + capture_name: capture audio track name. + render_name: render audio track name. + echo_simulator_name: echo simulator name. + test_data_gen_name: test data generator name. + score_name: evaluation score name. + args: parsed arguments. + + Returns: + A boolean. + """ + value_regexpr_pairs = [ + (config_name, args.config_names), + (capture_name, args.capture_names), + (render_name, args.render_names), + (echo_simulator_name, args.echo_simulator_names), + (test_data_gen_name, args.test_data_generators), + (score_name, args.eval_scores), + ] + + # Score accepted if each value matches the corresponding regular expression. + for value, regexpr in value_regexpr_pairs: + if regexpr is None: + continue + if not regexpr.match(value): + return True + + return False + + +def FindScores(src_path, args): + """Given a search path, find scores and return a DataFrame object. + + Args: + src_path: Search path pattern. + args: parsed arguments. + + Returns: + A DataFrame object. + """ + # Get scores. + scores = [] + for score_filepath in glob.iglob(src_path): + # Extract score descriptor fields from the path. 
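+ # The last seven path components encode, in order: APM config, capture, + # render, echo simulator, test data generator, generator parameters and + # score name (see _GetScoreDescriptors above).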
+ (config_name, + capture_name, + render_name, + echo_simulator_name, + test_data_gen_name, + test_data_gen_params, + score_name) = _GetScoreDescriptors(score_filepath) + + # Ignore the score if required. + if _ExcludeScore( + config_name, + capture_name, + render_name, + echo_simulator_name, + test_data_gen_name, + score_name, + args): + logging.info( + 'ignored score: %s %s %s %s %s %s', + config_name, + capture_name, + render_name, + echo_simulator_name, + test_data_gen_name, + score_name) + continue + + # Read metadata and score. + metadata = data_access.Metadata.LoadAudioTestDataPaths( + os.path.split(score_filepath)[0]) + score = data_access.ScoreFile.Load(score_filepath) + + # Add a score with its descriptor fields. + scores.append(( + metadata['clean_capture_input_filepath'], + metadata['echo_free_capture_filepath'], + metadata['echo_filepath'], + metadata['render_filepath'], + metadata['capture_filepath'], + metadata['apm_output_filepath'], + metadata['apm_reference_filepath'], + config_name, + capture_name, + render_name, + echo_simulator_name, + test_data_gen_name, + test_data_gen_params, + score_name, + score, + )) + + return pd.DataFrame( + data=scores, + columns=( + 'clean_capture_input_filepath', + 'echo_free_capture_filepath', + 'echo_filepath', + 'render_filepath', + 'capture_filepath', + 'apm_output_filepath', + 'apm_reference_filepath', + 'apm_config', + 'capture', + 'render', + 'echo_simulator', + 'test_data_gen', + 'test_data_gen_params', + 'eval_score_name', + 'score', + )) + + +def ConstructSrcPath(args): + return os.path.join( + args.output_dir, + sim.ApmModuleSimulator.GetPrefixApmConfig() + '*', + sim.ApmModuleSimulator.GetPrefixCapture() + '*', + sim.ApmModuleSimulator.GetPrefixRender() + '*', + sim.ApmModuleSimulator.GetPrefixEchoSimulator() + '*', + sim.ApmModuleSimulator.GetPrefixTestDataGenerator() + '*', + sim.ApmModuleSimulator.GetPrefixTestDataGeneratorParameters() + '*', + sim.ApmModuleSimulator.GetPrefixScore() + '*') diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/data_access.py b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/data_access.py new file mode 100644 index 0000000000..17aa7e2b67 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/data_access.py @@ -0,0 +1,154 @@ +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +"""Data access utility functions and classes. +""" + +import json +import os + + +def MakeDirectory(path): + """Makes a directory recursively without rising exceptions if existing. + + Args: + path: path to the directory to be created. + """ + if os.path.exists(path): + return + os.makedirs(path) + + +class Metadata(object): + """Data access class to save and load metadata. + """ + + def __init__(self): + pass + + _GENERIC_METADATA_SUFFIX = '.mdata' + _AUDIO_TEST_DATA_FILENAME = 'audio_test_data.json' + + @classmethod + def LoadFileMetadata(cls, filepath): + """Loads generic metadata linked to a file. + + Args: + filepath: path to the metadata file to read. + + Returns: + A dict. 
+ """ + with open(filepath + cls._GENERIC_METADATA_SUFFIX) as f: + return json.load(f) + + @classmethod + def SaveFileMetadata(cls, filepath, metadata): + """Saves generic metadata linked to a file. + + Args: + filepath: path to the metadata file to write. + metadata: a dict. + """ + with open(filepath + cls._GENERIC_METADATA_SUFFIX, 'w') as f: + json.dump(metadata, f) + + @classmethod + def LoadAudioTestDataPaths(cls, metadata_path): + """Loads the input and the reference audio track paths. + + Args: + metadata_path: path to the directory containing the metadata file. + + Returns: + Tuple with the paths to the input and output audio tracks. + """ + metadata_filepath = os.path.join( + metadata_path, cls._AUDIO_TEST_DATA_FILENAME) + with open(metadata_filepath) as f: + return json.load(f) + + @classmethod + def SaveAudioTestDataPaths(cls, output_path, **filepaths): + """Saves the input and the reference audio track paths. + + Args: + output_path: path to the directory containing the metadata file. + + Keyword Args: + filepaths: collection of audio track file paths to save. + """ + output_filepath = os.path.join(output_path, cls._AUDIO_TEST_DATA_FILENAME) + with open(output_filepath, 'w') as f: + json.dump(filepaths, f) + + +class AudioProcConfigFile(object): + """Data access to load/save APM simulator argument lists. + + The arguments stored in the config files are used to control the APM flags. + """ + + def __init__(self): + pass + + @classmethod + def Load(cls, filepath): + """Loads a configuration file for an APM simulator. + + Args: + filepath: path to the configuration file. + + Returns: + A dict containing the configuration. + """ + with open(filepath) as f: + return json.load(f) + + @classmethod + def Save(cls, filepath, config): + """Saves a configuration file for an APM simulator. + + Args: + filepath: path to the configuration file. + config: a dict containing the configuration. + """ + with open(filepath, 'w') as f: + json.dump(config, f) + + +class ScoreFile(object): + """Data access class to save and load float scalar scores. + """ + + def __init__(self): + pass + + @classmethod + def Load(cls, filepath): + """Loads a score from file. + + Args: + filepath: path to the score file. + + Returns: + A float encoding the score. + """ + with open(filepath) as f: + return float(f.readline().strip()) + + @classmethod + def Save(cls, filepath, score): + """Saves a score into a file. + + Args: + filepath: path to the score file. + score: float encoding the score. + """ + with open(filepath, 'w') as f: + f.write('{0:f}\n'.format(score)) diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/echo_path_simulation.py b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/echo_path_simulation.py new file mode 100644 index 0000000000..a1621966fe --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/echo_path_simulation.py @@ -0,0 +1,136 @@ +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +"""Echo path simulation module. +""" + +import hashlib +import os + +from . 
import signal_processing + + +class EchoPathSimulator(object): + """Abstract class for the echo path simulators. + + In general, an echo path simulator is a function of the render signal and + simulates the propagation of the latter into the microphone (e.g., due to + mechanical or electrical paths). + """ + + NAME = None + REGISTERED_CLASSES = {} + + def __init__(self): + pass + + def Simulate(self, output_path): + """Creates the echo signal and stores it in an audio file (abstract method). + + Args: + output_path: Path in which any output can be saved. + + Returns: + Path to the generated audio track file or None if no echo is present. + """ + raise NotImplementedError() + + @classmethod + def RegisterClass(cls, class_to_register): + """Registers an EchoPathSimulator implementation. + + Decorator to automatically register the classes that extend + EchoPathSimulator. + Example usage: + + @EchoPathSimulator.RegisterClass + class NoEchoPathSimulator(EchoPathSimulator): + pass + """ + cls.REGISTERED_CLASSES[class_to_register.NAME] = class_to_register + return class_to_register + + +@EchoPathSimulator.RegisterClass +class NoEchoPathSimulator(EchoPathSimulator): + """Simulates absence of echo.""" + + NAME = 'noecho' + + def __init__(self): + EchoPathSimulator.__init__(self) + + def Simulate(self, output_path): + return None + + +@EchoPathSimulator.RegisterClass +class LinearEchoPathSimulator(EchoPathSimulator): + """Simulates linear echo path. + + This class applies a given impulse response to the render input and then it + sums the signal to the capture input signal. + """ + + NAME = 'linear' + + def __init__(self, render_input_filepath, impulse_response): + """ + Args: + render_input_filepath: Render audio track file. + impulse_response: list or numpy vector of float values. + """ + EchoPathSimulator.__init__(self) + self._render_input_filepath = render_input_filepath + self._impulse_response = impulse_response + + def Simulate(self, output_path): + """Simulates linear echo path.""" + # Form the file name with a hash of the impulse response. + impulse_response_hash = hashlib.sha256( + str(self._impulse_response).encode('utf-8', 'ignore')).hexdigest() + echo_filepath = os.path.join(output_path, 'linear_echo_{}.wav'.format( + impulse_response_hash)) + + # If the simulated echo audio track file does not exists, create it. + if not os.path.exists(echo_filepath): + render = signal_processing.SignalProcessingUtils.LoadWav( + self._render_input_filepath) + echo = signal_processing.SignalProcessingUtils.ApplyImpulseResponse( + render, self._impulse_response) + signal_processing.SignalProcessingUtils.SaveWav(echo_filepath, echo) + + return echo_filepath + + +@EchoPathSimulator.RegisterClass +class RecordedEchoPathSimulator(EchoPathSimulator): + """Uses recorded echo. + + This class uses the clean capture input file name to build the file name of + the corresponding recording containing echo (a predefined suffix is used). + Such a file is expected to be already existing. 
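+ + E.g., for a render input file named render.wav, the recording with echo + is expected to be render_echo.wav in the same folder.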
+ """ + + NAME = 'recorded' + + _FILE_NAME_SUFFIX = '_echo' + + def __init__(self, render_input_filepath): + EchoPathSimulator.__init__(self) + self._render_input_filepath = render_input_filepath + + def Simulate(self, output_path): + """Uses recorded echo path.""" + path, file_name_ext = os.path.split(self._render_input_filepath) + file_name, file_ext = os.path.splitext(file_name_ext) + echo_filepath = os.path.join(path, '{}{}{}'.format( + file_name, self._FILE_NAME_SUFFIX, file_ext)) + assert os.path.exists(echo_filepath), ( + 'cannot find the echo audio track file {}'.format(echo_filepath)) + return echo_filepath diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/echo_path_simulation_factory.py b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/echo_path_simulation_factory.py new file mode 100644 index 0000000000..eeffd1d71b --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/echo_path_simulation_factory.py @@ -0,0 +1,48 @@ +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +"""Echo path simulation factory module. +""" + +import numpy as np + +from . import echo_path_simulation + + +class EchoPathSimulatorFactory(object): + + # TODO(alessiob): Replace 5 ms delay (at 48 kHz sample rate) with a more + # realistic impulse response. + _LINEAR_ECHO_IMPULSE_RESPONSE = np.array([0.0]*(5 * 48) + [0.15]) + + def __init__(self): + pass + + @classmethod + def GetInstance(cls, echo_path_simulator_class, render_input_filepath): + """Creates an EchoPathSimulator instance given a class object. + + Args: + echo_path_simulator_class: EchoPathSimulator class object (not an + instance). + render_input_filepath: Path to the render audio track file. + + Returns: + An EchoPathSimulator instance. + """ + assert render_input_filepath is not None or ( + echo_path_simulator_class == echo_path_simulation.NoEchoPathSimulator) + + if echo_path_simulator_class == echo_path_simulation.NoEchoPathSimulator: + return echo_path_simulation.NoEchoPathSimulator() + elif echo_path_simulator_class == ( + echo_path_simulation.LinearEchoPathSimulator): + return echo_path_simulation.LinearEchoPathSimulator( + render_input_filepath, cls._LINEAR_ECHO_IMPULSE_RESPONSE) + else: + return echo_path_simulator_class(render_input_filepath) diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/echo_path_simulation_unittest.py b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/echo_path_simulation_unittest.py new file mode 100644 index 0000000000..d9ef2c61e4 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/echo_path_simulation_unittest.py @@ -0,0 +1,81 @@ +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. 
An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +"""Unit tests for the echo path simulation module. +""" + +import shutil +import os +import tempfile +import unittest + +import pydub + +from . import echo_path_simulation +from . import echo_path_simulation_factory +from . import signal_processing + + +class TestEchoPathSimulators(unittest.TestCase): + """Unit tests for the echo path simulation module. + """ + + def setUp(self): + """Creates temporary data.""" + self._tmp_path = tempfile.mkdtemp() + + # Create and save white noise. + silence = pydub.AudioSegment.silent(duration=1000, frame_rate=48000) + white_noise = signal_processing.SignalProcessingUtils.GenerateWhiteNoise( + silence) + self._audio_track_num_samples = ( + signal_processing.SignalProcessingUtils.CountSamples(white_noise)) + self._audio_track_filepath = os.path.join(self._tmp_path, 'white_noise.wav') + signal_processing.SignalProcessingUtils.SaveWav( + self._audio_track_filepath, white_noise) + + # Make a copy of the white noise audio track file; it will be used by + # echo_path_simulation.RecordedEchoPathSimulator. + shutil.copy(self._audio_track_filepath, os.path.join( + self._tmp_path, 'white_noise_echo.wav')) + + def tearDown(self): + """Recursively deletes temporary folders.""" + shutil.rmtree(self._tmp_path) + + def testRegisteredClasses(self): + # Check that there is at least one registered echo path simulator. + registered_classes = ( + echo_path_simulation.EchoPathSimulator.REGISTERED_CLASSES) + self.assertIsInstance(registered_classes, dict) + self.assertGreater(len(registered_classes), 0) + + # Instance factory. + factory = echo_path_simulation_factory.EchoPathSimulatorFactory() + + # Try each registered echo path simulator. + for echo_path_simulator_name in registered_classes: + simulator = factory.GetInstance( + echo_path_simulator_class=registered_classes[ + echo_path_simulator_name], + render_input_filepath=self._audio_track_filepath) + + echo_filepath = simulator.Simulate(self._tmp_path) + if echo_filepath is None: + self.assertEqual(echo_path_simulation.NoEchoPathSimulator.NAME, + echo_path_simulator_name) + # No other tests in this case. + continue + + # Check that the echo audio track file exists and its length is greater + # than or equal to that of the render audio track. + self.assertTrue(os.path.exists(echo_filepath)) + echo = signal_processing.SignalProcessingUtils.LoadWav(echo_filepath) + self.assertGreaterEqual( + signal_processing.SignalProcessingUtils.CountSamples(echo), + self._audio_track_num_samples) diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/eval_scores.py b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/eval_scores.py new file mode 100644 index 0000000000..420afd2243 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/eval_scores.py @@ -0,0 +1,341 @@ +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree.
+ +"""Evaluation score abstract class and implementations. +""" + +from __future__ import division +import logging +import os +import re +import subprocess +import sys + +try: + import numpy as np +except ImportError: + logging.critical('Cannot import the third-party Python package numpy') + sys.exit(1) + +from . import data_access +from . import exceptions +from . import signal_processing + + +class EvaluationScore(object): + + NAME = None + REGISTERED_CLASSES = {} + + def __init__(self, score_filename_prefix): + self._score_filename_prefix = score_filename_prefix + self._input_signal_metadata = None + self._reference_signal = None + self._reference_signal_filepath = None + self._tested_signal = None + self._tested_signal_filepath = None + self._output_filepath = None + self._score = None + + @classmethod + def RegisterClass(cls, class_to_register): + """Registers an EvaluationScore implementation. + + Decorator to automatically register the classes that extend EvaluationScore. + Example usage: + + @EvaluationScore.RegisterClass + class AudioLevelScore(EvaluationScore): + pass + """ + cls.REGISTERED_CLASSES[class_to_register.NAME] = class_to_register + return class_to_register + + @property + def output_filepath(self): + return self._output_filepath + + @property + def score(self): + return self._score + + def SetInputSignalMetadata(self, metadata): + """Sets input signal metadata. + + Args: + metadata: dict instance. + """ + self._input_signal_metadata = metadata + + def SetReferenceSignalFilepath(self, filepath): + """Sets the path to the audio track used as reference signal. + + Args: + filepath: path to the reference audio track. + """ + self._reference_signal_filepath = filepath + + def SetTestedSignalFilepath(self, filepath): + """Sets the path to the audio track used as test signal. + + Args: + filepath: path to the test audio track. + """ + self._tested_signal_filepath = filepath + + def Run(self, output_path): + """Extracts the score for the set test data pair. + + Args: + output_path: path to the directory where the output is written. + """ + self._output_filepath = os.path.join( + output_path, self._score_filename_prefix + self.NAME + '.txt') + try: + # If the score has already been computed, load. + self._LoadScore() + logging.debug('score found and loaded') + except IOError: + # Compute the score. + logging.debug('score not found, compute') + self._Run(output_path) + + def _Run(self, output_path): + # Abstract method. + raise NotImplementedError() + + def _LoadReferenceSignal(self): + assert self._reference_signal_filepath is not None + self._reference_signal = signal_processing.SignalProcessingUtils.LoadWav( + self._reference_signal_filepath) + + def _LoadTestedSignal(self): + assert self._tested_signal_filepath is not None + self._tested_signal = signal_processing.SignalProcessingUtils.LoadWav( + self._tested_signal_filepath) + + + def _LoadScore(self): + return data_access.ScoreFile.Load(self._output_filepath) + + def _SaveScore(self): + return data_access.ScoreFile.Save(self._output_filepath, self._score) + + +@EvaluationScore.RegisterClass +class AudioLevelPeakScore(EvaluationScore): + """Peak audio level score. + + Defined as the difference between the peak audio level of the tested and + the reference signals. 
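+ + Computed as tested_signal.dBFS - reference_signal.dBFS on the pydub + AudioSegment objects (see _Run below).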
+ + Unit: dB + Ideal: 0 dB + Worst case: +/-inf dB + """ + + NAME = 'audio_level_peak' + + def __init__(self, score_filename_prefix): + EvaluationScore.__init__(self, score_filename_prefix) + + def _Run(self, output_path): + self._LoadReferenceSignal() + self._LoadTestedSignal() + self._score = self._tested_signal.dBFS - self._reference_signal.dBFS + self._SaveScore() + + +@EvaluationScore.RegisterClass +class MeanAudioLevelScore(EvaluationScore): + """Mean audio level score. + + Defined as the difference between the mean audio level of the tested and + the reference signals. + + Unit: dB + Ideal: 0 dB + Worst case: +/-inf dB + """ + + NAME = 'audio_level_mean' + + def __init__(self, score_filename_prefix): + EvaluationScore.__init__(self, score_filename_prefix) + + def _Run(self, output_path): + self._LoadReferenceSignal() + self._LoadTestedSignal() + + dbfs_diffs_sum = 0.0 + seconds = min(len(self._tested_signal), len(self._reference_signal)) // 1000 + for t in range(seconds): + t0 = t * 1000 # pydub AudioSegment slices are indexed in milliseconds. + t1 = t0 + 1000 + dbfs_diffs_sum += ( + self._tested_signal[t0:t1].dBFS - self._reference_signal[t0:t1].dBFS) + self._score = dbfs_diffs_sum / float(seconds) + self._SaveScore() + + +@EvaluationScore.RegisterClass +class PolqaScore(EvaluationScore): + """POLQA score. + + See http://www.polqa.info/. + + Unit: MOS + Ideal: 4.5 + Worst case: 1.0 + """ + + NAME = 'polqa' + + def __init__(self, score_filename_prefix, polqa_bin_filepath): + EvaluationScore.__init__(self, score_filename_prefix) + + # POLQA binary file path. + self._polqa_bin_filepath = polqa_bin_filepath + if not os.path.exists(self._polqa_bin_filepath): + logging.error('cannot find POLQA tool binary file') + raise exceptions.FileNotFoundError() + + # Path to the POLQA directory with binary and license files. + self._polqa_tool_path, _ = os.path.split(self._polqa_bin_filepath) + + def _Run(self, output_path): + polqa_out_filepath = os.path.join(output_path, 'polqa.out') + if os.path.exists(polqa_out_filepath): + os.unlink(polqa_out_filepath) + + args = [ + self._polqa_bin_filepath, '-t', '-q', '-Overwrite', + '-Ref', self._reference_signal_filepath, + '-Test', self._tested_signal_filepath, + '-LC', 'NB', + '-Out', polqa_out_filepath, + ] + logging.debug(' '.join(args)) + subprocess.call(args, cwd=self._polqa_tool_path) + + # Parse POLQA tool output and extract the score. + polqa_output = self._ParseOutputFile(polqa_out_filepath) + self._score = float(polqa_output['PolqaScore']) + + self._SaveScore() + + @classmethod + def _ParseOutputFile(cls, polqa_out_filepath): + """ + Parses the POLQA tool output formatted as a table ('-t' option). + + Args: + polqa_out_filepath: path to the POLQA tool output file. + + Returns: + A dict. + """ + data = [] + with open(polqa_out_filepath) as f: + for line in f: + line = line.strip() + if len(line) == 0 or line.startswith('*'): + # Ignore comments. + continue + # Read fields. + data.append(re.split(r'\t+', line)) + + # Two rows expected (header and values). + assert len(data) == 2, 'Cannot parse POLQA output' + number_of_fields = len(data[0]) + assert number_of_fields == len(data[1]) + + # Build and return a dictionary with field names (header) as keys and the + # corresponding field values as values. + return {data[0][index]: data[1][index] for index in range(number_of_fields)} + + +@EvaluationScore.RegisterClass +class TotalHarmonicDistorsionScore(EvaluationScore): + """Total harmonic distortion plus noise score.
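+ + Computed, roughly, as the ratio between the RMS of the residual obtained + by subtracting the estimated fundamental of the input pure tone and the + amplitude of that fundamental (see _Run below).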
+ See "https://en.wikipedia.org/wiki/Total_harmonic_distortion#THD.2BN". + + Unit: -. + Ideal: 0. + Worst case: +inf + """ + + NAME = 'thd' + + def __init__(self, score_filename_prefix): + EvaluationScore.__init__(self, score_filename_prefix) + self._input_frequency = None + + def _Run(self, output_path): + # TODO(aleloi): Integrate changes made locally. + self._CheckInputSignal() + + self._LoadTestedSignal() + if self._tested_signal.channels != 1: + raise exceptions.EvaluationScoreException( + 'unsupported number of channels') + samples = signal_processing.SignalProcessingUtils.AudioSegmentToRawData( + self._tested_signal) + + # Init. + num_samples = len(samples) + duration = len(self._tested_signal) / 1000.0 + scaling = 2.0 / num_samples + max_freq = self._tested_signal.frame_rate / 2 + f0_freq = float(self._input_frequency) + t = np.linspace(0, duration, num_samples) + + # Analyze harmonics. + b_terms = [] + n = 1 + while f0_freq * n < max_freq: + x_n = np.sum(samples * np.sin(2.0 * np.pi * n * f0_freq * t)) * scaling + y_n = np.sum(samples * np.cos(2.0 * np.pi * n * f0_freq * t)) * scaling + b_terms.append(np.sqrt(x_n**2 + y_n**2)) + n += 1 + + output_without_fundamental = samples - b_terms[0] * np.sin( + 2.0 * np.pi * f0_freq * t) + distortion_and_noise = np.sqrt(np.sum( + output_without_fundamental**2) * np.pi * scaling) + + # TODO(alessiob): Fix or remove if not needed. + # thd = np.sqrt(np.sum(b_terms[1:]**2)) / b_terms[0] + + # TODO(alessiob): Check the range of |thd_plus_noise| and update the class + # docstring above if accordingly. + thd_plus_noise = distortion_and_noise / b_terms[0] + + self._score = thd_plus_noise + self._SaveScore() + + def _CheckInputSignal(self): + # Check input signal and get properties. + try: + if self._input_signal_metadata['signal'] != 'pure_tone': + raise exceptions.EvaluationScoreException( + 'The THD score requires a pure tone as input signal') + self._input_frequency = self._input_signal_metadata['frequency'] + if self._input_signal_metadata['test_data_gen_name'] != 'identity' or ( + self._input_signal_metadata['test_data_gen_config'] != 'default'): + raise exceptions.EvaluationScoreException( + 'The THD score cannot be used with any test data generator other ' + 'than "identity"') + except TypeError: + raise exceptions.EvaluationScoreException( + 'The THD score requires an input signal with associated metadata') + except KeyError: + raise exceptions.EvaluationScoreException( + 'Invalid input signal metadata to compute the THD score') diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/eval_scores_factory.py b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/eval_scores_factory.py new file mode 100644 index 0000000000..c2ef317cc9 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/eval_scores_factory.py @@ -0,0 +1,51 @@ +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +"""EvaluationScore factory class. +""" + +import logging + +from . import exceptions +from . 
import eval_scores
+
+
+class EvaluationScoreWorkerFactory(object):
+  """Factory class used to instantiate evaluation score workers.
+
+  The constructor gets the parameters used to instantiate the evaluation
+  score workers.
+  """
+
+  def __init__(self, polqa_tool_bin_path):
+    self._score_filename_prefix = None
+    self._polqa_tool_bin_path = polqa_tool_bin_path
+
+  def SetScoreFilenamePrefix(self, prefix):
+    self._score_filename_prefix = prefix
+
+  def GetInstance(self, evaluation_score_class):
+    """Creates an EvaluationScore instance given a class object.
+
+    Args:
+      evaluation_score_class: EvaluationScore class object (not an instance).
+
+    Returns:
+      An EvaluationScore instance.
+    """
+    if self._score_filename_prefix is None:
+      raise exceptions.InitializationException(
+          'The score file name prefix for evaluation score workers is not set')
+    logging.debug(
+        'factory producing a %s evaluation score', evaluation_score_class)
+
+    if evaluation_score_class == eval_scores.PolqaScore:
+      return eval_scores.PolqaScore(
+          self._score_filename_prefix, self._polqa_tool_bin_path)
+    else:
+      return evaluation_score_class(self._score_filename_prefix)
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/eval_scores_unittest.py b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/eval_scores_unittest.py
new file mode 100644
index 0000000000..ddb5d0b07a
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/eval_scores_unittest.py
@@ -0,0 +1,131 @@
+# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+
+"""Unit tests for the eval_scores module.
+"""
+
+import os
+import shutil
+import tempfile
+import unittest
+
+import pydub
+
+from . import data_access
+from . import eval_scores
+from . import eval_scores_factory
+from . import signal_processing
+
+
+class TestEvalScores(unittest.TestCase):
+  """Unit tests for the eval_scores module.
+  """
+
+  def setUp(self):
+    """Creates a temporary output folder and two audio track files."""
+    self._output_path = tempfile.mkdtemp()
+
+    # Create fake reference and tested (i.e., APM output) audio track files.
+    silence = pydub.AudioSegment.silent(duration=1000, frame_rate=48000)
+    fake_reference_signal = (
+        signal_processing.SignalProcessingUtils.GenerateWhiteNoise(silence))
+    fake_tested_signal = (
+        signal_processing.SignalProcessingUtils.GenerateWhiteNoise(silence))
+
+    # Save fake audio tracks.
+    self._fake_reference_signal_filepath = os.path.join(
+        self._output_path, 'fake_ref.wav')
+    signal_processing.SignalProcessingUtils.SaveWav(
+        self._fake_reference_signal_filepath, fake_reference_signal)
+    self._fake_tested_signal_filepath = os.path.join(
+        self._output_path, 'fake_test.wav')
+    signal_processing.SignalProcessingUtils.SaveWav(
+        self._fake_tested_signal_filepath, fake_tested_signal)
+
+  def tearDown(self):
+    """Recursively deletes the temporary folder."""
+    shutil.rmtree(self._output_path)
+
+  def testRegisteredClasses(self):
+    # Evaluation score names to exclude (tested separately).
+    exceptions = ['thd']
+
+    # Preliminary check.
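
For context: the REGISTERED_CLASSES dictionary checked in this test is filled
by the EvaluationScore.RegisterClass decorator applied to each score class in
eval_scores.py. A registry decorator of that kind boils down to the following
sketch (an illustration, not the actual implementation):

    class EvaluationScore(object):
      REGISTERED_CLASSES = {}

      @classmethod
      def RegisterClass(cls, class_to_register):
        # Map the score NAME to its class so that factories and tests can
        # look workers up by name.
        cls.REGISTERED_CLASSES[class_to_register.NAME] = class_to_register
        return class_to_register

The decorator returns the class unchanged, so registration is a side effect of
the class definition itself.
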
+ self.assertTrue(os.path.exists(self._output_path)) + + # Check that there is at least one registered evaluation score worker. + registered_classes = eval_scores.EvaluationScore.REGISTERED_CLASSES + self.assertIsInstance(registered_classes, dict) + self.assertGreater(len(registered_classes), 0) + + # Instance evaluation score workers factory with fake dependencies. + eval_score_workers_factory = ( + eval_scores_factory.EvaluationScoreWorkerFactory( + polqa_tool_bin_path=os.path.join( + os.path.dirname(os.path.abspath(__file__)), 'fake_polqa'))) + eval_score_workers_factory.SetScoreFilenamePrefix('scores-') + + # Try each registered evaluation score worker. + for eval_score_name in registered_classes: + if eval_score_name in exceptions: + continue + + # Instance evaluation score worker. + eval_score_worker = eval_score_workers_factory.GetInstance( + registered_classes[eval_score_name]) + + # Set fake input metadata and reference and test file paths, then run. + eval_score_worker.SetReferenceSignalFilepath( + self._fake_reference_signal_filepath) + eval_score_worker.SetTestedSignalFilepath( + self._fake_tested_signal_filepath) + eval_score_worker.Run(self._output_path) + + # Check output. + score = data_access.ScoreFile.Load(eval_score_worker.output_filepath) + self.assertTrue(isinstance(score, float)) + + def testTotalHarmonicDistorsionScore(self): + # Init. + pure_tone_freq = 5000.0 + eval_score_worker = eval_scores.TotalHarmonicDistorsionScore('scores-') + eval_score_worker.SetInputSignalMetadata({ + 'signal': 'pure_tone', + 'frequency': pure_tone_freq, + 'test_data_gen_name': 'identity', + 'test_data_gen_config': 'default', + }) + template = pydub.AudioSegment.silent(duration=1000, frame_rate=48000) + + # Create 3 test signals: pure tone, pure tone + white noise, white noise + # only. + pure_tone = signal_processing.SignalProcessingUtils.GeneratePureTone( + template, pure_tone_freq) + white_noise = signal_processing.SignalProcessingUtils.GenerateWhiteNoise( + template) + noisy_tone = signal_processing.SignalProcessingUtils.MixSignals( + pure_tone, white_noise) + + # Compute scores for increasingly distorted pure tone signals. + scores = [None, None, None] + for index, tested_signal in enumerate([pure_tone, noisy_tone, white_noise]): + # Save signal. + tmp_filepath = os.path.join(self._output_path, 'tmp_thd.wav') + signal_processing.SignalProcessingUtils.SaveWav( + tmp_filepath, tested_signal) + + # Compute score. + eval_score_worker.SetTestedSignalFilepath(tmp_filepath) + eval_score_worker.Run(self._output_path) + scores[index] = eval_score_worker.score + + # Remove output file to avoid caching. + os.remove(eval_score_worker.output_filepath) + + # Validate scores (lowest score with a pure tone). + self.assertTrue(all([scores[i + 1] > scores[i] for i in range(2)])) diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/evaluation.py b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/evaluation.py new file mode 100644 index 0000000000..09ded4cbd5 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/evaluation.py @@ -0,0 +1,53 @@ +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. 
An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +"""Evaluator of the APM module. +""" + +import logging + + +class ApmModuleEvaluator(object): + """APM evaluator class. + """ + + def __init__(self): + pass + + @classmethod + def Run(cls, evaluation_score_workers, apm_input_metadata, + apm_output_filepath, reference_input_filepath, output_path): + """Runs the evaluation. + + Iterates over the given evaluation score workers. + + Args: + evaluation_score_workers: list of EvaluationScore instances. + apm_input_metadata: dictionary with metadata of the APM input. + apm_output_filepath: path to the audio track file with the APM output. + reference_input_filepath: path to the reference audio track file. + output_path: output path. + + Returns: + A dict of evaluation score name and score pairs. + """ + # Init. + scores = {} + + for evaluation_score_worker in evaluation_score_workers: + logging.info(' computing <%s> score', evaluation_score_worker.NAME) + evaluation_score_worker.SetInputSignalMetadata(apm_input_metadata) + evaluation_score_worker.SetReferenceSignalFilepath( + reference_input_filepath) + evaluation_score_worker.SetTestedSignalFilepath( + apm_output_filepath) + + evaluation_score_worker.Run(output_path) + scores[evaluation_score_worker.NAME] = evaluation_score_worker.score + + return scores diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/exceptions.py b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/exceptions.py new file mode 100644 index 0000000000..852e9e8468 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/exceptions.py @@ -0,0 +1,46 @@ +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +"""Exception classes. +""" + + +class FileNotFoundError(Exception): + """File not found exception. + """ + pass + + +class SignalProcessingException(Exception): + """Signal processing exception. + """ + pass + + +class InputMixerException(Exception): + """Input mixer exception. + """ + pass + + +class InputSignalCreatorException(Exception): + """Input signal creator exception. + """ + pass + + +class EvaluationScoreException(Exception): + """Evaluation score exception. + """ + pass + + +class InitializationException(Exception): + """Initialization exception. + """ + pass diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/export.py b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/export.py new file mode 100644 index 0000000000..4c50cea21f --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/export.py @@ -0,0 +1,402 @@ +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. 
An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +import functools +import hashlib +import logging +import os +import re +import sys + +try: + import csscompressor +except ImportError: + logging.critical('Cannot import the third-party Python package csscompressor') + sys.exit(1) + +try: + import jsmin +except ImportError: + logging.critical('Cannot import the third-party Python package jsmin') + sys.exit(1) + + +class HtmlExport(object): + """HTML exporter class for APM quality scores.""" + + _NEW_LINE = '\n' + + # CSS and JS file paths. + _PATH = os.path.dirname(os.path.realpath(__file__)) + _CSS_FILEPATH = os.path.join(_PATH, 'results.css') + _CSS_MINIFIED = True + _JS_FILEPATH = os.path.join(_PATH, 'results.js') + _JS_MINIFIED = True + + def __init__(self, output_filepath): + self._scores_data_frame = None + self._output_filepath = output_filepath + + def Export(self, scores_data_frame): + """Exports scores into an HTML file. + + Args: + scores_data_frame: DataFrame instance. + """ + self._scores_data_frame = scores_data_frame + html = ['<html>', + self._BuildHeader(), + ('<script type="text/javascript">' + '(function () {' + 'window.addEventListener(\'load\', function () {' + 'var inspector = new AudioInspector();' + '});' + '})();' + '</script>'), + '<body>', + self._BuildBody(), + '</body>', + '</html>'] + self._Save(self._output_filepath, self._NEW_LINE.join(html)) + + def _BuildHeader(self): + """Builds the <head> section of the HTML file. + + The header contains the page title and either embedded or linked CSS and JS + files. + + Returns: + A string with <head>...</head> HTML. + """ + html = ['<head>', '<title>Results</title>'] + + # Add Material Design hosted libs. + html.append('<link rel="stylesheet" href="http://fonts.googleapis.com/' + 'css?family=Roboto:300,400,500,700" type="text/css">') + html.append('<link rel="stylesheet" href="https://fonts.googleapis.com/' + 'icon?family=Material+Icons">') + html.append('<link rel="stylesheet" href="https://code.getmdl.io/1.3.0/' + 'material.indigo-pink.min.css">') + html.append('<script defer src="https://code.getmdl.io/1.3.0/' + 'material.min.js"></script>') + + # Embed custom JavaScript and CSS files. + html.append('<script>') + with open(self._JS_FILEPATH) as f: + html.append(jsmin.jsmin(f.read()) if self._JS_MINIFIED else ( + f.read().rstrip())) + html.append('</script>') + html.append('<style>') + with open(self._CSS_FILEPATH) as f: + html.append(csscompressor.compress(f.read()) if self._CSS_MINIFIED else ( + f.read().rstrip())) + html.append('</style>') + + html.append('</head>') + + return self._NEW_LINE.join(html) + + def _BuildBody(self): + """Builds the content of the <body> section.""" + score_names = self._scores_data_frame['eval_score_name'].drop_duplicates( + ).values.tolist() + + html = [ + ('<div class="mdl-layout mdl-js-layout mdl-layout--fixed-header ' + 'mdl-layout--fixed-tabs">'), + '<header class="mdl-layout__header">', + '<div class="mdl-layout__header-row">', + '<span class="mdl-layout-title">APM QA results ({})</span>'.format( + self._output_filepath), + '</div>', + ] + + # Tab selectors. 
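
The _BuildHeader method above inlines results.js and results.css into the
page, minifying them when _JS_MINIFIED and _CSS_MINIFIED are set. The pattern
reduces to this sketch (helper name hypothetical):

    import csscompressor
    import jsmin

    def _InlineAsset(filepath, minify, compress):
      # Read the asset once and minify only on demand, so the embedded copy
      # stays readable during development.
      with open(filepath) as f:
        content = f.read()
      return compress(content) if minify else content.rstrip()

    # _InlineAsset(self._JS_FILEPATH, self._JS_MINIFIED, jsmin.jsmin)
    # _InlineAsset(self._CSS_FILEPATH, self._CSS_MINIFIED,
    #              csscompressor.compress)

Embedding makes the report a single self-contained file; only the Material
Design assets are linked from a CDN.
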
+ html.append('<div class="mdl-layout__tab-bar mdl-js-ripple-effect">') + for tab_index, score_name in enumerate(score_names): + is_active = tab_index == 0 + html.append('<a href="#score-tab-{}" class="mdl-layout__tab{}">' + '{}</a>'.format(tab_index, + ' is-active' if is_active else '', + self._FormatName(score_name))) + html.append('</div>') + + html.append('</header>') + html.append('<main class="mdl-layout__content" style="overflow-x: auto;">') + + # Tabs content. + for tab_index, score_name in enumerate(score_names): + html.append('<section class="mdl-layout__tab-panel{}" ' + 'id="score-tab-{}">'.format( + ' is-active' if is_active else '', tab_index)) + html.append('<div class="page-content">') + html.append(self._BuildScoreTab(score_name, ('s{}'.format(tab_index),))) + html.append('</div>') + html.append('</section>') + + html.append('</main>') + html.append('</div>') + + # Add snackbar for notifications. + html.append( + '<div id="snackbar" aria-live="assertive" aria-atomic="true"' + ' aria-relevant="text" class="mdl-snackbar mdl-js-snackbar">' + '<div class="mdl-snackbar__text"></div>' + '<button type="button" class="mdl-snackbar__action"></button>' + '</div>') + + return self._NEW_LINE.join(html) + + def _BuildScoreTab(self, score_name, anchor_data): + """Builds the content of a tab.""" + # Find unique values. + scores = self._scores_data_frame[ + self._scores_data_frame.eval_score_name == score_name] + apm_configs = sorted(self._FindUniqueTuples(scores, ['apm_config'])) + test_data_gen_configs = sorted(self._FindUniqueTuples( + scores, ['test_data_gen', 'test_data_gen_params'])) + + html = [ + '<div class="mdl-grid">', + '<div class="mdl-layout-spacer"></div>', + '<div class="mdl-cell mdl-cell--10-col">', + ('<table class="mdl-data-table mdl-js-data-table mdl-shadow--2dp" ' + 'style="width: 100%;">'), + ] + + # Header. + html.append('<thead><tr><th>APM config / Test data generator</th>') + for test_data_gen_info in test_data_gen_configs: + html.append('<th>{} {}</th>'.format( + self._FormatName(test_data_gen_info[0]), test_data_gen_info[1])) + html.append('</tr></thead>') + + # Body. + html.append('<tbody>') + for apm_config in apm_configs: + html.append('<tr><td>' + self._FormatName(apm_config[0]) + '</td>') + for test_data_gen_info in test_data_gen_configs: + dialog_id = self._ScoreStatsInspectorDialogId( + score_name, apm_config[0], test_data_gen_info[0], + test_data_gen_info[1]) + html.append( + '<td onclick="openScoreStatsInspector(\'{}\')">{}</td>'.format( + dialog_id, self._BuildScoreTableCell( + score_name, test_data_gen_info[0], test_data_gen_info[1], + apm_config[0]))) + html.append('</tr>') + html.append('</tbody>') + + html.append('</table></div><div class="mdl-layout-spacer"></div></div>') + + html.append(self._BuildScoreStatsInspectorDialogs( + score_name, apm_configs, test_data_gen_configs, + anchor_data)) + + return self._NEW_LINE.join(html) + + def _BuildScoreTableCell(self, score_name, test_data_gen, + test_data_gen_params, apm_config): + """Builds the content of a table cell for a score table.""" + scores = self._SliceDataForScoreTableCell( + score_name, apm_config, test_data_gen, test_data_gen_params) + stats = self._ComputeScoreStats(scores) + + html = [] + items_id_prefix = ( + score_name + test_data_gen + test_data_gen_params + apm_config) + if stats['count'] == 1: + # Show the only available score. 
+ item_id = hashlib.md5(items_id_prefix.encode('utf-8')).hexdigest() + html.append('<div id="single-value-{0}">{1:f}</div>'.format( + item_id, scores['score'].mean())) + html.append('<div class="mdl-tooltip" data-mdl-for="single-value-{}">{}' + '</div>'.format(item_id, 'single value')) + else: + # Show stats. + for stat_name in ['min', 'max', 'mean', 'std dev']: + item_id = hashlib.md5( + (items_id_prefix + stat_name).encode('utf-8')).hexdigest() + html.append('<div id="stats-{0}">{1:f}</div>'.format( + item_id, stats[stat_name])) + html.append('<div class="mdl-tooltip" data-mdl-for="stats-{}">{}' + '</div>'.format(item_id, stat_name)) + + return self._NEW_LINE.join(html) + + def _BuildScoreStatsInspectorDialogs( + self, score_name, apm_configs, test_data_gen_configs, anchor_data): + """Builds a set of score stats inspector dialogs.""" + html = [] + for apm_config in apm_configs: + for test_data_gen_info in test_data_gen_configs: + dialog_id = self._ScoreStatsInspectorDialogId( + score_name, apm_config[0], + test_data_gen_info[0], test_data_gen_info[1]) + + html.append('<dialog class="mdl-dialog" id="{}" ' + 'style="width: 40%;">'.format(dialog_id)) + + # Content. + html.append('<div class="mdl-dialog__content">') + html.append('<h6><strong>APM config preset</strong>: {}<br/>' + '<strong>Test data generator</strong>: {} ({})</h6>'.format( + self._FormatName(apm_config[0]), + self._FormatName(test_data_gen_info[0]), + test_data_gen_info[1])) + html.append(self._BuildScoreStatsInspectorDialog( + score_name, apm_config[0], test_data_gen_info[0], + test_data_gen_info[1], anchor_data + (dialog_id,))) + html.append('</div>') + + # Actions. + html.append('<div class="mdl-dialog__actions">') + html.append('<button type="button" class="mdl-button" ' + 'onclick="closeScoreStatsInspector()">' + 'Close</button>') + html.append('</div>') + + html.append('</dialog>') + + return self._NEW_LINE.join(html) + + def _BuildScoreStatsInspectorDialog( + self, score_name, apm_config, test_data_gen, test_data_gen_params, + anchor_data): + """Builds one score stats inspector dialog.""" + scores = self._SliceDataForScoreTableCell( + score_name, apm_config, test_data_gen, test_data_gen_params) + + capture_render_pairs = sorted(self._FindUniqueTuples( + scores, ['capture', 'render'])) + echo_simulators = sorted(self._FindUniqueTuples(scores, ['echo_simulator'])) + + html = ['<table class="mdl-data-table mdl-js-data-table mdl-shadow--2dp">'] + + # Header. + html.append('<thead><tr><th>Capture-Render / Echo simulator</th>') + for echo_simulator in echo_simulators: + html.append('<th>' + self._FormatName(echo_simulator[0]) +'</th>') + html.append('</tr></thead>') + + # Body. + html.append('<tbody>') + for row, (capture, render) in enumerate(capture_render_pairs): + html.append('<tr><td><div>{}</div><div>{}</div></td>'.format( + capture, render)) + for col, echo_simulator in enumerate(echo_simulators): + score_tuple = self._SliceDataForScoreStatsTableCell( + scores, capture, render, echo_simulator[0]) + cell_class = 'r{}c{}'.format(row, col) + html.append('<td class="single-score-cell {}">{}</td>'.format( + cell_class, self._BuildScoreStatsInspectorTableCell( + score_tuple, anchor_data + (cell_class,)))) + html.append('</tr>') + html.append('</tbody>') + + html.append('</table>') + + # Placeholder for the audio inspector. 
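
Both the stats tooltips above and the dialog ids defined below derive element
ids by hashing concatenated experiment parameters: the result is deterministic
across runs and uses only characters that are valid in HTML attributes and URL
anchors. A minimal sketch of the idea (helper name hypothetical):

    import hashlib

    def _StableId(*parts):
      # md5 is used for stable naming here, not for security.
      return hashlib.md5('-'.join(parts).encode('utf-8')).hexdigest()

    # E.g. 'stats-' + _StableId(score_name, apm_config, stat_name).
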
+ html.append('<div class="audio-inspector-placeholder"></div>') + + return self._NEW_LINE.join(html) + + def _BuildScoreStatsInspectorTableCell(self, score_tuple, anchor_data): + """Builds the content of a cell of a score stats inspector.""" + anchor = '&'.join(anchor_data) + html = [('<div class="v">{}</div>' + '<button class="mdl-button mdl-js-button mdl-button--icon"' + ' data-anchor="{}">' + '<i class="material-icons mdl-color-text--blue-grey">link</i>' + '</button>').format(score_tuple.score, anchor)] + + # Add all the available file paths as hidden data. + for field_name in score_tuple.keys(): + if field_name.endswith('_filepath'): + html.append('<input type="hidden" name="{}" value="{}">'.format( + field_name, score_tuple[field_name])) + + return self._NEW_LINE.join(html) + + def _SliceDataForScoreTableCell( + self, score_name, apm_config, test_data_gen, test_data_gen_params): + """Slices |self._scores_data_frame| to extract the data for a tab.""" + masks = [] + masks.append(self._scores_data_frame.eval_score_name == score_name) + masks.append(self._scores_data_frame.apm_config == apm_config) + masks.append(self._scores_data_frame.test_data_gen == test_data_gen) + masks.append( + self._scores_data_frame.test_data_gen_params == test_data_gen_params) + mask = functools.reduce((lambda i1, i2: i1 & i2), masks) + del masks + return self._scores_data_frame[mask] + + @classmethod + def _SliceDataForScoreStatsTableCell( + cls, scores, capture, render, echo_simulator): + """Slices |scores| to extract the data for a tab.""" + masks = [] + + masks.append(scores.capture == capture) + masks.append(scores.render == render) + masks.append(scores.echo_simulator == echo_simulator) + mask = functools.reduce((lambda i1, i2: i1 & i2), masks) + del masks + + sliced_data = scores[mask] + assert len(sliced_data) == 1, 'single score is expected' + return sliced_data.iloc[0] + + @classmethod + def _FindUniqueTuples(cls, data_frame, fields): + """Slices |data_frame| to a list of fields and finds unique tuples.""" + return data_frame[fields].drop_duplicates().values.tolist() + + @classmethod + def _ComputeScoreStats(cls, data_frame): + """Computes score stats.""" + scores = data_frame['score'] + return { + 'count': scores.count(), + 'min': scores.min(), + 'max': scores.max(), + 'mean': scores.mean(), + 'std dev': scores.std(), + } + + @classmethod + def _ScoreStatsInspectorDialogId(cls, score_name, apm_config, test_data_gen, + test_data_gen_params): + """Assigns a unique name to a dialog.""" + return 'score-stats-dialog-' + hashlib.md5( + 'score-stats-inspector-{}-{}-{}-{}'.format( + score_name, apm_config, test_data_gen, + test_data_gen_params).encode('utf-8')).hexdigest() + + @classmethod + def _Save(cls, output_filepath, html): + """Writes the HTML file. + + Args: + output_filepath: output file path. + html: string with the HTML content. + """ + with open(output_filepath, 'w') as f: + f.write(html) + + @classmethod + def _FormatName(cls, name): + """Formats a name. + + Args: + name: a string. + + Returns: + A copy of name in which underscores and dashes are replaced with a space. 
+ """ + return re.sub(r'[_\-]', ' ', name) diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/export_unittest.py b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/export_unittest.py new file mode 100644 index 0000000000..8b4e20699c --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/export_unittest.py @@ -0,0 +1,84 @@ +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +"""Unit tests for the export module. +""" + +import logging +import os +import shutil +import tempfile +import unittest + +import pyquery as pq + +from . import audioproc_wrapper +from . import collect_data +from . import eval_scores_factory +from . import evaluation +from . import export +from . import simulation +from . import test_data_generation_factory + + +class TestExport(unittest.TestCase): + """Unit tests for the export module. + """ + + _CLEAN_TMP_OUTPUT = True + + def setUp(self): + """Creates temporary data to export.""" + self._tmp_path = tempfile.mkdtemp() + + # Run a fake experiment to produce data to export. + simulator = simulation.ApmModuleSimulator( + test_data_generator_factory=( + test_data_generation_factory.TestDataGeneratorFactory( + aechen_ir_database_path='', + noise_tracks_path='', + copy_with_identity=False)), + evaluation_score_factory=( + eval_scores_factory.EvaluationScoreWorkerFactory( + polqa_tool_bin_path=os.path.join( + os.path.dirname(os.path.abspath(__file__)), 'fake_polqa'))), + ap_wrapper=audioproc_wrapper.AudioProcWrapper( + audioproc_wrapper.AudioProcWrapper.DEFAULT_APM_SIMULATOR_BIN_PATH), + evaluator=evaluation.ApmModuleEvaluator()) + simulator.Run( + config_filepaths=['apm_configs/default.json'], + capture_input_filepaths=[ + os.path.join(self._tmp_path, 'pure_tone-440_1000.wav'), + os.path.join(self._tmp_path, 'pure_tone-880_1000.wav'), + ], + test_data_generator_names=['identity', 'white_noise'], + eval_score_names=['audio_level_peak', 'audio_level_mean'], + output_dir=self._tmp_path) + + # Export results. + p = collect_data.InstanceArgumentsParser() + args = p.parse_args(['--output_dir', self._tmp_path]) + src_path = collect_data.ConstructSrcPath(args) + self._data_to_export = collect_data.FindScores(src_path, args) + + def tearDown(self): + """Recursively deletes temporary folders.""" + if self._CLEAN_TMP_OUTPUT: + shutil.rmtree(self._tmp_path) + else: + logging.warning(self.id() + ' did not clean the temporary path ' + ( + self._tmp_path)) + + def testCreateHtmlReport(self): + fn_out = os.path.join(self._tmp_path, 'results.html') + exporter = export.HtmlExport(fn_out) + exporter.Export(self._data_to_export) + + document = pq.PyQuery(filename=fn_out) + self.assertIsInstance(document, pq.PyQuery) + # TODO(alessiob): Use PyQuery API to check the HTML file. 
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/external_vad.py b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/external_vad.py new file mode 100644 index 0000000000..01418d84fe --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/external_vad.py @@ -0,0 +1,77 @@ +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +from __future__ import division + +import logging +import os +import subprocess +import shutil +import sys +import tempfile + +try: + import numpy as np +except ImportError: + logging.critical('Cannot import the third-party Python package numpy') + sys.exit(1) + +from . import signal_processing + +class ExternalVad(object): + + def __init__(self, path_to_binary, name): + """Args: + path_to_binary: path to binary that accepts '-i <wav>', '-o + <float probabilities>'. There must be one float value per + 10ms audio + name: a name to identify the external VAD. Used for saving + the output as extvad_output-<name>. + """ + self._path_to_binary = path_to_binary + self.name = name + assert os.path.exists(self._path_to_binary), ( + self._path_to_binary) + self._vad_output = None + + def Run(self, wav_file_path): + _signal = signal_processing.SignalProcessingUtils.LoadWav(wav_file_path) + if _signal.channels != 1: + raise NotImplementedError('Multiple-channel' + ' annotations not implemented') + if _signal.frame_rate != 48000: + raise NotImplementedError('Frame rates ' + 'other than 48000 not implemented') + + tmp_path = tempfile.mkdtemp() + try: + output_file_path = os.path.join( + tmp_path, self.name + '_vad.tmp') + subprocess.call([ + self._path_to_binary, + '-i', wav_file_path, + '-o', output_file_path + ]) + self._vad_output = np.fromfile(output_file_path, np.float32) + except Exception as e: + logging.error('Error while running the ' + self.name + + ' VAD (' + e.message + ')') + finally: + if os.path.exists(tmp_path): + shutil.rmtree(tmp_path) + + def GetVadOutput(self): + assert self._vad_output is not None + return self._vad_output + + @classmethod + def ConstructVadDict(cls, vad_paths, vad_names): + external_vads = {} + for path, name in zip(vad_paths, vad_names): + external_vads[name] = ExternalVad(path, name) + return external_vads diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/fake_external_vad.py b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/fake_external_vad.py new file mode 100755 index 0000000000..7c75e8f5c3 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/fake_external_vad.py @@ -0,0 +1,24 @@ +#!/usr/bin/python +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. 
All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. +import argparse +import numpy as np + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('-i', required=True) + parser.add_argument('-o', required=True) + + args = parser.parse_args() + + array = np.arange(100, dtype=np.float32) + array.tofile(open(args.o, 'w')) + + +if __name__ == '__main__': + main() diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/fake_polqa.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/fake_polqa.cc new file mode 100644 index 0000000000..62d8ebb84d --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/fake_polqa.cc @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <fstream> +#include <iostream> + +#include "rtc_base/checks.h" + +namespace webrtc { +namespace test { +namespace { + +const char* const kErrorMessage = "-Out /path/to/output/file is mandatory"; + +// Writes fake output intended to be parsed by +// quality_assessment.eval_scores.PolqaScore. +void WriteOutputFile(const std::string& output_file_path) { + RTC_CHECK_NE(output_file_path, ""); + std::ofstream out(output_file_path); + RTC_CHECK(!out.bad()); + out << "* Fake Polqa output" << std::endl; + out << "FakeField1\tPolqaScore\tFakeField2" << std::endl; + out << "FakeValue1\t3.25\tFakeValue2" << std::endl; + out.close(); +} + +} // namespace + +int main(int argc, char* argv[]) { + // Find "-Out" and use its next argument as output file path. + RTC_CHECK_GE(argc, 3) << kErrorMessage; + const std::string kSoughtFlagName = "-Out"; + for (int i = 1; i < argc - 1; ++i) { + if (kSoughtFlagName.compare(argv[i]) == 0) { + WriteOutputFile(argv[i + 1]); + return 0; + } + } + FATAL() << kErrorMessage; +} + +} // namespace test +} // namespace webrtc + +int main(int argc, char* argv[]) { + return webrtc::test::main(argc, argv); +} diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/input_mixer.py b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/input_mixer.py new file mode 100644 index 0000000000..b1afe14454 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/input_mixer.py @@ -0,0 +1,95 @@ +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +"""Input mixer module. +""" + +import logging +import os + +from . import exceptions +from . 
import signal_processing + + +class ApmInputMixer(object): + """Class to mix a set of audio segments down to the APM input.""" + + _HARD_CLIPPING_LOG_MSG = 'hard clipping detected in the mixed signal' + + def __init__(self): + pass + + @classmethod + def HardClippingLogMessage(cls): + """Returns the log message used when hard clipping is detected in the mix. + + This method is mainly intended to be used by the unit tests. + """ + return cls._HARD_CLIPPING_LOG_MSG + + @classmethod + def Mix(cls, output_path, capture_input_filepath, echo_filepath): + """Mixes capture and echo. + + Creates the overall capture input for APM by mixing the "echo-free" capture + signal with the echo signal (e.g., echo simulated via the + echo_path_simulation module). + + The echo signal cannot be shorter than the capture signal and the generated + mix will have the same duration of the capture signal. The latter property + is enforced in order to let the input of APM and the reference signal + created by TestDataGenerator have the same length (required for the + evaluation step). + + Hard-clipping may occur in the mix; a warning is raised when this happens. + + If |echo_filepath| is None, nothing is done and |capture_input_filepath| is + returned. + + Args: + speech: AudioSegment instance. + echo_path: AudioSegment instance or None. + + Returns: + Path to the mix audio track file. + """ + if echo_filepath is None: + return capture_input_filepath + + # Build the mix output file name as a function of the echo file name. + # This ensures that if the internal parameters of the echo path simulator + # change, no erroneous cache hit occurs. + echo_file_name, _ = os.path.splitext(os.path.split(echo_filepath)[1]) + capture_input_file_name, _ = os.path.splitext( + os.path.split(capture_input_filepath)[1]) + mix_filepath = os.path.join(output_path, 'mix_capture_{}_{}.wav'.format( + capture_input_file_name, echo_file_name)) + + # Create the mix if not done yet. + mix = None + if not os.path.exists(mix_filepath): + echo_free_capture = signal_processing.SignalProcessingUtils.LoadWav( + capture_input_filepath) + echo = signal_processing.SignalProcessingUtils.LoadWav(echo_filepath) + + if signal_processing.SignalProcessingUtils.CountSamples(echo) < ( + signal_processing.SignalProcessingUtils.CountSamples( + echo_free_capture)): + raise exceptions.InputMixerException( + 'echo cannot be shorter than capture') + + mix = echo_free_capture.overlay(echo) + signal_processing.SignalProcessingUtils.SaveWav(mix_filepath, mix) + + # Check if hard clipping occurs. + if mix is None: + mix = signal_processing.SignalProcessingUtils.LoadWav(mix_filepath) + if signal_processing.SignalProcessingUtils.DetectHardClipping(mix): + logging.warning(cls._HARD_CLIPPING_LOG_MSG) + + return mix_filepath diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/input_mixer_unittest.py b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/input_mixer_unittest.py new file mode 100644 index 0000000000..b212614199 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/input_mixer_unittest.py @@ -0,0 +1,149 @@ +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. 
An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +"""Unit tests for the input mixer module. +""" + +import logging +import os +import shutil +import sys +import tempfile +import unittest + +SRC = os.path.abspath(os.path.join( + os.path.dirname((__file__)), os.pardir, os.pardir, os.pardir, os.pardir)) +sys.path.append(os.path.join(SRC, 'third_party', 'pymock')) + +import mock + +from . import exceptions +from . import input_mixer +from . import signal_processing + + +class TestApmInputMixer(unittest.TestCase): + """Unit tests for the ApmInputMixer class. + """ + + # Audio track file names created in setUp(). + _FILENAMES = ['capture', 'echo_1', 'echo_2', 'shorter', 'longer'] + + # Target peak power level (dBFS) of each audio track file created in setUp(). + # These values are hand-crafted in order to make saturation happen when + # capture and echo_2 are mixed and the contrary for capture and echo_1. + # None means that the power is not changed. + _MAX_PEAK_POWER_LEVELS = [-10.0, -5.0, 0.0, None, None] + + # Audio track file durations in milliseconds. + _DURATIONS = [1000, 1000, 1000, 800, 1200] + + _SAMPLE_RATE = 48000 + + def setUp(self): + """Creates temporary data.""" + self._tmp_path = tempfile.mkdtemp() + + # Create audio track files. + self._audio_tracks = {} + for filename, peak_power, duration in zip( + self._FILENAMES, self._MAX_PEAK_POWER_LEVELS, self._DURATIONS): + audio_track_filepath = os.path.join(self._tmp_path, '{}.wav'.format( + filename)) + + # Create a pure tone with the target peak power level. + template = signal_processing.SignalProcessingUtils.GenerateSilence( + duration=duration, sample_rate=self._SAMPLE_RATE) + signal = signal_processing.SignalProcessingUtils.GeneratePureTone( + template) + if peak_power is not None: + signal = signal.apply_gain(-signal.max_dBFS + peak_power) + + signal_processing.SignalProcessingUtils.SaveWav( + audio_track_filepath, signal) + self._audio_tracks[filename] = { + 'filepath': audio_track_filepath, + 'num_samples': signal_processing.SignalProcessingUtils.CountSamples( + signal) + } + + def tearDown(self): + """Recursively deletes temporary folders.""" + shutil.rmtree(self._tmp_path) + + def testCheckMixSameDuration(self): + """Checks the duration when mixing capture and echo with same duration.""" + mix_filepath = input_mixer.ApmInputMixer.Mix( + self._tmp_path, + self._audio_tracks['capture']['filepath'], + self._audio_tracks['echo_1']['filepath']) + self.assertTrue(os.path.exists(mix_filepath)) + + mix = signal_processing.SignalProcessingUtils.LoadWav(mix_filepath) + self.assertEqual(self._audio_tracks['capture']['num_samples'], + signal_processing.SignalProcessingUtils.CountSamples(mix)) + + def testRejectShorterEcho(self): + """Rejects echo signals that are shorter than the capture signal.""" + try: + _ = input_mixer.ApmInputMixer.Mix( + self._tmp_path, + self._audio_tracks['capture']['filepath'], + self._audio_tracks['shorter']['filepath']) + self.fail('no exception raised') + except exceptions.InputMixerException: + pass + + def testCheckMixDurationWithLongerEcho(self): + """Checks the duration when mixing an echo longer than the capture.""" + mix_filepath = input_mixer.ApmInputMixer.Mix( + self._tmp_path, + self._audio_tracks['capture']['filepath'], + self._audio_tracks['longer']['filepath']) + self.assertTrue(os.path.exists(mix_filepath)) + + mix = 
signal_processing.SignalProcessingUtils.LoadWav(mix_filepath) + self.assertEqual(self._audio_tracks['capture']['num_samples'], + signal_processing.SignalProcessingUtils.CountSamples(mix)) + + def testCheckOutputFileNamesConflict(self): + """Checks that different echo files lead to different output file names.""" + mix1_filepath = input_mixer.ApmInputMixer.Mix( + self._tmp_path, + self._audio_tracks['capture']['filepath'], + self._audio_tracks['echo_1']['filepath']) + self.assertTrue(os.path.exists(mix1_filepath)) + + mix2_filepath = input_mixer.ApmInputMixer.Mix( + self._tmp_path, + self._audio_tracks['capture']['filepath'], + self._audio_tracks['echo_2']['filepath']) + self.assertTrue(os.path.exists(mix2_filepath)) + + self.assertNotEqual(mix1_filepath, mix2_filepath) + + def testHardClippingLogExpected(self): + """Checks that hard clipping warning is raised when occurring.""" + logging.warning = mock.MagicMock(name='warning') + _ = input_mixer.ApmInputMixer.Mix( + self._tmp_path, + self._audio_tracks['capture']['filepath'], + self._audio_tracks['echo_2']['filepath']) + logging.warning.assert_called_once_with( + input_mixer.ApmInputMixer.HardClippingLogMessage()) + + def testHardClippingLogNotExpected(self): + """Checks that hard clipping warning is not raised when not occurring.""" + logging.warning = mock.MagicMock(name='warning') + _ = input_mixer.ApmInputMixer.Mix( + self._tmp_path, + self._audio_tracks['capture']['filepath'], + self._audio_tracks['echo_1']['filepath']) + self.assertNotIn( + mock.call(input_mixer.ApmInputMixer.HardClippingLogMessage()), + logging.warning.call_args_list) diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/input_signal_creator.py b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/input_signal_creator.py new file mode 100644 index 0000000000..5d97c3b2fc --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/input_signal_creator.py @@ -0,0 +1,67 @@ +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +"""Input signal creator module. +""" + +from . import exceptions +from . import signal_processing + + +class InputSignalCreator(object): + """Input signal creator class. + """ + + @classmethod + def Create(cls, name, raw_params): + """Creates a input signal and its metadata. + + Args: + name: Input signal creator name. + raw_params: Tuple of parameters to pass to the specific signal creator. + + Returns: + (AudioSegment, dict) tuple. + """ + try: + signal = {} + params = {} + + if name == 'pure_tone': + params['frequency'] = float(raw_params[0]) + params['duration'] = int(raw_params[1]) + signal = cls._CreatePureTone(params['frequency'], params['duration']) + else: + raise exceptions.InputSignalCreatorException( + 'Invalid input signal creator name') + + # Complete metadata. 
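+      # For example, Create('pure_tone', ('440', '1000')) yields a 440 Hz,
+      # 1000 ms tone with params == {'frequency': 440.0, 'duration': 1000,
+      # 'signal': 'pure_tone'}.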
+ params['signal'] = name + + return signal, params + except (TypeError, AssertionError) as e: + raise exceptions.InputSignalCreatorException( + 'Invalid signal creator parameters: {}'.format(e)) + + @classmethod + def _CreatePureTone(cls, frequency, duration): + """ + Generates a pure tone at 48000 Hz. + + Args: + frequency: Float in (0-24000] (Hz). + duration: Integer (milliseconds). + + Returns: + AudioSegment instance. + """ + assert 0 < frequency <= 24000 + assert 0 < duration + template = signal_processing.SignalProcessingUtils.GenerateSilence(duration) + return signal_processing.SignalProcessingUtils.GeneratePureTone( + template, frequency) diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/results.css b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/results.css new file mode 100644 index 0000000000..2f406bb002 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/results.css @@ -0,0 +1,32 @@ +/* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +td.selected-score { + background-color: #DDD; +} + +td.single-score-cell{ + text-align: center; +} + +.audio-inspector { + text-align: center; +} + +.audio-inspector div{ + margin-bottom: 0; + padding-bottom: 0; + padding-top: 0; +} + +.audio-inspector div div{ + margin-bottom: 0; + padding-bottom: 0; + padding-top: 0; +} diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/results.js b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/results.js new file mode 100644 index 0000000000..8e47411058 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/results.js @@ -0,0 +1,376 @@ +// Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the LICENSE file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. + +/** + * Opens the score stats inspector dialog. + * @param {String} dialogId: identifier of the dialog to show. + * @return {DOMElement} The dialog element that has been opened. + */ +function openScoreStatsInspector(dialogId) { + var dialog = document.getElementById(dialogId); + dialog.showModal(); + return dialog; +} + +/** + * Closes the score stats inspector dialog. + */ +function closeScoreStatsInspector() { + var dialog = document.querySelector('dialog[open]'); + if (dialog == null) + return; + dialog.close(); +} + +/** + * Audio inspector class. 
+ * @constructor + */ +function AudioInspector() { + console.debug('Creating an AudioInspector instance.'); + this.audioPlayer_ = new Audio(); + this.metadata_ = {}; + this.currentScore_ = null; + this.audioInspector_ = null; + this.snackbarContainer_ = document.querySelector('#snackbar'); + + // Get base URL without anchors. + this.baseUrl_ = window.location.href; + var index = this.baseUrl_.indexOf('#'); + if (index > 0) + this.baseUrl_ = this.baseUrl_.substr(0, index) + console.info('Base URL set to "' + window.location.href + '".'); + + window.event.stopPropagation(); + this.createTextAreasForCopy_(); + this.createAudioInspector_(); + this.initializeEventHandlers_(); + + // When MDL is ready, parse the anchor (if any) to show the requested + // experiment. + var self = this; + document.querySelectorAll('header a')[0].addEventListener( + 'mdl-componentupgraded', function() { + if (!self.parseWindowAnchor()) { + // If not experiment is requested, open the first section. + console.info('No anchor parsing, opening the first section.'); + document.querySelectorAll('header a > span')[0].click(); + } + }); +} + +/** + * Parse the anchor in the window URL. + * @return {bool} True if the parsing succeeded. + */ +AudioInspector.prototype.parseWindowAnchor = function() { + var index = location.href.indexOf('#'); + if (index == -1) { + console.debug('No # found in the URL.'); + return false; + } + + var anchor = location.href.substr(index - location.href.length + 1); + console.info('Anchor changed: "' + anchor + '".'); + + var parts = anchor.split('&'); + if (parts.length != 3) { + console.info('Ignoring anchor with invalid number of fields.'); + return false; + } + + var openDialog = document.querySelector('dialog[open]'); + try { + // Open the requested dialog if not already open. + if (!openDialog || openDialog.id != parts[1]) { + !openDialog || openDialog.close(); + document.querySelectorAll('header a > span')[ + parseInt(parts[0].substr(1))].click(); + openDialog = openScoreStatsInspector(parts[1]); + } + + // Trigger click on cell. + var cell = openDialog.querySelector('td.' + parts[2]); + cell.focus(); + cell.click(); + + this.showNotification_('Experiment selected.'); + return true; + } catch (e) { + this.showNotification_('Cannot select experiment :('); + console.error('Exception caught while selecting experiment: "' + e + '".'); + } + + return false; +} + +/** + * Set up the inspector for a new score. + * @param {DOMElement} element: Element linked to the selected score. + */ +AudioInspector.prototype.selectedScoreChange = function(element) { + if (this.currentScore_ == element) { return; } + if (this.currentScore_ != null) { + this.currentScore_.classList.remove('selected-score'); + } + this.currentScore_ = element; + this.currentScore_.classList.add('selected-score'); + this.stopAudio(); + + // Read metadata. + var matches = element.querySelectorAll('input[type=hidden]'); + this.metadata_ = {}; + for (var index = 0; index < matches.length; ++index) { + this.metadata_[matches[index].name] = matches[index].value; + } + + // Show the audio inspector interface. + var container = element.parentNode.parentNode.parentNode.parentNode; + var audioInspectorPlaceholder = container.querySelector( + '.audio-inspector-placeholder'); + this.moveInspector_(audioInspectorPlaceholder); +}; + +/** + * Stop playing audio. + */ +AudioInspector.prototype.stopAudio = function() { + console.info('Pausing audio play out.'); + this.audioPlayer_.pause(); +}; + +/** + * Show a text message using the snackbar. 
+ */ +AudioInspector.prototype.showNotification_ = function(text) { + try { + this.snackbarContainer_.MaterialSnackbar.showSnackbar({ + message: text, timeout: 2000}); + } catch (e) { + // Fallback to an alert. + alert(text); + console.warn('Cannot use snackbar: "' + e + '"'); + } +} + +/** + * Move the audio inspector DOM node into the given parent. + * @param {DOMElement} newParentNode: New parent for the inspector. + */ +AudioInspector.prototype.moveInspector_ = function(newParentNode) { + newParentNode.appendChild(this.audioInspector_); +}; + +/** + * Play audio file from url. + * @param {string} metadataFieldName: Metadata field name. + */ +AudioInspector.prototype.playAudio = function(metadataFieldName) { + if (this.metadata_[metadataFieldName] == undefined) { return; } + if (this.metadata_[metadataFieldName] == 'None') { + alert('The selected stream was not used during the experiment.'); + return; + } + this.stopAudio(); + this.audioPlayer_.src = this.metadata_[metadataFieldName]; + console.debug('Audio source URL: "' + this.audioPlayer_.src + '"'); + this.audioPlayer_.play(); + console.info('Playing out audio.'); +}; + +/** + * Create hidden text areas to copy URLs. + * + * For each dialog, one text area is created since it is not possible to select + * text on a text area outside of the active dialog. + */ +AudioInspector.prototype.createTextAreasForCopy_ = function() { + var self = this; + document.querySelectorAll('dialog.mdl-dialog').forEach(function(element) { + var textArea = document.createElement("textarea"); + textArea.classList.add('url-copy'); + textArea.style.position = 'fixed'; + textArea.style.bottom = 0; + textArea.style.left = 0; + textArea.style.width = '2em'; + textArea.style.height = '2em'; + textArea.style.border = 'none'; + textArea.style.outline = 'none'; + textArea.style.boxShadow = 'none'; + textArea.style.background = 'transparent'; + textArea.style.fontSize = '6px'; + element.appendChild(textArea); + }); +} + +/** + * Create audio inspector. + */ +AudioInspector.prototype.createAudioInspector_ = function() { + var buttonIndex = 0; + function getButtonHtml(icon, toolTipText, caption, metadataFieldName) { + var buttonId = 'audioInspectorButton' + buttonIndex++; + html = caption == null ? '' : caption; + html += '<button class="mdl-button mdl-js-button mdl-button--icon ' + + 'mdl-js-ripple-effect" id="' + buttonId + '">' + + '<i class="material-icons">' + icon + '</i>' + + '<div class="mdl-tooltip" data-mdl-for="' + buttonId + '">' + + toolTipText + + '</div>'; + if (metadataFieldName != null) { + html += '<input type="hidden" value="' + metadataFieldName + '">' + } + html += '</button>' + + return html; + } + + // TODO(alessiob): Add timeline and highlight current track by changing icon + // color. 
+ + this.audioInspector_ = document.createElement('div'); + this.audioInspector_.classList.add('audio-inspector'); + this.audioInspector_.innerHTML = + '<div class="mdl-grid">' + + '<div class="mdl-layout-spacer"></div>' + + '<div class="mdl-cell mdl-cell--2-col">' + + getButtonHtml('play_arrow', 'Simulated echo', 'E<sub>in</sub>', + 'echo_filepath') + + '</div>' + + '<div class="mdl-cell mdl-cell--2-col">' + + getButtonHtml('stop', 'Stop playing [S]', null, '__stop__') + + '</div>' + + '<div class="mdl-cell mdl-cell--2-col">' + + getButtonHtml('play_arrow', 'Render stream', 'R<sub>in</sub>', + 'render_filepath') + + '</div>' + + '<div class="mdl-layout-spacer"></div>' + + '</div>' + + '<div class="mdl-grid">' + + '<div class="mdl-layout-spacer"></div>' + + '<div class="mdl-cell mdl-cell--2-col">' + + getButtonHtml('play_arrow', 'Capture stream (APM input) [1]', + 'Y\'<sub>in</sub>', 'capture_filepath') + + '</div>' + + '<div class="mdl-cell mdl-cell--2-col"><strong>APM</strong></div>' + + '<div class="mdl-cell mdl-cell--2-col">' + + getButtonHtml('play_arrow', 'APM output [2]', 'Y<sub>out</sub>', + 'apm_output_filepath') + + '</div>' + + '<div class="mdl-layout-spacer"></div>' + + '</div>' + + '<div class="mdl-grid">' + + '<div class="mdl-layout-spacer"></div>' + + '<div class="mdl-cell mdl-cell--2-col">' + + getButtonHtml('play_arrow', 'Echo-free capture stream', + 'Y<sub>in</sub>', 'echo_free_capture_filepath') + + '</div>' + + '<div class="mdl-cell mdl-cell--2-col">' + + getButtonHtml('play_arrow', 'Clean capture stream', + 'Y<sub>clean</sub>', 'clean_capture_input_filepath') + + '</div>' + + '<div class="mdl-cell mdl-cell--2-col">' + + getButtonHtml('play_arrow', 'APM reference [3]', 'Y<sub>ref</sub>', + 'apm_reference_filepath') + + '</div>' + + '<div class="mdl-layout-spacer"></div>' + + '</div>'; + + // Add an invisible node as initial container for the audio inspector. + var parent = document.createElement('div'); + parent.style.display = 'none'; + this.moveInspector_(parent); + document.body.appendChild(parent); +}; + +/** + * Initialize event handlers. + */ +AudioInspector.prototype.initializeEventHandlers_ = function() { + var self = this; + + // Score cells. + document.querySelectorAll('td.single-score-cell').forEach(function(element) { + element.onclick = function() { + self.selectedScoreChange(this); + } + }); + + // Copy anchor URLs icons. + if (document.queryCommandSupported('copy')) { + document.querySelectorAll('td.single-score-cell button').forEach( + function(element) { + element.onclick = function() { + // Find the text area in the dialog. + var textArea = element.closest('dialog').querySelector( + 'textarea.url-copy'); + + // Copy. + textArea.value = self.baseUrl_ + '#' + element.getAttribute( + 'data-anchor'); + textArea.select(); + try { + if (!document.execCommand('copy')) + throw 'Copy returned false'; + self.showNotification_('Experiment URL copied.'); + } catch (e) { + self.showNotification_('Cannot copy experiment URL :('); + console.error(e); + } + } + }); + } else { + self.showNotification_( + 'The copy command is disabled. URL copy is not enabled.'); + } + + // Audio inspector buttons. + this.audioInspector_.querySelectorAll('button').forEach(function(element) { + var target = element.querySelector('input[type=hidden]'); + if (target == null) { return; } + element.onclick = function() { + if (target.value == '__stop__') { + self.stopAudio(); + } else { + self.playAudio(target.value); + } + }; + }); + + // Dialog close handlers. 
+  document.querySelectorAll('dialog').forEach(function(element) {
+    element.onclose = function() {
+      self.stopAudio();
+    };
+  });
+
+  // Keyboard shortcuts.
+  window.onkeyup = function(e) {
+    var key = e.keyCode ? e.keyCode : e.which;
+    switch (key) {
+      case 49:  // 1.
+        self.playAudio('capture_filepath');
+        break;
+      case 50:  // 2.
+        self.playAudio('apm_output_filepath');
+        break;
+      case 51:  // 3.
+        self.playAudio('apm_reference_filepath');
+        break;
+      case 83:  // S.
+      case 115:  // s.
+        self.stopAudio();
+        break;
+    }
+  };
+
+  // Hash change.
+  window.onhashchange = function(e) {
+    self.parseWindowAnchor();
+  };
+};
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/signal_processing.py b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/signal_processing.py
new file mode 100644
index 0000000000..9e0198da85
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/signal_processing.py
@@ -0,0 +1,356 @@
+# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+
+"""Signal processing utility module.
+"""
+
+import array
+import enum
+import logging
+import os
+import sys
+
+try:
+  import numpy as np
+except ImportError:
+  logging.critical('Cannot import the third-party Python package numpy')
+  sys.exit(1)
+
+try:
+  import pydub
+  import pydub.generators
+except ImportError:
+  logging.critical('Cannot import the third-party Python package pydub')
+  sys.exit(1)
+
+try:
+  import scipy.signal
+  import scipy.fftpack
+except ImportError:
+  logging.critical('Cannot import the third-party Python package scipy')
+  sys.exit(1)
+
+from . import exceptions
+
+
+class SignalProcessingUtils(object):
+  """Collection of signal processing utilities.
+  """
+
+  @enum.unique
+  class MixPadding(enum.Enum):
+    NO_PADDING = 0
+    ZERO_PADDING = 1
+    LOOP = 2
+
+  def __init__(self):
+    pass
+
+  @classmethod
+  def LoadWav(cls, filepath, channels=1):
+    """Loads a wav file.
+
+    Args:
+      filepath: path to the wav audio track file to load.
+      channels: number of channels (downmixing to mono by default).
+
+    Returns:
+      AudioSegment instance.
+    """
+    if not os.path.exists(filepath):
+      logging.error('cannot find the <%s> audio track file', filepath)
+      raise exceptions.FileNotFoundError()
+    return pydub.AudioSegment.from_file(
+        filepath, format='wav', channels=channels)
+
+  @classmethod
+  def SaveWav(cls, output_filepath, signal):
+    """Saves a wav file.
+
+    Args:
+      output_filepath: path to the wav audio track file to save.
+      signal: AudioSegment instance.
+    """
+    return signal.export(output_filepath, format='wav')
+
+  @classmethod
+  def CountSamples(cls, signal):
+    """Number of samples per channel.
+
+    Args:
+      signal: AudioSegment instance.
+
+    Returns:
+      An integer.
+    """
+    number_of_samples = len(signal.get_array_of_samples())
+    assert signal.channels > 0
+    assert number_of_samples % signal.channels == 0
+    # Use floor division so that an integer is returned on both Python 2 and 3.
+    return number_of_samples // signal.channels
+
+  @classmethod
+  def GenerateSilence(cls, duration=1000, sample_rate=48000):
+    """Generates silence.
+
+    This method can also be used to create a template AudioSegment instance.
+    A template can then be used with other Generate*() methods accepting an
+    AudioSegment instance as argument.
+
+    Args:
+      duration: duration in ms.
+      sample_rate: sample rate.
+
+    Returns:
+      AudioSegment instance.
+    """
+    return pydub.AudioSegment.silent(duration, sample_rate)
+
+  @classmethod
+  def GeneratePureTone(cls, template, frequency=440.0):
+    """Generates a pure tone.
+
+    The pure tone is generated with the same duration and in the same format as
+    the given template signal.
+
+    Args:
+      template: AudioSegment instance.
+      frequency: Frequency of the pure tone in Hz.
+
+    Returns:
+      AudioSegment instance.
+    """
+    if frequency > template.frame_rate >> 1:
+      raise exceptions.SignalProcessingException('Invalid frequency')
+
+    generator = pydub.generators.Sine(
+        sample_rate=template.frame_rate,
+        bit_depth=template.sample_width * 8,
+        freq=frequency)
+
+    return generator.to_audio_segment(
+        duration=len(template),
+        volume=0.0)
+
+  @classmethod
+  def GenerateWhiteNoise(cls, template):
+    """Generates white noise.
+
+    The white noise is generated with the same duration and in the same format
+    as the given template signal.
+
+    Args:
+      template: AudioSegment instance.
+
+    Returns:
+      AudioSegment instance.
+    """
+    generator = pydub.generators.WhiteNoise(
+        sample_rate=template.frame_rate,
+        bit_depth=template.sample_width * 8)
+    return generator.to_audio_segment(
+        duration=len(template),
+        volume=0.0)
+
+  @classmethod
+  def AudioSegmentToRawData(cls, signal):
+    """Extracts the raw samples of an AudioSegment as a numpy int16 array."""
+    samples = signal.get_array_of_samples()
+    if samples.typecode != 'h':
+      raise exceptions.SignalProcessingException('Unsupported samples type')
+    return np.array(samples, np.int16)
+
+  @classmethod
+  def Fft(cls, signal, normalize=True):
+    """Computes the one-sided FFT of a mono signal."""
+    if signal.channels != 1:
+      raise NotImplementedError('multiple-channel FFT not implemented')
+    x = cls.AudioSegmentToRawData(signal).astype(np.float32)
+    if normalize:
+      x /= max(abs(np.max(x)), 1.0)
+    y = scipy.fftpack.fft(x)
+    # Keep the first half only (floor division for Python 3 compatibility).
+    return y[:len(y) // 2]
+
+  @classmethod
+  def DetectHardClipping(cls, signal, threshold=2):
+    """Detects hard clipping.
+
+    Hard clipping is simply detected by counting samples that touch either the
+    lower or upper bound too many times in a row (according to |threshold|).
+    The presence of a single sequence of samples meeting such property is
+    enough to label the signal as hard clipped.
+
+    Args:
+      signal: AudioSegment instance.
+      threshold: minimum number of samples at full-scale in a row.
+
+    Returns:
+      True if hard clipping is detected, False otherwise.
+    """
+    if signal.channels != 1:
+      raise NotImplementedError('multiple-channel clipping not implemented')
+    if signal.sample_width != 2:  # Note that signal.sample_width is in bytes.
+      raise exceptions.SignalProcessingException(
+          'hard-clipping detection only supported for 16 bit samples')
+    samples = cls.AudioSegmentToRawData(signal)
+
+    # Detect adjacent clipped samples.
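+    # Build one boolean mask per rail: an entry is True when the sample sits
+    # exactly at the int16 minimum or maximum. A run of |threshold| or more
+    # consecutive True flags in either mask counts as hard clipping.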
+    samples_type_info = np.iinfo(samples.dtype)
+    mask_min = samples == samples_type_info.min
+    mask_max = samples == samples_type_info.max
+
+    def HasLongSequence(vector, min_length=threshold):
+      """Returns True if there are one or more long sequences of True flags."""
+      seq_length = 0
+      for b in vector:
+        seq_length = seq_length + 1 if b else 0
+        if seq_length >= min_length:
+          return True
+      return False
+
+    return HasLongSequence(mask_min) or HasLongSequence(mask_max)
+
+  @classmethod
+  def ApplyImpulseResponse(cls, signal, impulse_response):
+    """Applies an impulse response to a signal.
+
+    Args:
+      signal: AudioSegment instance.
+      impulse_response: list or numpy vector of float values.
+
+    Returns:
+      AudioSegment instance.
+    """
+    # Get samples.
+    assert signal.channels == 1, (
+        'multiple-channel recordings not supported')
+    samples = signal.get_array_of_samples()
+
+    # Convolve.
+    logging.info('applying %d order impulse response to a signal lasting %d ms',
+                 len(impulse_response), len(signal))
+    convolved_samples = scipy.signal.fftconvolve(
+        in1=samples,
+        in2=impulse_response,
+        mode='full').astype(np.int16)
+    logging.info('convolution computed')
+
+    # Cast.
+    convolved_samples = array.array(signal.array_type, convolved_samples)
+
+    # Verify.
+    logging.debug('signal length: %d samples', len(samples))
+    logging.debug('convolved signal length: %d samples', len(convolved_samples))
+    assert len(convolved_samples) > len(samples)
+
+    # Generate convolved signal AudioSegment instance.
+    convolved_signal = pydub.AudioSegment(
+        data=convolved_samples,
+        metadata={
+            'sample_width': signal.sample_width,
+            'frame_rate': signal.frame_rate,
+            'frame_width': signal.frame_width,
+            'channels': signal.channels,
+        })
+    assert len(convolved_signal) > len(signal)
+
+    return convolved_signal
+
+  @classmethod
+  def Normalize(cls, signal):
+    """Normalizes a signal.
+
+    Args:
+      signal: AudioSegment instance.
+
+    Returns:
+      An AudioSegment instance.
+    """
+    return signal.apply_gain(-signal.max_dBFS)
+
+  @classmethod
+  def Copy(cls, signal):
+    """Makes a copy of a signal.
+
+    Args:
+      signal: AudioSegment instance.
+
+    Returns:
+      An AudioSegment instance.
+    """
+    return pydub.AudioSegment(
+        data=signal.get_array_of_samples(),
+        metadata={
+            'sample_width': signal.sample_width,
+            'frame_rate': signal.frame_rate,
+            'frame_width': signal.frame_width,
+            'channels': signal.channels,
+        })
+
+  @classmethod
+  def MixSignals(cls, signal, noise, target_snr=0.0,
+                 pad_noise=MixPadding.NO_PADDING):
+    """Mixes |signal| and |noise| with a target SNR.
+
+    Mix |signal| and |noise| with a desired SNR by scaling |noise|.
+    If the target SNR is +/- infinite, a copy of signal/noise is returned.
+    If |signal| is shorter than |noise|, the length of the mix equals that of
+    |signal|. Otherwise, the mix length depends on whether padding is applied.
+    When padding is not applied, that is |pad_noise| is set to NO_PADDING
+    (default), the mix length equals that of |noise| - i.e., |signal| is
+    truncated. Otherwise, |noise| is extended and the resulting mix has the
+    same length as |signal|.
+
+    Args:
+      signal: AudioSegment instance (signal).
+      noise: AudioSegment instance (noise).
+      target_snr: float, numpy.Inf or -numpy.Inf (dB).
+      pad_noise: SignalProcessingUtils.MixPadding, default: NO_PADDING.
+
+    Returns:
+      An AudioSegment instance.
+    """
+    # Handle infinite target SNR.
+    if target_snr == -np.Inf:
+      # Return a copy of noise.
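+      # With a target SNR of -Inf the scaled noise would completely mask the
+      # signal, so the mix degenerates to the noise itself.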
+      logging.warning('SNR = -Inf, returning noise')
+      return cls.Copy(noise)
+    elif target_snr == np.Inf:
+      # Return a copy of signal.
+      logging.warning('SNR = +Inf, returning signal')
+      return cls.Copy(signal)
+
+    # Check signal and noise power.
+    signal_power = float(signal.dBFS)
+    noise_power = float(noise.dBFS)
+    if signal_power == -np.Inf:
+      logging.error('signal has -Inf power, cannot mix')
+      raise exceptions.SignalProcessingException(
+          'cannot mix a signal with -Inf power')
+    if noise_power == -np.Inf:
+      logging.error('noise has -Inf power, cannot mix')
+      raise exceptions.SignalProcessingException(
+          'cannot mix a noise with -Inf power')
+
+    # Mix.
+    gain_db = signal_power - noise_power - target_snr
+    signal_duration = len(signal)
+    noise_duration = len(noise)
+    if signal_duration <= noise_duration:
+      # Ignore |pad_noise|; |noise| is truncated if longer than |signal| and
+      # the mix will have the same length as |signal|.
+      return signal.overlay(noise.apply_gain(gain_db))
+    elif pad_noise == cls.MixPadding.NO_PADDING:
+      # |signal| is longer than |noise|, but no padding is applied to |noise|.
+      # Truncate |signal|.
+      return noise.overlay(signal, gain_during_overlay=gain_db)
+    elif pad_noise == cls.MixPadding.ZERO_PADDING:
+      # TODO(alessiob): Check that this works as expected.
+      return signal.overlay(noise.apply_gain(gain_db))
+    elif pad_noise == cls.MixPadding.LOOP:
+      # |signal| is longer than |noise|; extend |noise| by looping.
+      return signal.overlay(noise.apply_gain(gain_db), loop=True)
+    else:
+      raise exceptions.SignalProcessingException('invalid padding type')
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/signal_processing_unittest.py b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/signal_processing_unittest.py
new file mode 100644
index 0000000000..30ada41fb9
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/signal_processing_unittest.py
@@ -0,0 +1,186 @@
+# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+
+"""Unit tests for the signal_processing module.
+"""
+
+import unittest
+
+import numpy as np
+import pydub
+
+from . import exceptions
+from . import signal_processing
+
+
+class TestSignalProcessing(unittest.TestCase):
+  """Unit tests for the signal_processing module.
+  """
+
+  def testMixSignals(self):
+    # Generate a template signal with which white noise can be generated.
+    silence = pydub.AudioSegment.silent(duration=1000, frame_rate=48000)
+
+    # Generate two distinct AudioSegment instances with 1 second of white noise.
+    signal = signal_processing.SignalProcessingUtils.GenerateWhiteNoise(
+        silence)
+    noise = signal_processing.SignalProcessingUtils.GenerateWhiteNoise(
+        silence)
+
+    # Extract samples.
+    signal_samples = signal.get_array_of_samples()
+    noise_samples = noise.get_array_of_samples()
+
+    # Test target SNR -Inf (noise expected).
+    mix_neg_inf = signal_processing.SignalProcessingUtils.MixSignals(
+        signal, noise, -np.Inf)
+    self.assertEqual(len(noise), len(mix_neg_inf))  # Check duration.
+    mix_neg_inf_samples = mix_neg_inf.get_array_of_samples()
+    self.assertTrue(  # Check samples.
+        all([x == y for x, y in zip(noise_samples, mix_neg_inf_samples)]))
+
+    # Test target SNR 0.0 (different data expected).
+    mix_0 = signal_processing.SignalProcessingUtils.MixSignals(
+        signal, noise, 0.0)
+    self.assertEqual(len(signal), len(mix_0))  # Check duration.
+    self.assertEqual(len(noise), len(mix_0))
+    mix_0_samples = mix_0.get_array_of_samples()
+    self.assertTrue(
+        any([x != y for x, y in zip(signal_samples, mix_0_samples)]))
+    self.assertTrue(
+        any([x != y for x, y in zip(noise_samples, mix_0_samples)]))
+
+    # Test target SNR +Inf (signal expected).
+    mix_pos_inf = signal_processing.SignalProcessingUtils.MixSignals(
+        signal, noise, np.Inf)
+    self.assertEqual(len(signal), len(mix_pos_inf))  # Check duration.
+    mix_pos_inf_samples = mix_pos_inf.get_array_of_samples()
+    self.assertTrue(  # Check samples.
+        all([x == y for x, y in zip(signal_samples, mix_pos_inf_samples)]))
+
+  def testMixSignalsMinInfPower(self):
+    silence = pydub.AudioSegment.silent(duration=1000, frame_rate=48000)
+    signal = signal_processing.SignalProcessingUtils.GenerateWhiteNoise(
+        silence)
+
+    with self.assertRaises(exceptions.SignalProcessingException):
+      _ = signal_processing.SignalProcessingUtils.MixSignals(
+          signal, silence, 0.0)
+
+    with self.assertRaises(exceptions.SignalProcessingException):
+      _ = signal_processing.SignalProcessingUtils.MixSignals(
+          silence, signal, 0.0)
+
+  def testMixSignalNoiseDifferentLengths(self):
+    # Test signals.
+    shorter = signal_processing.SignalProcessingUtils.GenerateWhiteNoise(
+        pydub.AudioSegment.silent(duration=1000, frame_rate=8000))
+    longer = signal_processing.SignalProcessingUtils.GenerateWhiteNoise(
+        pydub.AudioSegment.silent(duration=2000, frame_rate=8000))
+
+    # When the signal is shorter than the noise, the mix length always equals
+    # that of the signal regardless of whether padding is applied.
+    # No noise padding, length of signal less than that of noise.
+    mix = signal_processing.SignalProcessingUtils.MixSignals(
+        signal=shorter,
+        noise=longer,
+        pad_noise=signal_processing.SignalProcessingUtils.MixPadding.NO_PADDING)
+    self.assertEqual(len(shorter), len(mix))
+    # With noise padding, length of signal less than that of noise.
+    mix = signal_processing.SignalProcessingUtils.MixSignals(
+        signal=shorter,
+        noise=longer,
+        pad_noise=signal_processing.SignalProcessingUtils.MixPadding.ZERO_PADDING)
+    self.assertEqual(len(shorter), len(mix))
+
+    # When the signal is longer than the noise, the mix length depends on
+    # whether padding is applied.
+    # No noise padding, length of signal greater than that of noise.
+    mix = signal_processing.SignalProcessingUtils.MixSignals(
+        signal=longer,
+        noise=shorter,
+        pad_noise=signal_processing.SignalProcessingUtils.MixPadding.NO_PADDING)
+    self.assertEqual(len(shorter), len(mix))
+    # With noise padding, length of signal greater than that of noise.
+    mix = signal_processing.SignalProcessingUtils.MixSignals(
+        signal=longer,
+        noise=shorter,
+        pad_noise=signal_processing.SignalProcessingUtils.MixPadding.ZERO_PADDING)
+    self.assertEqual(len(longer), len(mix))
+
+  def testMixSignalNoisePaddingTypes(self):
+    # Test signals.
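+    # A 1 s white noise track is used as noise and a 2 s pure tone as signal,
+    # so the 1-2 s tail of the mix reveals how the noise was padded.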
+ shorter = signal_processing.SignalProcessingUtils.GenerateWhiteNoise( + pydub.AudioSegment.silent(duration=1000, frame_rate=8000)) + longer = signal_processing.SignalProcessingUtils.GeneratePureTone( + pydub.AudioSegment.silent(duration=2000, frame_rate=8000), 440.0) + + # Zero padding: expect pure tone only in 1-2s. + mix_zero_pad = signal_processing.SignalProcessingUtils.MixSignals( + signal=longer, + noise=shorter, + target_snr=-6, + pad_noise=signal_processing.SignalProcessingUtils.MixPadding.ZERO_PADDING) + + # Loop: expect pure tone plus noise in 1-2s. + mix_loop = signal_processing.SignalProcessingUtils.MixSignals( + signal=longer, + noise=shorter, + target_snr=-6, + pad_noise=signal_processing.SignalProcessingUtils.MixPadding.LOOP) + + def Energy(signal): + samples = signal_processing.SignalProcessingUtils.AudioSegmentToRawData( + signal).astype(np.float32) + return np.sum(samples * samples) + + e_mix_zero_pad = Energy(mix_zero_pad[-1000:]) + e_mix_loop = Energy(mix_loop[-1000:]) + self.assertLess(0, e_mix_zero_pad) + self.assertLess(e_mix_zero_pad, e_mix_loop) + + def testMixSignalSnr(self): + # Test signals. + tone_low = signal_processing.SignalProcessingUtils.GeneratePureTone( + pydub.AudioSegment.silent(duration=64, frame_rate=8000), 250.0) + tone_high = signal_processing.SignalProcessingUtils.GeneratePureTone( + pydub.AudioSegment.silent(duration=64, frame_rate=8000), 3000.0) + + def ToneAmplitudes(mix): + """Returns the amplitude of the coefficients #16 and #192, which + correspond to the tones at 250 and 3k Hz respectively.""" + mix_fft = np.absolute(signal_processing.SignalProcessingUtils.Fft(mix)) + return mix_fft[16], mix_fft[192] + + mix = signal_processing.SignalProcessingUtils.MixSignals( + signal=tone_low, + noise=tone_high, + target_snr=-6) + ampl_low, ampl_high = ToneAmplitudes(mix) + self.assertLess(ampl_low, ampl_high) + + mix = signal_processing.SignalProcessingUtils.MixSignals( + signal=tone_high, + noise=tone_low, + target_snr=-6) + ampl_low, ampl_high = ToneAmplitudes(mix) + self.assertLess(ampl_high, ampl_low) + + mix = signal_processing.SignalProcessingUtils.MixSignals( + signal=tone_low, + noise=tone_high, + target_snr=6) + ampl_low, ampl_high = ToneAmplitudes(mix) + self.assertLess(ampl_high, ampl_low) + + mix = signal_processing.SignalProcessingUtils.MixSignals( + signal=tone_high, + noise=tone_low, + target_snr=6) + ampl_low, ampl_high = ToneAmplitudes(mix) + self.assertLess(ampl_low, ampl_high) diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/simulation.py b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/simulation.py new file mode 100644 index 0000000000..e313bf34b4 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/simulation.py @@ -0,0 +1,422 @@ +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +"""APM module simulator. +""" + +import logging +import os + +from . import annotations +from . import data_access +from . import echo_path_simulation +from . import echo_path_simulation_factory +from . import eval_scores +from . 
import exceptions +from . import input_mixer +from . import input_signal_creator +from . import signal_processing +from . import test_data_generation + + +class ApmModuleSimulator(object): + """Audio processing module (APM) simulator class. + """ + + _TEST_DATA_GENERATOR_CLASSES = ( + test_data_generation.TestDataGenerator.REGISTERED_CLASSES) + _EVAL_SCORE_WORKER_CLASSES = eval_scores.EvaluationScore.REGISTERED_CLASSES + + _PREFIX_APM_CONFIG = 'apmcfg-' + _PREFIX_CAPTURE = 'capture-' + _PREFIX_RENDER = 'render-' + _PREFIX_ECHO_SIMULATOR = 'echosim-' + _PREFIX_TEST_DATA_GEN = 'datagen-' + _PREFIX_TEST_DATA_GEN_PARAMS = 'datagen_params-' + _PREFIX_SCORE = 'score-' + + def __init__(self, test_data_generator_factory, evaluation_score_factory, + ap_wrapper, evaluator, external_vads=None): + if external_vads is None: + external_vads = {} + self._test_data_generator_factory = test_data_generator_factory + self._evaluation_score_factory = evaluation_score_factory + self._audioproc_wrapper = ap_wrapper + self._evaluator = evaluator + self._annotator = annotations.AudioAnnotationsExtractor( + annotations.AudioAnnotationsExtractor.VadType.ENERGY_THRESHOLD | + annotations.AudioAnnotationsExtractor.VadType.WEBRTC_COMMON_AUDIO | + annotations.AudioAnnotationsExtractor.VadType.WEBRTC_APM, + external_vads + ) + + # Init. + self._test_data_generator_factory.SetOutputDirectoryPrefix( + self._PREFIX_TEST_DATA_GEN_PARAMS) + self._evaluation_score_factory.SetScoreFilenamePrefix( + self._PREFIX_SCORE) + + # Properties for each run. + self._base_output_path = None + self._output_cache_path = None + self._test_data_generators = None + self._evaluation_score_workers = None + self._config_filepaths = None + self._capture_input_filepaths = None + self._render_input_filepaths = None + self._echo_path_simulator_class = None + + @classmethod + def GetPrefixApmConfig(cls): + return cls._PREFIX_APM_CONFIG + + @classmethod + def GetPrefixCapture(cls): + return cls._PREFIX_CAPTURE + + @classmethod + def GetPrefixRender(cls): + return cls._PREFIX_RENDER + + @classmethod + def GetPrefixEchoSimulator(cls): + return cls._PREFIX_ECHO_SIMULATOR + + @classmethod + def GetPrefixTestDataGenerator(cls): + return cls._PREFIX_TEST_DATA_GEN + + @classmethod + def GetPrefixTestDataGeneratorParameters(cls): + return cls._PREFIX_TEST_DATA_GEN_PARAMS + + @classmethod + def GetPrefixScore(cls): + return cls._PREFIX_SCORE + + def Run(self, config_filepaths, capture_input_filepaths, + test_data_generator_names, eval_score_names, output_dir, + render_input_filepaths=None, echo_path_simulator_name=( + echo_path_simulation.NoEchoPathSimulator.NAME)): + """Runs the APM simulation. + + Initializes paths and required instances, then runs all the simulations. + The render input can be optionally added. If added, the number of capture + input audio tracks and the number of render input audio tracks have to be + equal. The two lists are used to form pairs of capture and render input. + + Args: + config_filepaths: set of APM configuration files to test. + capture_input_filepaths: set of capture input audio track files to test. + test_data_generator_names: set of test data generator names to test. + eval_score_names: set of evaluation score names to test. + output_dir: base path to the output directory for wav files and outcomes. + render_input_filepaths: set of render input audio track files to test. + echo_path_simulator_name: name of the echo path simulator to use when + render input is provided. 
+ """ + assert render_input_filepaths is None or ( + len(capture_input_filepaths) == len(render_input_filepaths)), ( + 'render input set size not matching input set size') + assert render_input_filepaths is None or echo_path_simulator_name in ( + echo_path_simulation.EchoPathSimulator.REGISTERED_CLASSES), ( + 'invalid echo path simulator') + self._base_output_path = os.path.abspath(output_dir) + + # Output path used to cache the data shared across simulations. + self._output_cache_path = os.path.join(self._base_output_path, '_cache') + + # Instance test data generators. + self._test_data_generators = [self._test_data_generator_factory.GetInstance( + test_data_generators_class=( + self._TEST_DATA_GENERATOR_CLASSES[name])) for name in ( + test_data_generator_names)] + + # Instance evaluation score workers. + self._evaluation_score_workers = [ + self._evaluation_score_factory.GetInstance( + evaluation_score_class=self._EVAL_SCORE_WORKER_CLASSES[name]) for ( + name) in eval_score_names] + + # Set APM configuration file paths. + self._config_filepaths = self._CreatePathsCollection(config_filepaths) + + # Set probing signal file paths. + if render_input_filepaths is None: + # Capture input only. + self._capture_input_filepaths = self._CreatePathsCollection( + capture_input_filepaths) + self._render_input_filepaths = None + else: + # Set both capture and render input signals. + self._SetTestInputSignalFilePaths( + capture_input_filepaths, render_input_filepaths) + + # Set the echo path simulator class. + self._echo_path_simulator_class = ( + echo_path_simulation.EchoPathSimulator.REGISTERED_CLASSES[ + echo_path_simulator_name]) + + self._SimulateAll() + + def _SimulateAll(self): + """Runs all the simulations. + + Iterates over the combinations of APM configurations, probing signals, and + test data generators. This method is mainly responsible for the creation of + the cache and output directories required in order to call _Simulate(). + """ + without_render_input = self._render_input_filepaths is None + + # Try different APM config files. + for config_name in self._config_filepaths: + config_filepath = self._config_filepaths[config_name] + + # Try different capture-render pairs. + for capture_input_name in self._capture_input_filepaths: + # Output path for the capture signal annotations. + capture_annotations_cache_path = os.path.join( + self._output_cache_path, + self._PREFIX_CAPTURE + capture_input_name) + data_access.MakeDirectory(capture_annotations_cache_path) + + # Capture. + capture_input_filepath = self._capture_input_filepaths[ + capture_input_name] + if not os.path.exists(capture_input_filepath): + # If the input signal file does not exist, try to create using the + # available input signal creators. + self._CreateInputSignal(capture_input_filepath) + assert os.path.exists(capture_input_filepath) + self._ExtractCaptureAnnotations( + capture_input_filepath, capture_annotations_cache_path) + + # Render and simulated echo path (optional). + render_input_filepath = None if without_render_input else ( + self._render_input_filepaths[capture_input_name]) + render_input_name = '(none)' if without_render_input else ( + self._ExtractFileName(render_input_filepath)) + echo_path_simulator = ( + echo_path_simulation_factory.EchoPathSimulatorFactory.GetInstance( + self._echo_path_simulator_class, render_input_filepath)) + + # Try different test data generators. 
+        for test_data_generators in self._test_data_generators:
+          logging.info('APM config preset: <%s>, capture: <%s>, render: <%s>, '
+                       'test data generator: <%s>, echo simulator: <%s>',
+                       config_name, capture_input_name, render_input_name,
+                       test_data_generators.NAME, echo_path_simulator.NAME)
+
+          # Output path for the generated test data.
+          test_data_cache_path = os.path.join(
+              capture_annotations_cache_path,
+              self._PREFIX_TEST_DATA_GEN + test_data_generators.NAME)
+          data_access.MakeDirectory(test_data_cache_path)
+          logging.debug('test data cache path: <%s>', test_data_cache_path)
+
+          # Output path for the echo simulator and APM input mixer output.
+          echo_test_data_cache_path = os.path.join(
+              test_data_cache_path, 'echosim-{}'.format(
+                  echo_path_simulator.NAME))
+          data_access.MakeDirectory(echo_test_data_cache_path)
+          logging.debug('echo test data cache path: <%s>',
+                        echo_test_data_cache_path)
+
+          # Full output path.
+          output_path = os.path.join(
+              self._base_output_path,
+              self._PREFIX_APM_CONFIG + config_name,
+              self._PREFIX_CAPTURE + capture_input_name,
+              self._PREFIX_RENDER + render_input_name,
+              self._PREFIX_ECHO_SIMULATOR + echo_path_simulator.NAME,
+              self._PREFIX_TEST_DATA_GEN + test_data_generators.NAME)
+          data_access.MakeDirectory(output_path)
+          logging.debug('output path: <%s>', output_path)
+
+          self._Simulate(test_data_generators, capture_input_filepath,
+                         render_input_filepath, test_data_cache_path,
+                         echo_test_data_cache_path, output_path,
+                         config_filepath, echo_path_simulator)
+
+  @staticmethod
+  def _CreateInputSignal(input_signal_filepath):
+    """Creates a missing input signal file.
+
+    The file name is parsed to extract the input signal creator and its params.
+    If a creator is matched and the parameters are valid, a new signal is
+    generated and written to |input_signal_filepath|.
+
+    Args:
+      input_signal_filepath: Path to the input signal audio file to write.
+
+    Raises:
+      InputSignalCreatorException
+    """
+    filename = os.path.splitext(os.path.split(input_signal_filepath)[-1])[0]
+    filename_parts = filename.split('-')
+
+    if len(filename_parts) < 2:
+      raise exceptions.InputSignalCreatorException(
+          'Cannot parse input signal file name')
+
+    signal, metadata = input_signal_creator.InputSignalCreator.Create(
+        filename_parts[0], filename_parts[1].split('_'))
+
+    signal_processing.SignalProcessingUtils.SaveWav(
+        input_signal_filepath, signal)
+    data_access.Metadata.SaveFileMetadata(input_signal_filepath, metadata)
+
+  def _ExtractCaptureAnnotations(self, input_filepath, output_path,
+                                 annotation_name=""):
+    self._annotator.Extract(input_filepath)
+    self._annotator.Save(output_path, annotation_name)
+
+  def _Simulate(self, test_data_generators, clean_capture_input_filepath,
+                render_input_filepath, test_data_cache_path,
+                echo_test_data_cache_path, output_path, config_filepath,
+                echo_path_simulator):
+    """Runs a single set of simulations.
+
+    Simulates a given combination of APM configuration, probing signal, and
+    test data generator. It iterates over the test data generator
+    internal configurations.
+
+    Args:
+      test_data_generators: TestDataGenerator instance.
+      clean_capture_input_filepath: capture input audio track file to be
+                                    processed by a test data generator and
+                                    not affected by echo.
+      render_input_filepath: render input audio track file to test.
+      test_data_cache_path: path for the generated test audio track files.
+      echo_test_data_cache_path: path for the echo simulator.
+      output_path: base output path for the test data generator.
+      config_filepath: APM configuration file to test.
+      echo_path_simulator: EchoPathSimulator instance.
+    """
+    # Generate pairs of noisy input and reference signal files.
+    test_data_generators.Generate(
+        input_signal_filepath=clean_capture_input_filepath,
+        test_data_cache_path=test_data_cache_path,
+        base_output_path=output_path)
+
+    # Extract metadata linked to the clean input file (if any).
+    apm_input_metadata = None
+    try:
+      apm_input_metadata = data_access.Metadata.LoadFileMetadata(
+          clean_capture_input_filepath)
+    except IOError:
+      apm_input_metadata = {}
+    apm_input_metadata['test_data_gen_name'] = test_data_generators.NAME
+    apm_input_metadata['test_data_gen_config'] = None
+
+    # For each test data pair, simulate a call and evaluate.
+    for config_name in test_data_generators.config_names:
+      logging.info(' - test data generator config: <%s>', config_name)
+      apm_input_metadata['test_data_gen_config'] = config_name
+
+      # Paths to the test data generator output.
+      # Note that the reference signal does not depend on the render input,
+      # which is optional.
+      noisy_capture_input_filepath = (
+          test_data_generators.noisy_signal_filepaths[config_name])
+      reference_signal_filepath = (
+          test_data_generators.reference_signal_filepaths[config_name])
+
+      # Output path for the evaluation (e.g., APM output file).
+      evaluation_output_path = test_data_generators.apm_output_paths[
+          config_name]
+
+      # Paths to the APM input signals.
+      echo_path_filepath = echo_path_simulator.Simulate(
+          echo_test_data_cache_path)
+      apm_input_filepath = input_mixer.ApmInputMixer.Mix(
+          echo_test_data_cache_path, noisy_capture_input_filepath,
+          echo_path_filepath)
+
+      # Extract annotations for the APM input mix.
+      apm_input_basepath, apm_input_filename = os.path.split(
+          apm_input_filepath)
+      self._ExtractCaptureAnnotations(
+          apm_input_filepath, apm_input_basepath,
+          os.path.splitext(apm_input_filename)[0] + '-')
+
+      # Simulate a call using APM.
+      self._audioproc_wrapper.Run(
+          config_filepath=config_filepath,
+          capture_input_filepath=apm_input_filepath,
+          render_input_filepath=render_input_filepath,
+          output_path=evaluation_output_path)
+
+      try:
+        # Evaluate.
+        self._evaluator.Run(
+            evaluation_score_workers=self._evaluation_score_workers,
+            apm_input_metadata=apm_input_metadata,
+            apm_output_filepath=self._audioproc_wrapper.output_filepath,
+            reference_input_filepath=reference_signal_filepath,
+            output_path=evaluation_output_path)
+
+        # Save simulation metadata.
+        data_access.Metadata.SaveAudioTestDataPaths(
+            output_path=evaluation_output_path,
+            clean_capture_input_filepath=clean_capture_input_filepath,
+            echo_free_capture_filepath=noisy_capture_input_filepath,
+            echo_filepath=echo_path_filepath,
+            render_filepath=render_input_filepath,
+            capture_filepath=apm_input_filepath,
+            apm_output_filepath=self._audioproc_wrapper.output_filepath,
+            apm_reference_filepath=reference_signal_filepath)
+      except exceptions.EvaluationScoreException as e:
+        logging.warning('the evaluation failed: %s', str(e))
+        continue
+
+  def _SetTestInputSignalFilePaths(self, capture_input_filepaths,
+                                   render_input_filepaths):
+    """Sets input and render input file paths collections.
+
+    Pairs the input and render input files by storing the file paths into two
+    collections. The key is the file name of the input file.
+
+    Args:
+      capture_input_filepaths: list of file paths.
+      render_input_filepaths: list of file paths.
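+
+    The two lists are paired element-wise: the i-th capture file is matched
+    with the i-th render input file.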
+ """ + self._capture_input_filepaths = {} + self._render_input_filepaths = {} + assert len(capture_input_filepaths) == len(render_input_filepaths) + for capture_input_filepath, render_input_filepath in zip( + capture_input_filepaths, render_input_filepaths): + name = self._ExtractFileName(capture_input_filepath) + self._capture_input_filepaths[name] = os.path.abspath( + capture_input_filepath) + self._render_input_filepaths[name] = os.path.abspath( + render_input_filepath) + + @classmethod + def _CreatePathsCollection(cls, filepaths): + """Creates a collection of file paths. + + Given a list of file paths, makes a collection with one item for each file + path. The value is absolute path, the key is the file name without + extenstion. + + Args: + filepaths: list of file paths. + + Returns: + A dict. + """ + filepaths_collection = {} + for filepath in filepaths: + name = cls._ExtractFileName(filepath) + filepaths_collection[name] = os.path.abspath(filepath) + return filepaths_collection + + @classmethod + def _ExtractFileName(cls, filepath): + return os.path.splitext(os.path.split(filepath)[-1])[0] diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/simulation_unittest.py b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/simulation_unittest.py new file mode 100644 index 0000000000..c7ebcbc87a --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/simulation_unittest.py @@ -0,0 +1,203 @@ +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +"""Unit tests for the simulation module. +""" + +import logging +import os +import shutil +import sys +import tempfile +import unittest + +SRC = os.path.abspath(os.path.join( + os.path.dirname((__file__)), os.pardir, os.pardir, os.pardir, os.pardir)) +sys.path.append(os.path.join(SRC, 'third_party', 'pymock')) + +import mock +import pydub + +from . import audioproc_wrapper +from . import eval_scores_factory +from . import evaluation +from . import external_vad +from . import signal_processing +from . import simulation +from . import test_data_generation_factory + + +class TestApmModuleSimulator(unittest.TestCase): + """Unit tests for the ApmModuleSimulator class. + """ + + def setUp(self): + """Create temporary folders and fake audio track.""" + self._output_path = tempfile.mkdtemp() + self._tmp_path = tempfile.mkdtemp() + + silence = pydub.AudioSegment.silent(duration=1000, frame_rate=48000) + fake_signal = signal_processing.SignalProcessingUtils.GenerateWhiteNoise( + silence) + self._fake_audio_track_path = os.path.join(self._output_path, 'fake.wav') + signal_processing.SignalProcessingUtils.SaveWav( + self._fake_audio_track_path, fake_signal) + + def tearDown(self): + """Recursively delete temporary folders.""" + shutil.rmtree(self._output_path) + shutil.rmtree(self._tmp_path) + + def testSimulation(self): + # Instance dependencies to mock and inject. 
+ ap_wrapper = audioproc_wrapper.AudioProcWrapper( + audioproc_wrapper.AudioProcWrapper.DEFAULT_APM_SIMULATOR_BIN_PATH) + evaluator = evaluation.ApmModuleEvaluator() + ap_wrapper.Run = mock.MagicMock(name='Run') + evaluator.Run = mock.MagicMock(name='Run') + + # Instance non-mocked dependencies. + test_data_generator_factory = ( + test_data_generation_factory.TestDataGeneratorFactory( + aechen_ir_database_path='', + noise_tracks_path='', + copy_with_identity=False)) + evaluation_score_factory = eval_scores_factory.EvaluationScoreWorkerFactory( + polqa_tool_bin_path=os.path.join( + os.path.dirname(__file__), 'fake_polqa')) + + # Instance simulator. + simulator = simulation.ApmModuleSimulator( + test_data_generator_factory=test_data_generator_factory, + evaluation_score_factory=evaluation_score_factory, + ap_wrapper=ap_wrapper, + evaluator=evaluator, + external_vads={'fake': external_vad.ExternalVad(os.path.join( + os.path.dirname(__file__), 'fake_external_vad.py'), 'fake')} + ) + + # What to simulate. + config_files = ['apm_configs/default.json'] + input_files = [self._fake_audio_track_path] + test_data_generators = ['identity', 'white_noise'] + eval_scores = ['audio_level_mean', 'polqa'] + + # Run all simulations. + simulator.Run( + config_filepaths=config_files, + capture_input_filepaths=input_files, + test_data_generator_names=test_data_generators, + eval_score_names=eval_scores, + output_dir=self._output_path) + + # Check. + # TODO(alessiob): Once the TestDataGenerator classes can be configured by + # the client code (e.g., number of SNR pairs for the white noise test data + # generator), the exact number of calls to ap_wrapper.Run and evaluator.Run + # is known; use that with assertEqual. + min_number_of_simulations = len(config_files) * len(input_files) * len( + test_data_generators) + self.assertGreaterEqual(len(ap_wrapper.Run.call_args_list), + min_number_of_simulations) + self.assertGreaterEqual(len(evaluator.Run.call_args_list), + min_number_of_simulations) + + def testInputSignalCreation(self): + # Instance simulator. + simulator = simulation.ApmModuleSimulator( + test_data_generator_factory=( + test_data_generation_factory.TestDataGeneratorFactory( + aechen_ir_database_path='', + noise_tracks_path='', + copy_with_identity=False)), + evaluation_score_factory=( + eval_scores_factory.EvaluationScoreWorkerFactory( + polqa_tool_bin_path=os.path.join( + os.path.dirname(__file__), 'fake_polqa'))), + ap_wrapper=audioproc_wrapper.AudioProcWrapper( + audioproc_wrapper.AudioProcWrapper.DEFAULT_APM_SIMULATOR_BIN_PATH), + evaluator=evaluation.ApmModuleEvaluator()) + + # Inexistent input files to be silently created. + input_files = [ + os.path.join(self._tmp_path, 'pure_tone-440_1000.wav'), + os.path.join(self._tmp_path, 'pure_tone-1000_500.wav'), + ] + self.assertFalse(any([os.path.exists(input_file) for input_file in ( + input_files)])) + + # The input files are created during the simulation. + simulator.Run( + config_filepaths=['apm_configs/default.json'], + capture_input_filepaths=input_files, + test_data_generator_names=['identity'], + eval_score_names=['audio_level_peak'], + output_dir=self._output_path) + self.assertTrue(all([os.path.exists(input_file) for input_file in ( + input_files)])) + + def testPureToneGenerationWithTotalHarmonicDistorsion(self): + logging.warning = mock.MagicMock(name='warning') + + # Instance simulator. 
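+    # The mocked logging.warning above lets the test verify that the THD
+    # score is rejected for any test data generator other than 'identity'.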
+ simulator = simulation.ApmModuleSimulator( + test_data_generator_factory=( + test_data_generation_factory.TestDataGeneratorFactory( + aechen_ir_database_path='', + noise_tracks_path='', + copy_with_identity=False)), + evaluation_score_factory=( + eval_scores_factory.EvaluationScoreWorkerFactory( + polqa_tool_bin_path=os.path.join( + os.path.dirname(__file__), 'fake_polqa'))), + ap_wrapper=audioproc_wrapper.AudioProcWrapper( + audioproc_wrapper.AudioProcWrapper.DEFAULT_APM_SIMULATOR_BIN_PATH), + evaluator=evaluation.ApmModuleEvaluator()) + + # What to simulate. + config_files = ['apm_configs/default.json'] + input_files = [os.path.join(self._tmp_path, 'pure_tone-440_1000.wav')] + eval_scores = ['thd'] + + # Should work. + simulator.Run( + config_filepaths=config_files, + capture_input_filepaths=input_files, + test_data_generator_names=['identity'], + eval_score_names=eval_scores, + output_dir=self._output_path) + self.assertFalse(logging.warning.called) + + # Warning expected. + simulator.Run( + config_filepaths=config_files, + capture_input_filepaths=input_files, + test_data_generator_names=['white_noise'], # Not allowed with THD. + eval_score_names=eval_scores, + output_dir=self._output_path) + logging.warning.assert_called_with('the evaluation failed: %s', ( + 'The THD score cannot be used with any test data generator other than ' + '"identity"')) + + # # Init. + # generator = test_data_generation.IdentityTestDataGenerator('tmp') + # input_signal_filepath = os.path.join( + # self._test_data_cache_path, 'pure_tone-440_1000.wav') + + # # Check that the input signal is generated. + # self.assertFalse(os.path.exists(input_signal_filepath)) + # generator.Generate( + # input_signal_filepath=input_signal_filepath, + # test_data_cache_path=self._test_data_cache_path, + # base_output_path=self._base_output_path) + # self.assertTrue(os.path.exists(input_signal_filepath)) + + # # Check input signal properties. + # input_signal = signal_processing.SignalProcessingUtils.LoadWav( + # input_signal_filepath) + # self.assertEqual(1000, len(input_signal)) diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/test_data_generation.py b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/test_data_generation.py new file mode 100644 index 0000000000..dac4328588 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/test_data_generation.py @@ -0,0 +1,511 @@ +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +"""Test data generators producing signals pairs intended to be used to +test the APM module. Each pair consists of a noisy input and a reference signal. +The former is used as APM input and it is generated by adding noise to a +clean audio track. The reference is the expected APM output. + +Throughout this file, the following naming convention is used: + - input signal: the clean signal (e.g., speech), + - noise signal: the noise to be summed up to the input signal (e.g., white + noise, Gaussian noise), + - noisy signal: input + noise. 
+The noise signal may or may not be a function of the clean signal. For +instance, white noise is independently generated, whereas reverberation is +obtained by convolving the input signal with an impulse response. +""" + +import logging +import os +import shutil +import sys + +try: + import scipy.io +except ImportError: + logging.critical('Cannot import the third-party Python package scipy') + sys.exit(1) + +from . import data_access +from . import exceptions +from . import signal_processing + + +class TestDataGenerator(object): + """Abstract class responsible for the generation of noisy signals. + + Given a clean signal, it generates two streams named noisy signal and + reference. The former is the clean signal deteriorated by the noise source, + the latter goes through the same deterioration process, but more "gently". + Noisy signal and reference are produced so that the reference is the signal + expected at the output of the APM module when the latter is fed with the noisy + signal. + + An test data generator generates one or more pairs. + """ + + NAME = None + REGISTERED_CLASSES = {} + + def __init__(self, output_directory_prefix): + self._output_directory_prefix = output_directory_prefix + # Init dictionaries with one entry for each test data generator + # configuration (e.g., different SNRs). + # Noisy audio track files (stored separately in a cache folder). + self._noisy_signal_filepaths = None + # Path to be used for the APM simulation output files. + self._apm_output_paths = None + # Reference audio track files (stored separately in a cache folder). + self._reference_signal_filepaths = None + self.Clear() + + @classmethod + def RegisterClass(cls, class_to_register): + """Registers a TestDataGenerator implementation. + + Decorator to automatically register the classes that extend + TestDataGenerator. + Example usage: + + @TestDataGenerator.RegisterClass + class IdentityGenerator(TestDataGenerator): + pass + """ + cls.REGISTERED_CLASSES[class_to_register.NAME] = class_to_register + return class_to_register + + @property + def config_names(self): + return self._noisy_signal_filepaths.keys() + + @property + def noisy_signal_filepaths(self): + return self._noisy_signal_filepaths + + @property + def apm_output_paths(self): + return self._apm_output_paths + + @property + def reference_signal_filepaths(self): + return self._reference_signal_filepaths + + def Generate( + self, input_signal_filepath, test_data_cache_path, base_output_path): + """Generates a set of noisy input and reference audiotrack file pairs. + + This method initializes an empty set of pairs and calls the _Generate() + method implemented in a concrete class. + + Args: + input_signal_filepath: path to the clean input audio track file. + test_data_cache_path: path to the cache of the generated audio track + files. + base_output_path: base path where output is written. + """ + self.Clear() + self._Generate( + input_signal_filepath, test_data_cache_path, base_output_path) + + def Clear(self): + """Clears the generated output path dictionaries. + """ + self._noisy_signal_filepaths = {} + self._apm_output_paths = {} + self._reference_signal_filepaths = {} + + def _Generate( + self, input_signal_filepath, test_data_cache_path, base_output_path): + """Abstract method to be implemented in each concrete class. + """ + raise NotImplementedError() + + def _AddNoiseSnrPairs(self, base_output_path, noisy_mix_filepaths, + snr_value_pairs): + """Adds noisy-reference signal pairs. 
+
+    Args:
+      base_output_path: noisy tracks base output path.
+      noisy_mix_filepaths: nested dictionary of noisy signal paths organized
+                           by noisy track name and SNR level.
+      snr_value_pairs: list of SNR pairs.
+    """
+    for noise_track_name in noisy_mix_filepaths:
+      for snr_noisy, snr_reference in snr_value_pairs:
+        config_name = '{0}_{1:d}_{2:d}_SNR'.format(
+            noise_track_name, snr_noisy, snr_reference)
+        output_path = self._MakeDir(base_output_path, config_name)
+        self._AddNoiseReferenceFilesPair(
+            config_name=config_name,
+            noisy_signal_filepath=noisy_mix_filepaths[
+                noise_track_name][snr_noisy],
+            reference_signal_filepath=noisy_mix_filepaths[
+                noise_track_name][snr_reference],
+            output_path=output_path)
+
+  def _AddNoiseReferenceFilesPair(self, config_name, noisy_signal_filepath,
+                                  reference_signal_filepath, output_path):
+    """Adds one noisy-reference signal pair.
+
+    Args:
+      config_name: name of the APM configuration.
+      noisy_signal_filepath: path to noisy audio track file.
+      reference_signal_filepath: path to reference audio track file.
+      output_path: APM output path.
+    """
+    assert config_name not in self._noisy_signal_filepaths
+    self._noisy_signal_filepaths[config_name] = os.path.abspath(
+        noisy_signal_filepath)
+    self._apm_output_paths[config_name] = os.path.abspath(output_path)
+    self._reference_signal_filepaths[config_name] = os.path.abspath(
+        reference_signal_filepath)
+
+  def _MakeDir(self, base_output_path, test_data_generator_config_name):
+    output_path = os.path.join(
+        base_output_path,
+        self._output_directory_prefix + test_data_generator_config_name)
+    data_access.MakeDirectory(output_path)
+    return output_path
+
+
+@TestDataGenerator.RegisterClass
+class IdentityTestDataGenerator(TestDataGenerator):
+  """Generator that adds no noise.
+
+  Both the noisy and the reference signals are the input signal.
+  """
+
+  NAME = 'identity'
+
+  def __init__(self, output_directory_prefix, copy_with_identity):
+    TestDataGenerator.__init__(self, output_directory_prefix)
+    self._copy_with_identity = copy_with_identity
+
+  @property
+  def copy_with_identity(self):
+    return self._copy_with_identity
+
+  def _Generate(
+      self, input_signal_filepath, test_data_cache_path, base_output_path):
+    config_name = 'default'
+    output_path = self._MakeDir(base_output_path, config_name)
+
+    if self._copy_with_identity:
+      input_signal_filepath_new = os.path.join(
+          test_data_cache_path, os.path.split(input_signal_filepath)[1])
+      logging.info('copying %s to %s', input_signal_filepath,
+                   input_signal_filepath_new)
+      shutil.copy(input_signal_filepath, input_signal_filepath_new)
+      input_signal_filepath = input_signal_filepath_new
+
+    self._AddNoiseReferenceFilesPair(
+        config_name=config_name,
+        noisy_signal_filepath=input_signal_filepath,
+        reference_signal_filepath=input_signal_filepath,
+        output_path=output_path)
+
+
+@TestDataGenerator.RegisterClass
+class WhiteNoiseTestDataGenerator(TestDataGenerator):
+  """Generator that adds white noise.
+  """
+
+  NAME = 'white_noise'
+
+  # Each pair indicates the clean vs. noisy and reference vs. noisy SNRs.
+  # The reference (second value of each pair) always has a lower amount of noise
+  # - i.e., the SNR is 10 dB higher.
+  _SNR_VALUE_PAIRS = [
+      [20, 30],  # Smallest noise.
+      [10, 20],
+      [5, 15],
+      [0, 10],  # Largest noise.
+  ]
+
+  _NOISY_SIGNAL_FILENAME_TEMPLATE = 'noise_{0:d}_SNR.wav'
+
+  def __init__(self, output_directory_prefix):
+    TestDataGenerator.__init__(self, output_directory_prefix)
+
+  def _Generate(
+      self, input_signal_filepath, test_data_cache_path, base_output_path):
+    # Load the input signal.
+    input_signal = signal_processing.SignalProcessingUtils.LoadWav(
+        input_signal_filepath)
+
+    # Create the noise track.
+    noise_signal = signal_processing.SignalProcessingUtils.GenerateWhiteNoise(
+        input_signal)
+
+    # Create the noisy mixes (once for each unique SNR value).
+    noisy_mix_filepaths = {}
+    snr_values = set([snr for pair in self._SNR_VALUE_PAIRS for snr in pair])
+    for snr in snr_values:
+      noisy_signal_filepath = os.path.join(
+          test_data_cache_path,
+          self._NOISY_SIGNAL_FILENAME_TEMPLATE.format(snr))
+
+      # Create and save if not done.
+      if not os.path.exists(noisy_signal_filepath):
+        # Create noisy signal.
+        noisy_signal = signal_processing.SignalProcessingUtils.MixSignals(
+            input_signal, noise_signal, snr)
+
+        # Save.
+        signal_processing.SignalProcessingUtils.SaveWav(
+            noisy_signal_filepath, noisy_signal)
+
+      # Add file to the collection of mixes.
+      noisy_mix_filepaths[snr] = noisy_signal_filepath
+
+    # Add all the noisy-reference signal pairs.
+    for snr_noisy, snr_reference in self._SNR_VALUE_PAIRS:
+      config_name = '{0:d}_{1:d}_SNR'.format(snr_noisy, snr_reference)
+      output_path = self._MakeDir(base_output_path, config_name)
+      self._AddNoiseReferenceFilesPair(
+          config_name=config_name,
+          noisy_signal_filepath=noisy_mix_filepaths[snr_noisy],
+          reference_signal_filepath=noisy_mix_filepaths[snr_reference],
+          output_path=output_path)
+
+
+# TODO(alessiob): remove comment when class implemented.
+# @TestDataGenerator.RegisterClass
+class NarrowBandNoiseTestDataGenerator(TestDataGenerator):
+  """Generator that adds narrow-band noise.
+  """
+
+  NAME = 'narrow_band_noise'
+
+  def __init__(self, output_directory_prefix):
+    TestDataGenerator.__init__(self, output_directory_prefix)
+
+  def _Generate(
+      self, input_signal_filepath, test_data_cache_path, base_output_path):
+    # TODO(alessiob): implement.
+    pass
+
+
+@TestDataGenerator.RegisterClass
+class AdditiveNoiseTestDataGenerator(TestDataGenerator):
+  """Generator that adds noise loops.
+
+  This generator uses all the wav files in a given path (default: noise_tracks/)
+  and mixes them with the clean speech at different target SNRs (hard-coded).
+  """
+
+  NAME = 'additive_noise'
+  _NOISY_SIGNAL_FILENAME_TEMPLATE = '{0}_{1:d}_SNR.wav'
+
+  DEFAULT_NOISE_TRACKS_PATH = os.path.join(
+      os.path.dirname(__file__), os.pardir, 'noise_tracks')
+
+  # TODO(alessiob): Make the list of SNR pairs customizable.
+  # Each pair indicates the clean vs. noisy and reference vs. noisy SNRs.
+  # The reference (second value of each pair) always has a lower amount of noise
+  # - i.e., the SNR is 10 dB higher.
+  _SNR_VALUE_PAIRS = [
+      [20, 30],  # Smallest noise.
+      [10, 20],
+      [5, 15],
+      [0, 10],  # Largest noise.
+ ] + + def __init__(self, output_directory_prefix, noise_tracks_path): + TestDataGenerator.__init__(self, output_directory_prefix) + self._noise_tracks_path = noise_tracks_path + self._noise_tracks_file_names = [n for n in os.listdir( + self._noise_tracks_path) if n.lower().endswith('.wav')] + if len(self._noise_tracks_file_names) == 0: + raise exceptions.InitializationException( + 'No wav files found in the noise tracks path %s' % ( + self._noise_tracks_path)) + + def _Generate( + self, input_signal_filepath, test_data_cache_path, base_output_path): + """Generates test data pairs using environmental noise. + + For each noise track and pair of SNR values, the following two audio tracks + are created: the noisy signal and the reference signal. The former is + obtained by mixing the (clean) input signal to the corresponding noise + track enforcing the target SNR. + """ + # Init. + snr_values = set([snr for pair in self._SNR_VALUE_PAIRS for snr in pair]) + + # Load the input signal. + input_signal = signal_processing.SignalProcessingUtils.LoadWav( + input_signal_filepath) + + noisy_mix_filepaths = {} + for noise_track_filename in self._noise_tracks_file_names: + # Load the noise track. + noise_track_name, _ = os.path.splitext(noise_track_filename) + noise_track_filepath = os.path.join( + self._noise_tracks_path, noise_track_filename) + if not os.path.exists(noise_track_filepath): + logging.error('cannot find the <%s> noise track', noise_track_filename) + raise exceptions.FileNotFoundError() + + noise_signal = signal_processing.SignalProcessingUtils.LoadWav( + noise_track_filepath) + + # Create the noisy mixes (once for each unique SNR value). + noisy_mix_filepaths[noise_track_name] = {} + for snr in snr_values: + noisy_signal_filepath = os.path.join( + test_data_cache_path, + self._NOISY_SIGNAL_FILENAME_TEMPLATE.format(noise_track_name, snr)) + + # Create and save if not done. + if not os.path.exists(noisy_signal_filepath): + # Create noisy signal. + noisy_signal = signal_processing.SignalProcessingUtils.MixSignals( + input_signal, noise_signal, snr, + pad_noise=signal_processing.SignalProcessingUtils.MixPadding.LOOP) + + # Save. + signal_processing.SignalProcessingUtils.SaveWav( + noisy_signal_filepath, noisy_signal) + + # Add file to the collection of mixes. + noisy_mix_filepaths[noise_track_name][snr] = noisy_signal_filepath + + # Add all the noise-SNR pairs. + self._AddNoiseSnrPairs( + base_output_path, noisy_mix_filepaths, self._SNR_VALUE_PAIRS) + + +@TestDataGenerator.RegisterClass +class ReverberationTestDataGenerator(TestDataGenerator): + """Generator that adds reverberation noise. + + TODO(alessiob): Make this class more generic since the impulse response can be + anything (not just reverberation); call it e.g., + ConvolutionalNoiseTestDataGenerator. + """ + + NAME = 'reverberation' + + _IMPULSE_RESPONSES = { + 'lecture': 'air_binaural_lecture_0_0_1.mat', # Long echo. + 'booth': 'air_binaural_booth_0_0_1.mat', # Short echo. + } + _MAX_IMPULSE_RESPONSE_LENGTH = None + + # Each pair indicates the clean vs. noisy and reference vs. noisy SNRs. + # The reference (second value of each pair) always has a lower amount of noise + # - i.e., the SNR is 5 dB higher. + _SNR_VALUE_PAIRS = [ + [3, 8], # Smallest noise. + [-3, 2], # Largest noise. 
+  ]
+
+  _NOISE_TRACK_FILENAME_TEMPLATE = '{0}.wav'
+  _NOISY_SIGNAL_FILENAME_TEMPLATE = '{0}_{1:d}_SNR.wav'
+
+  def __init__(self, output_directory_prefix, aechen_ir_database_path):
+    TestDataGenerator.__init__(self, output_directory_prefix)
+    self._aechen_ir_database_path = aechen_ir_database_path
+
+  def _Generate(
+      self, input_signal_filepath, test_data_cache_path, base_output_path):
+    """Generates test data pairs using reverberation noise.
+
+    For each impulse response, one noise track is created. For each impulse
+    response and pair of SNR values, the following two audio tracks are
+    created: the noisy signal and the reference signal. The former is
+    obtained by mixing the (clean) input signal with the corresponding noise
+    track, enforcing the target SNR.
+    """
+    # Init.
+    snr_values = set([snr for pair in self._SNR_VALUE_PAIRS for snr in pair])
+
+    # Load the input signal.
+    input_signal = signal_processing.SignalProcessingUtils.LoadWav(
+        input_signal_filepath)
+
+    noisy_mix_filepaths = {}
+    for impulse_response_name in self._IMPULSE_RESPONSES:
+      noise_track_filename = self._NOISE_TRACK_FILENAME_TEMPLATE.format(
+          impulse_response_name)
+      noise_track_filepath = os.path.join(
+          test_data_cache_path, noise_track_filename)
+      noise_signal = None
+      try:
+        # Load noise track.
+        noise_signal = signal_processing.SignalProcessingUtils.LoadWav(
+            noise_track_filepath)
+      except exceptions.FileNotFoundError:
+        # Generate noise track by applying the impulse response.
+        impulse_response_filepath = os.path.join(
+            self._aechen_ir_database_path,
+            self._IMPULSE_RESPONSES[impulse_response_name])
+        noise_signal = self._GenerateNoiseTrack(
+            noise_track_filepath, input_signal, impulse_response_filepath)
+      assert noise_signal is not None
+
+      # Create the noisy mixes (once for each unique SNR value).
+      noisy_mix_filepaths[impulse_response_name] = {}
+      for snr in snr_values:
+        noisy_signal_filepath = os.path.join(
+            test_data_cache_path,
+            self._NOISY_SIGNAL_FILENAME_TEMPLATE.format(
+                impulse_response_name, snr))
+
+        # Create and save if not done.
+        if not os.path.exists(noisy_signal_filepath):
+          # Create noisy signal.
+          noisy_signal = signal_processing.SignalProcessingUtils.MixSignals(
+              input_signal, noise_signal, snr)
+
+          # Save.
+          signal_processing.SignalProcessingUtils.SaveWav(
+              noisy_signal_filepath, noisy_signal)
+
+        # Add file to the collection of mixes.
+        noisy_mix_filepaths[impulse_response_name][snr] = noisy_signal_filepath
+
+    # Add all the noise-SNR pairs.
+    self._AddNoiseSnrPairs(base_output_path, noisy_mix_filepaths,
+                           self._SNR_VALUE_PAIRS)
+
+  def _GenerateNoiseTrack(self, noise_track_filepath, input_signal,
+                          impulse_response_filepath):
+    """Generates a noise track.
+
+    Generates a signal by convolving input_signal with the impulse response in
+    impulse_response_filepath; then saves it to noise_track_filepath.
+
+    Args:
+      noise_track_filepath: output file path for the noise track.
+      input_signal: (clean) input signal samples.
+      impulse_response_filepath: impulse response file path.
+
+    Returns:
+      AudioSegment instance.
+    """
+    # Load impulse response.
+    data = scipy.io.loadmat(impulse_response_filepath)
+    impulse_response = data['h_air'].flatten()
+    if self._MAX_IMPULSE_RESPONSE_LENGTH is not None:
+      logging.info('truncating impulse response from %d to %d samples',
+                   len(impulse_response), self._MAX_IMPULSE_RESPONSE_LENGTH)
+      impulse_response = impulse_response[:self._MAX_IMPULSE_RESPONSE_LENGTH]
+
+    # Apply impulse response.
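+    # A minimal sketch of what the convolution step below could look like
+    # with scipy (hypothetical helper, not the actual implementation of
+    # ApplyImpulseResponse):
+    #
+    #   import numpy as np
+    #   import scipy.signal
+    #   samples = np.array(input_signal.get_array_of_samples(),
+    #                      dtype=np.float32)
+    #   convolved = scipy.signal.fftconvolve(samples, impulse_response)
+    #   # Rescale to the original peak level to avoid clipping on save.
+    #   peak = max(np.max(np.abs(convolved)), 1e-12)
+    #   convolved *= np.max(np.abs(samples)) / peak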
+    processed_signal = (
+        signal_processing.SignalProcessingUtils.ApplyImpulseResponse(
+            input_signal, impulse_response))
+
+    # Save.
+    signal_processing.SignalProcessingUtils.SaveWav(
+        noise_track_filepath, processed_signal)
+
+    return processed_signal
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/test_data_generation_factory.py b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/test_data_generation_factory.py
new file mode 100644
index 0000000000..c80d150228
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/test_data_generation_factory.py
@@ -0,0 +1,71 @@
+# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+
+"""TestDataGenerator factory class.
+"""
+
+import logging
+
+from . import exceptions
+from . import test_data_generation
+
+
+class TestDataGeneratorFactory(object):
+  """Factory class used to create test data generators.
+
+  Usage: create a factory, passing to the constructor the parameters with
+  which the generators will be produced.
+  """
+
+  def __init__(self, aechen_ir_database_path, noise_tracks_path,
+               copy_with_identity):
+    """Ctor.
+
+    Args:
+      aechen_ir_database_path: Path to the Aachen Impulse Response (AIR)
+        database.
+      noise_tracks_path: Path to the noise tracks to add.
+      copy_with_identity: Flag indicating whether the identity generator has
+        to make copies of the clean speech input files.
+    """
+    self._output_directory_prefix = None
+    self._aechen_ir_database_path = aechen_ir_database_path
+    self._noise_tracks_path = noise_tracks_path
+    self._copy_with_identity = copy_with_identity
+
+  def SetOutputDirectoryPrefix(self, prefix):
+    self._output_directory_prefix = prefix
+
+  def GetInstance(self, test_data_generators_class):
+    """Creates a TestDataGenerator instance given a class object.
+
+    Args:
+      test_data_generators_class: TestDataGenerator class object (not an
+        instance).
+
+    Returns:
+      TestDataGenerator instance.
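+
+    Example (illustrative, mirroring the unit tests):
+      factory.SetOutputDirectoryPrefix('datagen-')
+      generator = factory.GetInstance(
+          test_data_generation.IdentityTestDataGenerator)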
+ """ + if self._output_directory_prefix is None: + raise exceptions.InitializationException( + 'The output directory prefix for test data generators is not set') + logging.debug('factory producing %s', test_data_generators_class) + + if test_data_generators_class == ( + test_data_generation.IdentityTestDataGenerator): + return test_data_generation.IdentityTestDataGenerator( + self._output_directory_prefix, self._copy_with_identity) + elif test_data_generators_class == ( + test_data_generation.ReverberationTestDataGenerator): + return test_data_generation.ReverberationTestDataGenerator( + self._output_directory_prefix, self._aechen_ir_database_path) + elif test_data_generators_class == ( + test_data_generation.AdditiveNoiseTestDataGenerator): + return test_data_generation.AdditiveNoiseTestDataGenerator( + self._output_directory_prefix, self._noise_tracks_path) + else: + return test_data_generators_class(self._output_directory_prefix) diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/test_data_generation_unittest.py b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/test_data_generation_unittest.py new file mode 100644 index 0000000000..b0d003dbe8 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/test_data_generation_unittest.py @@ -0,0 +1,206 @@ +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +"""Unit tests for the test_data_generation module. +""" + +import os +import shutil +import tempfile +import unittest + +import numpy as np +import scipy.io + +from . import test_data_generation +from . import test_data_generation_factory +from . import signal_processing + + +class TestTestDataGenerators(unittest.TestCase): + """Unit tests for the test_data_generation module. + """ + + def setUp(self): + """Create temporary folders.""" + self._base_output_path = tempfile.mkdtemp() + self._test_data_cache_path = tempfile.mkdtemp() + self._fake_air_db_path = tempfile.mkdtemp() + + # Fake AIR DB impulse responses. + # TODO(alessiob): ReverberationTestDataGenerator will change to allow custom + # impulse responses. When changed, the coupling below between + # impulse_response_mat_file_names and + # ReverberationTestDataGenerator._IMPULSE_RESPONSES can be removed. + impulse_response_mat_file_names = [ + 'air_binaural_lecture_0_0_1.mat', + 'air_binaural_booth_0_0_1.mat', + ] + for impulse_response_mat_file_name in impulse_response_mat_file_names: + data = {'h_air': np.random.rand(1, 1000).astype('<f8')} + scipy.io.savemat(os.path.join( + self._fake_air_db_path, impulse_response_mat_file_name), data) + + def tearDown(self): + """Recursively delete temporary folders.""" + shutil.rmtree(self._base_output_path) + shutil.rmtree(self._test_data_cache_path) + shutil.rmtree(self._fake_air_db_path) + + def testTestDataGenerators(self): + # Preliminary check. + self.assertTrue(os.path.exists(self._base_output_path)) + self.assertTrue(os.path.exists(self._test_data_cache_path)) + + # Check that there is at least one registered test data generator. 
+    registered_classes = (
+        test_data_generation.TestDataGenerator.REGISTERED_CLASSES)
+    self.assertIsInstance(registered_classes, dict)
+    self.assertGreater(len(registered_classes), 0)
+
+    # Instantiate the generators factory.
+    generators_factory = test_data_generation_factory.TestDataGeneratorFactory(
+        aechen_ir_database_path=self._fake_air_db_path,
+        noise_tracks_path=test_data_generation. \
+                          AdditiveNoiseTestDataGenerator. \
+                          DEFAULT_NOISE_TRACKS_PATH,
+        copy_with_identity=False)
+    generators_factory.SetOutputDirectoryPrefix('datagen-')
+
+    # Use a simple input file as clean input signal.
+    input_signal_filepath = os.path.join(
+        os.getcwd(), 'probing_signals', 'tone-880.wav')
+    self.assertTrue(os.path.exists(input_signal_filepath))
+
+    # Load input signal.
+    input_signal = signal_processing.SignalProcessingUtils.LoadWav(
+        input_signal_filepath)
+
+    # Try each registered test data generator.
+    for generator_name in registered_classes:
+      # Instantiate the test data generator.
+      generator = generators_factory.GetInstance(
+          registered_classes[generator_name])
+
+      # Generate the noisy input - reference pairs.
+      generator.Generate(
+          input_signal_filepath=input_signal_filepath,
+          test_data_cache_path=self._test_data_cache_path,
+          base_output_path=self._base_output_path)
+
+      # Perform checks.
+      self._CheckGeneratedPairsListSizes(generator)
+      self._CheckGeneratedPairsSignalDurations(generator, input_signal)
+      self._CheckGeneratedPairsOutputPaths(generator)
+
+  def testIdentityTestDataGenerator(self):
+    # Preliminary check.
+    self.assertTrue(os.path.exists(self._base_output_path))
+    self.assertTrue(os.path.exists(self._test_data_cache_path))
+
+    # Use a simple input file as clean input signal.
+    input_signal_filepath = os.path.join(
+        os.getcwd(), 'probing_signals', 'tone-880.wav')
+    self.assertTrue(os.path.exists(input_signal_filepath))
+
+    def GetNoiseReferenceFilePaths(identity_generator):
+      noisy_signal_filepaths = identity_generator.noisy_signal_filepaths
+      reference_signal_filepaths = identity_generator.reference_signal_filepaths
+      assert noisy_signal_filepaths.keys() == reference_signal_filepaths.keys()
+      assert len(noisy_signal_filepaths.keys()) == 1
+      key = list(noisy_signal_filepaths.keys())[0]
+      return noisy_signal_filepaths[key], reference_signal_filepaths[key]
+
+    # Test the |copy_with_identity| flag.
+    for copy_with_identity in [False, True]:
+      # Instantiate the generator through the factory.
+      factory = test_data_generation_factory.TestDataGeneratorFactory(
+          aechen_ir_database_path='', noise_tracks_path='',
+          copy_with_identity=copy_with_identity)
+      factory.SetOutputDirectoryPrefix('datagen-')
+      generator = factory.GetInstance(
+          test_data_generation.IdentityTestDataGenerator)
+      # Check |copy_with_identity| is set correctly.
+      self.assertEqual(copy_with_identity, generator.copy_with_identity)
+
+      # Generate test data and extract the paths to the noise and the
+      # reference files.
+      generator.Generate(
+          input_signal_filepath=input_signal_filepath,
+          test_data_cache_path=self._test_data_cache_path,
+          base_output_path=self._base_output_path)
+      noisy_signal_filepath, reference_signal_filepath = (
+          GetNoiseReferenceFilePaths(generator))
+
+      # Check that a copy is made if and only if |copy_with_identity| is True.
+      if copy_with_identity:
+        self.assertNotEqual(noisy_signal_filepath, input_signal_filepath)
+        self.assertNotEqual(reference_signal_filepath, input_signal_filepath)
+      else:
+        self.assertEqual(noisy_signal_filepath, input_signal_filepath)
+        self.assertEqual(reference_signal_filepath, input_signal_filepath)
+
+  def _CheckGeneratedPairsListSizes(self, generator):
+    config_names = generator.config_names
+    number_of_pairs = len(config_names)
+    self.assertEqual(number_of_pairs,
+                     len(generator.noisy_signal_filepaths))
+    self.assertEqual(number_of_pairs,
+                     len(generator.apm_output_paths))
+    self.assertEqual(number_of_pairs,
+                     len(generator.reference_signal_filepaths))
+
+  def _CheckGeneratedPairsSignalDurations(
+      self, generator, input_signal):
+    """Checks the duration of the generated signals.
+
+    Checks that the noisy input and the reference tracks are audio files
+    with a duration equal to or greater than that of the input signal.
+
+    Args:
+      generator: TestDataGenerator instance.
+      input_signal: AudioSegment instance.
+    """
+    input_signal_length = (
+        signal_processing.SignalProcessingUtils.CountSamples(input_signal))
+
+    # Iterate over the noisy signal - reference pairs.
+    for config_name in generator.config_names:
+      # Load the noisy input file.
+      noisy_signal_filepath = generator.noisy_signal_filepaths[
+          config_name]
+      noisy_signal = signal_processing.SignalProcessingUtils.LoadWav(
+          noisy_signal_filepath)
+
+      # Check the noisy input signal length.
+      noisy_signal_length = (
+          signal_processing.SignalProcessingUtils.CountSamples(noisy_signal))
+      self.assertGreaterEqual(noisy_signal_length, input_signal_length)
+
+      # Load the reference file.
+      reference_signal_filepath = generator.reference_signal_filepaths[
+          config_name]
+      reference_signal = signal_processing.SignalProcessingUtils.LoadWav(
+          reference_signal_filepath)
+
+      # Check the reference signal length.
+      reference_signal_length = (
+          signal_processing.SignalProcessingUtils.CountSamples(
+              reference_signal))
+      self.assertGreaterEqual(reference_signal_length, input_signal_length)
+
+  def _CheckGeneratedPairsOutputPaths(self, generator):
+    """Checks that the output paths created by the generator exist.
+
+    Args:
+      generator: TestDataGenerator instance.
+    """
+    # Iterate over the noisy signal - reference pairs.
+    for config_name in generator.config_names:
+      output_path = generator.apm_output_paths[config_name]
+      self.assertTrue(os.path.exists(output_path))
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/vad.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/vad.cc
new file mode 100644
index 0000000000..191cb1e9fc
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/vad.cc
@@ -0,0 +1,101 @@
+// Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the LICENSE file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
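+
+// Output format, as written by the loop below: one header byte holding the
+// frame length in milliseconds, then the VAD decisions packed one bit per
+// frame (LSB first within each byte), and one trailing byte giving the
+// number of padding bits in the last bitmask byte. A sketch of a matching
+// reader (illustrative only, not part of this tool):
+//
+//   std::ifstream in(path, std::ifstream::binary);
+//   char frame_length_ms;
+//   in.read(&frame_length_ms, 1);
+//   std::vector<char> bytes((std::istreambuf_iterator<char>(in)),
+//                           std::istreambuf_iterator<char>());
+//   const int padding_bits = bytes.back();
+//   bytes.pop_back();
+//   for (size_t i = 0; i < bytes.size(); ++i) {
+//     const int valid = (i + 1 == bytes.size()) ? 8 - padding_bits : 8;
+//     for (int b = 0; b < valid; ++b) {
+//       const bool is_speech = (bytes[i] >> b) & 1;  // One frame decision.
+//     }
+//   }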
+ +#include <array> +#include <fstream> +#include <memory> + +#include "common_audio/vad/include/vad.h" +#include "common_audio/wav_file.h" +#include "rtc_base/flags.h" +#include "rtc_base/logging.h" + +namespace webrtc { +namespace test { +namespace { + +// The allowed values are 10, 20 or 30 ms. +constexpr uint8_t kAudioFrameLengthMilliseconds = 30; +constexpr int kMaxSampleRate = 48000; +constexpr size_t kMaxFrameLen = + kAudioFrameLengthMilliseconds * kMaxSampleRate / 1000; + +constexpr uint8_t kBitmaskBuffSize = 8; + +DEFINE_string(i, "", "Input wav file"); +DEFINE_string(o, "", "VAD output file"); + +int main(int argc, char* argv[]) { + if (rtc::FlagList::SetFlagsFromCommandLine(&argc, argv, true)) + return 1; + + // Open wav input file and check properties. + WavReader wav_reader(FLAG_i); + if (wav_reader.num_channels() != 1) { + RTC_LOG(LS_ERROR) << "Only mono wav files supported"; + return 1; + } + if (wav_reader.sample_rate() > kMaxSampleRate) { + RTC_LOG(LS_ERROR) << "Beyond maximum sample rate (" << kMaxSampleRate + << ")"; + return 1; + } + const size_t audio_frame_length = rtc::CheckedDivExact( + kAudioFrameLengthMilliseconds * wav_reader.sample_rate(), 1000); + if (audio_frame_length > kMaxFrameLen) { + RTC_LOG(LS_ERROR) << "The frame size and/or the sample rate are too large."; + return 1; + } + + // Create output file and write header. + std::ofstream out_file(FLAG_o, std::ofstream::binary); + const char audio_frame_length_ms = kAudioFrameLengthMilliseconds; + out_file.write(&audio_frame_length_ms, 1); // Header. + + // Run VAD and write decisions. + std::unique_ptr<Vad> vad = CreateVad(Vad::Aggressiveness::kVadNormal); + std::array<int16_t, kMaxFrameLen> samples; + char buff = 0; // Buffer to write one bit per frame. + uint8_t next = 0; // Points to the next bit to write in |buff|. + while (true) { + // Process frame. + const auto read_samples = + wav_reader.ReadSamples(audio_frame_length, samples.data()); + if (read_samples < audio_frame_length) + break; + const auto is_speech = vad->VoiceActivity( + samples.data(), audio_frame_length, wav_reader.sample_rate()); + + // Write output. + buff = is_speech ? buff | (1 << next) : buff & ~(1 << next); + if (++next == kBitmaskBuffSize) { + out_file.write(&buff, 1); // Flush. + buff = 0; // Reset. + next = 0; + } + } + + // Finalize. + char extra_bits = 0; + if (next > 0) { + extra_bits = kBitmaskBuffSize - next; + out_file.write(&buff, 1); // Flush. + } + out_file.write(&extra_bits, 1); + out_file.close(); + + return 0; +} + +} // namespace +} // namespace test +} // namespace webrtc + +int main(int argc, char* argv[]) { + return webrtc::test::main(argc, argv); +} diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/simulator_buffers.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/test/simulator_buffers.cc new file mode 100644 index 0000000000..90c6d5ea72 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/simulator_buffers.cc @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/test/simulator_buffers.h" + +#include "modules/audio_processing/test/audio_buffer_tools.h" +#include "rtc_base/checks.h" + +namespace webrtc { +namespace test { + +SimulatorBuffers::SimulatorBuffers(int render_input_sample_rate_hz, + int capture_input_sample_rate_hz, + int render_output_sample_rate_hz, + int capture_output_sample_rate_hz, + size_t num_render_input_channels, + size_t num_capture_input_channels, + size_t num_render_output_channels, + size_t num_capture_output_channels) { + Random rand_gen(42); + CreateConfigAndBuffer(render_input_sample_rate_hz, num_render_input_channels, + &rand_gen, &render_input_buffer, &render_input_config, + &render_input, &render_input_samples); + + CreateConfigAndBuffer(render_output_sample_rate_hz, + num_render_output_channels, &rand_gen, + &render_output_buffer, &render_output_config, + &render_output, &render_output_samples); + + CreateConfigAndBuffer(capture_input_sample_rate_hz, + num_capture_input_channels, &rand_gen, + &capture_input_buffer, &capture_input_config, + &capture_input, &capture_input_samples); + + CreateConfigAndBuffer(capture_output_sample_rate_hz, + num_capture_output_channels, &rand_gen, + &capture_output_buffer, &capture_output_config, + &capture_output, &capture_output_samples); + + UpdateInputBuffers(); +} + +SimulatorBuffers::~SimulatorBuffers() = default; + +void SimulatorBuffers::CreateConfigAndBuffer( + int sample_rate_hz, + size_t num_channels, + Random* rand_gen, + std::unique_ptr<AudioBuffer>* buffer, + StreamConfig* config, + std::vector<float*>* buffer_data, + std::vector<float>* buffer_data_samples) { + int samples_per_channel = rtc::CheckedDivExact(sample_rate_hz, 100); + *config = StreamConfig(sample_rate_hz, num_channels, false); + buffer->reset(new AudioBuffer(config->num_frames(), config->num_channels(), + config->num_frames(), config->num_channels(), + config->num_frames())); + + buffer_data_samples->resize(samples_per_channel * num_channels); + for (auto& v : *buffer_data_samples) { + v = rand_gen->Rand<float>(); + } + + buffer_data->resize(num_channels); + for (size_t ch = 0; ch < num_channels; ++ch) { + (*buffer_data)[ch] = &(*buffer_data_samples)[ch * samples_per_channel]; + } +} + +void SimulatorBuffers::UpdateInputBuffers() { + test::CopyVectorToAudioBuffer(capture_input_config, capture_input_samples, + capture_input_buffer.get()); + test::CopyVectorToAudioBuffer(render_input_config, render_input_samples, + render_input_buffer.get()); +} + +} // namespace test +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/simulator_buffers.h b/third_party/libwebrtc/webrtc/modules/audio_processing/test/simulator_buffers.h new file mode 100644 index 0000000000..36dcf301a2 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/simulator_buffers.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_TEST_SIMULATOR_BUFFERS_H_ +#define MODULES_AUDIO_PROCESSING_TEST_SIMULATOR_BUFFERS_H_ + +#include <memory> +#include <vector> + +#include "modules/audio_processing/audio_buffer.h" +#include "modules/audio_processing/include/audio_processing.h" +#include "rtc_base/random.h" + +namespace webrtc { +namespace test { + +struct SimulatorBuffers { + SimulatorBuffers(int render_input_sample_rate_hz, + int capture_input_sample_rate_hz, + int render_output_sample_rate_hz, + int capture_output_sample_rate_hz, + size_t num_render_input_channels, + size_t num_capture_input_channels, + size_t num_render_output_channels, + size_t num_capture_output_channels); + ~SimulatorBuffers(); + + void CreateConfigAndBuffer(int sample_rate_hz, + size_t num_channels, + Random* rand_gen, + std::unique_ptr<AudioBuffer>* buffer, + StreamConfig* config, + std::vector<float*>* buffer_data, + std::vector<float>* buffer_data_samples); + + void UpdateInputBuffers(); + + std::unique_ptr<AudioBuffer> render_input_buffer; + std::unique_ptr<AudioBuffer> capture_input_buffer; + std::unique_ptr<AudioBuffer> render_output_buffer; + std::unique_ptr<AudioBuffer> capture_output_buffer; + StreamConfig render_input_config; + StreamConfig capture_input_config; + StreamConfig render_output_config; + StreamConfig capture_output_config; + std::vector<float*> render_input; + std::vector<float> render_input_samples; + std::vector<float*> capture_input; + std::vector<float> capture_input_samples; + std::vector<float*> render_output; + std::vector<float> render_output_samples; + std::vector<float*> capture_output; + std::vector<float> capture_output_samples; +}; + +} // namespace test +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_TEST_SIMULATOR_BUFFERS_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/test_utils.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/test/test_utils.cc new file mode 100644 index 0000000000..23948c6dde --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/test_utils.cc @@ -0,0 +1,162 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include <utility>
+
+#include "modules/audio_processing/test/test_utils.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+RawFile::RawFile(const std::string& filename)
+    : file_handle_(fopen(filename.c_str(), "wb")) {}
+
+RawFile::~RawFile() {
+  fclose(file_handle_);
+}
+
+void RawFile::WriteSamples(const int16_t* samples, size_t num_samples) {
+#ifndef WEBRTC_ARCH_LITTLE_ENDIAN
+  // On big-endian hosts, byte-swap in place so the file is always written in
+  // little-endian order. Note that this modifies the caller's buffer despite
+  // the const qualifier. The 0xFF mask discards the sign-extension bits
+  // produced by the arithmetic right shift.
+  int16_t* s = (int16_t*)samples;
+  for (size_t idx = 0; idx < num_samples; idx++) {
+    s[idx] = (samples[idx] << 8) | ((samples[idx] >> 8) & 0xFF);
+  }
+#endif
+  fwrite(samples, sizeof(*samples), num_samples, file_handle_);
+}
+
+void RawFile::WriteSamples(const float* samples, size_t num_samples) {
+  fwrite(samples, sizeof(*samples), num_samples, file_handle_);
+}
+
+ChannelBufferWavReader::ChannelBufferWavReader(std::unique_ptr<WavReader> file)
+    : file_(std::move(file)) {}
+
+ChannelBufferWavReader::~ChannelBufferWavReader() = default;
+
+bool ChannelBufferWavReader::Read(ChannelBuffer<float>* buffer) {
+  RTC_CHECK_EQ(file_->num_channels(), buffer->num_channels());
+  interleaved_.resize(buffer->size());
+  if (file_->ReadSamples(interleaved_.size(), &interleaved_[0]) !=
+      interleaved_.size()) {
+    return false;
+  }
+
+  FloatS16ToFloat(&interleaved_[0], interleaved_.size(), &interleaved_[0]);
+  Deinterleave(&interleaved_[0], buffer->num_frames(), buffer->num_channels(),
+               buffer->channels());
+  return true;
+}
+
+ChannelBufferWavWriter::ChannelBufferWavWriter(std::unique_ptr<WavWriter> file)
+    : file_(std::move(file)) {}
+
+ChannelBufferWavWriter::~ChannelBufferWavWriter() = default;
+
+void ChannelBufferWavWriter::Write(const ChannelBuffer<float>& buffer) {
+  RTC_CHECK_EQ(file_->num_channels(), buffer.num_channels());
+  interleaved_.resize(buffer.size());
+  Interleave(buffer.channels(), buffer.num_frames(), buffer.num_channels(),
+             &interleaved_[0]);
+  FloatToFloatS16(&interleaved_[0], interleaved_.size(), &interleaved_[0]);
+  file_->WriteSamples(&interleaved_[0], interleaved_.size());
+}
+
+void WriteIntData(const int16_t* data,
+                  size_t length,
+                  WavWriter* wav_file,
+                  RawFile* raw_file) {
+  if (wav_file) {
+    wav_file->WriteSamples(data, length);
+  }
+  if (raw_file) {
+    raw_file->WriteSamples(data, length);
+  }
+}
+
+void WriteFloatData(const float* const* data,
+                    size_t samples_per_channel,
+                    size_t num_channels,
+                    WavWriter* wav_file,
+                    RawFile* raw_file) {
+  size_t length = num_channels * samples_per_channel;
+  std::unique_ptr<float[]> buffer(new float[length]);
+  Interleave(data, samples_per_channel, num_channels, buffer.get());
+  if (raw_file) {
+    raw_file->WriteSamples(buffer.get(), length);
+  }
+  // TODO(aluebs): Use ScaleToInt16Range() from audio_util
+  for (size_t i = 0; i < length; ++i) {
+    buffer[i] = buffer[i] > 0 ?
+ buffer[i] * std::numeric_limits<int16_t>::max() : + -buffer[i] * std::numeric_limits<int16_t>::min(); + } + if (wav_file) { + wav_file->WriteSamples(buffer.get(), length); + } +} + +FILE* OpenFile(const std::string& filename, const char* mode) { + FILE* file = fopen(filename.c_str(), mode); + if (!file) { + printf("Unable to open file %s\n", filename.c_str()); + exit(1); + } + return file; +} + +size_t SamplesFromRate(int rate) { + return static_cast<size_t>(AudioProcessing::kChunkSizeMs * rate / 1000); +} + +void SetFrameSampleRate(AudioFrame* frame, + int sample_rate_hz) { + frame->sample_rate_hz_ = sample_rate_hz; + frame->samples_per_channel_ = AudioProcessing::kChunkSizeMs * + sample_rate_hz / 1000; +} + +AudioProcessing::ChannelLayout LayoutFromChannels(size_t num_channels) { + switch (num_channels) { + case 1: + return AudioProcessing::kMono; + case 2: + return AudioProcessing::kStereo; + default: + RTC_CHECK(false); + return AudioProcessing::kMono; + } +} + +std::vector<Point> ParseArrayGeometry(const std::string& mic_positions) { + const std::vector<float> values = ParseList<float>(mic_positions); + const size_t num_mics = + rtc::CheckedDivExact(values.size(), static_cast<size_t>(3)); + RTC_CHECK_GT(num_mics, 0) << "mic_positions is not large enough."; + + std::vector<Point> result; + result.reserve(num_mics); + for (size_t i = 0; i < values.size(); i += 3) { + result.push_back(Point(values[i + 0], values[i + 1], values[i + 2])); + } + + return result; +} + +std::vector<Point> ParseArrayGeometry(const std::string& mic_positions, + size_t num_mics) { + std::vector<Point> result = ParseArrayGeometry(mic_positions); + RTC_CHECK_EQ(result.size(), num_mics) + << "Could not parse mic_positions or incorrect number of points."; + return result; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/test_utils.h b/third_party/libwebrtc/webrtc/modules/audio_processing/test/test_utils.h new file mode 100644 index 0000000000..57dc7b3797 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/test_utils.h @@ -0,0 +1,158 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_TEST_TEST_UTILS_H_ +#define MODULES_AUDIO_PROCESSING_TEST_TEST_UTILS_H_ + +#include <math.h> +#include <iterator> +#include <limits> +#include <memory> +#include <string> +#include <vector> + +#include "common_audio/channel_buffer.h" +#include "common_audio/wav_file.h" +#include "modules/audio_processing/include/audio_processing.h" +#include "modules/include/module_common_types.h" +#include "rtc_base/constructormagic.h" + +namespace webrtc { + +static const AudioProcessing::Error kNoErr = AudioProcessing::kNoError; +#define EXPECT_NOERR(expr) EXPECT_EQ(kNoErr, (expr)) + +class RawFile final { + public: + explicit RawFile(const std::string& filename); + ~RawFile(); + + void WriteSamples(const int16_t* samples, size_t num_samples); + void WriteSamples(const float* samples, size_t num_samples); + + private: + FILE* file_handle_; + + RTC_DISALLOW_COPY_AND_ASSIGN(RawFile); +}; + +// Reads ChannelBuffers from a provided WavReader. 
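+//
+// Usage sketch (illustrative; assumes a mono WAV file read in 480-frame
+// chunks):
+//
+//   ChannelBufferWavReader reader(
+//       std::unique_ptr<WavReader>(new WavReader("in.wav")));
+//   ChannelBuffer<float> buf(480, 1);  // 480 frames, 1 channel.
+//   while (reader.Read(&buf)) {
+//     // Process buf.channels()[0][0..479].
+//   }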
+class ChannelBufferWavReader final { + public: + explicit ChannelBufferWavReader(std::unique_ptr<WavReader> file); + ~ChannelBufferWavReader(); + + // Reads data from the file according to the |buffer| format. Returns false if + // a full buffer can't be read from the file. + bool Read(ChannelBuffer<float>* buffer); + + private: + std::unique_ptr<WavReader> file_; + std::vector<float> interleaved_; + + RTC_DISALLOW_COPY_AND_ASSIGN(ChannelBufferWavReader); +}; + +// Writes ChannelBuffers to a provided WavWriter. +class ChannelBufferWavWriter final { + public: + explicit ChannelBufferWavWriter(std::unique_ptr<WavWriter> file); + ~ChannelBufferWavWriter(); + + void Write(const ChannelBuffer<float>& buffer); + + private: + std::unique_ptr<WavWriter> file_; + std::vector<float> interleaved_; + + RTC_DISALLOW_COPY_AND_ASSIGN(ChannelBufferWavWriter); +}; + +void WriteIntData(const int16_t* data, + size_t length, + WavWriter* wav_file, + RawFile* raw_file); + +void WriteFloatData(const float* const* data, + size_t samples_per_channel, + size_t num_channels, + WavWriter* wav_file, + RawFile* raw_file); + +// Exits on failure; do not use in unit tests. +FILE* OpenFile(const std::string& filename, const char* mode); + +size_t SamplesFromRate(int rate); + +void SetFrameSampleRate(AudioFrame* frame, + int sample_rate_hz); + +template <typename T> +void SetContainerFormat(int sample_rate_hz, + size_t num_channels, + AudioFrame* frame, + std::unique_ptr<ChannelBuffer<T> >* cb) { + SetFrameSampleRate(frame, sample_rate_hz); + frame->num_channels_ = num_channels; + cb->reset(new ChannelBuffer<T>(frame->samples_per_channel_, num_channels)); +} + +AudioProcessing::ChannelLayout LayoutFromChannels(size_t num_channels); + +template <typename T> +float ComputeSNR(const T* ref, const T* test, size_t length, float* variance) { + float mse = 0; + float mean = 0; + *variance = 0; + for (size_t i = 0; i < length; ++i) { + T error = ref[i] - test[i]; + mse += error * error; + *variance += ref[i] * ref[i]; + mean += ref[i]; + } + mse /= length; + *variance /= length; + mean /= length; + *variance -= mean * mean; + + float snr = 100; // We assign 100 dB to the zero-error case. + if (mse > 0) + snr = 10 * log10(*variance / mse); + return snr; +} + +// Returns a vector<T> parsed from whitespace delimited values in to_parse, +// or an empty vector if the string could not be parsed. +template<typename T> +std::vector<T> ParseList(const std::string& to_parse) { + std::vector<T> values; + + std::istringstream str(to_parse); + std::copy( + std::istream_iterator<T>(str), + std::istream_iterator<T>(), + std::back_inserter(values)); + + return values; +} + +// Parses the array geometry from the command line. +// +// If a vector with size != num_mics is returned, an error has occurred and an +// appropriate error message has been printed to stdout. +std::vector<Point> ParseArrayGeometry(const std::string& mic_positions, + size_t num_mics); + +// Same as above, but without the num_mics check for when it isn't available. 
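+// For example, ParseArrayGeometry("0 0 0 0.05 0 0") yields two points 5 cm
+// apart along the x axis (assuming coordinates are given in meters).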
+std::vector<Point> ParseArrayGeometry(const std::string& mic_positions); + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_TEST_TEST_UTILS_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/unittest.proto b/third_party/libwebrtc/webrtc/modules/audio_processing/test/unittest.proto new file mode 100644 index 0000000000..3c93bfd713 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/unittest.proto @@ -0,0 +1,58 @@ +syntax = "proto2"; +option optimize_for = LITE_RUNTIME; +package webrtc.audioproc; + +message Test { + optional int32 num_reverse_channels = 1; + optional int32 num_input_channels = 2; + optional int32 num_output_channels = 3; + optional int32 sample_rate = 4; + + message Frame { + } + + repeated Frame frame = 5; + + optional int32 analog_level_average = 6; + optional int32 max_output_average = 7; + + optional int32 has_echo_count = 8; + optional int32 has_voice_count = 9; + optional int32 is_saturated_count = 10; + + message Statistic { + optional int32 instant = 1; + optional int32 average = 2; + optional int32 maximum = 3; + optional int32 minimum = 4; + } + + message EchoMetrics { + optional Statistic residual_echo_return_loss = 1; + optional Statistic echo_return_loss = 2; + optional Statistic echo_return_loss_enhancement = 3; + optional Statistic a_nlp = 4; + optional float divergent_filter_fraction = 5; + } + + repeated EchoMetrics echo_metrics = 11; + + message DelayMetrics { + optional int32 median = 1; + optional int32 std = 2; + optional float fraction_poor_delays = 3; + } + + repeated DelayMetrics delay_metrics = 12; + + repeated int32 rms_level = 13; + + optional float ns_speech_probability_average = 14; + + optional bool use_aec_extended_filter = 15; +} + +message OutputData { + repeated Test test = 1; +} + diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/unpack.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/test/unpack.cc new file mode 100644 index 0000000000..8e27706560 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/unpack.cc @@ -0,0 +1,346 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Commandline tool to unpack audioproc debug files. +// +// The debug files are dumped as protobuf blobs. For analysis, it's necessary +// to unpack the file into its component parts: audio and other data. + +#include <stdio.h> + +#include <memory> + +#include "modules/audio_processing/test/protobuf_utils.h" +#include "modules/audio_processing/test/test_utils.h" +#include "rtc_base/flags.h" +#include "rtc_base/format_macros.h" +#include "rtc_base/ignore_wundef.h" +#include "typedefs.h" // NOLINT(build/include) + +RTC_PUSH_IGNORING_WUNDEF() +#include "modules/audio_processing/debug.pb.h" +RTC_POP_IGNORING_WUNDEF() + +// TODO(andrew): unpack more of the data. 
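+//
+// Example invocation (flags as defined below; the dump file name is
+// illustrative):
+//
+//   unpack debug_dump.pb --full --text
+//
+// With the default (non-raw) flags this writes one set of input<N>.wav,
+// ref_out<N>.wav and reverse<N>.wav files per Init event, plus settings.txt;
+// --full additionally emits delay.int32, drift.int32, level.int32 and
+// keypress.bool, written as text because of --text.
+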
+DEFINE_string(input_file, "input", "The name of the input stream file."); +DEFINE_string(output_file, "ref_out", + "The name of the reference output stream file."); +DEFINE_string(reverse_file, "reverse", + "The name of the reverse input stream file."); +DEFINE_string(delay_file, "delay.int32", "The name of the delay file."); +DEFINE_string(drift_file, "drift.int32", "The name of the drift file."); +DEFINE_string(level_file, "level.int32", "The name of the level file."); +DEFINE_string(keypress_file, "keypress.bool", "The name of the keypress file."); +DEFINE_string(settings_file, "settings.txt", "The name of the settings file."); +DEFINE_bool(full, false, + "Unpack the full set of files (normally not needed)."); +DEFINE_bool(raw, false, "Write raw data instead of a WAV file."); +DEFINE_bool(text, + false, + "Write non-audio files as text files instead of binary files."); +DEFINE_bool(help, false, "Print this message."); + +#define PRINT_CONFIG(field_name) \ + if (msg.has_##field_name()) { \ + fprintf(settings_file, " " #field_name ": %d\n", msg.field_name()); \ + } + +namespace webrtc { + +using audioproc::Event; +using audioproc::ReverseStream; +using audioproc::Stream; +using audioproc::Init; + +void WriteData(const void* data, size_t size, FILE* file, + const std::string& filename) { + if (fwrite(data, size, 1, file) != 1) { + printf("Error when writing to %s\n", filename.c_str()); + exit(1); + } +} + +int do_main(int argc, char* argv[]) { + std::string program_name = argv[0]; + std::string usage = "Commandline tool to unpack audioproc debug files.\n" + "Example usage:\n" + program_name + " debug_dump.pb\n"; + + if (rtc::FlagList::SetFlagsFromCommandLine(&argc, argv, true) || + FLAG_help || argc < 2) { + printf("%s", usage.c_str()); + if (FLAG_help) { + rtc::FlagList::Print(nullptr, false); + return 0; + } + return 1; + } + + FILE* debug_file = OpenFile(argv[1], "rb"); + + Event event_msg; + int frame_count = 0; + size_t reverse_samples_per_channel = 0; + size_t input_samples_per_channel = 0; + size_t output_samples_per_channel = 0; + size_t num_reverse_channels = 0; + size_t num_input_channels = 0; + size_t num_output_channels = 0; + std::unique_ptr<WavWriter> reverse_wav_file; + std::unique_ptr<WavWriter> input_wav_file; + std::unique_ptr<WavWriter> output_wav_file; + std::unique_ptr<RawFile> reverse_raw_file; + std::unique_ptr<RawFile> input_raw_file; + std::unique_ptr<RawFile> output_raw_file; + + FILE* settings_file = OpenFile(FLAG_settings_file, "wb"); + + while (ReadMessageFromFile(debug_file, &event_msg)) { + if (event_msg.type() == Event::REVERSE_STREAM) { + if (!event_msg.has_reverse_stream()) { + printf("Corrupt input file: ReverseStream missing.\n"); + return 1; + } + + const ReverseStream msg = event_msg.reverse_stream(); + if (msg.has_data()) { + if (FLAG_raw && !reverse_raw_file) { + reverse_raw_file.reset(new RawFile(std::string(FLAG_reverse_file) + + ".pcm")); + } + // TODO(aluebs): Replace "num_reverse_channels * + // reverse_samples_per_channel" with "msg.data().size() / + // sizeof(int16_t)" and so on when this fix in audio_processing has made + // it into stable: https://webrtc-codereview.appspot.com/15299004/ + WriteIntData(reinterpret_cast<const int16_t*>(msg.data().data()), + num_reverse_channels * reverse_samples_per_channel, + reverse_wav_file.get(), + reverse_raw_file.get()); + } else if (msg.channel_size() > 0) { + if (FLAG_raw && !reverse_raw_file) { + reverse_raw_file.reset(new RawFile(std::string(FLAG_reverse_file) + + ".float")); + } + 
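+        // Each |channel| field of the ReverseStream message holds one
+        // channel's float samples as raw bytes, so the pointers below can
+        // alias the protobuf storage directly.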
std::unique_ptr<const float* []> data( + new const float* [num_reverse_channels]); + for (size_t i = 0; i < num_reverse_channels; ++i) { + data[i] = reinterpret_cast<const float*>(msg.channel(i).data()); + } + WriteFloatData(data.get(), + reverse_samples_per_channel, + num_reverse_channels, + reverse_wav_file.get(), + reverse_raw_file.get()); + } + } else if (event_msg.type() == Event::STREAM) { + frame_count++; + if (!event_msg.has_stream()) { + printf("Corrupt input file: Stream missing.\n"); + return 1; + } + + const Stream msg = event_msg.stream(); + if (msg.has_input_data()) { + if (FLAG_raw && !input_raw_file) { + input_raw_file.reset(new RawFile(std::string(FLAG_input_file) + + ".pcm")); + } + WriteIntData(reinterpret_cast<const int16_t*>(msg.input_data().data()), + num_input_channels * input_samples_per_channel, + input_wav_file.get(), + input_raw_file.get()); + } else if (msg.input_channel_size() > 0) { + if (FLAG_raw && !input_raw_file) { + input_raw_file.reset(new RawFile(std::string(FLAG_input_file) + + ".float")); + } + std::unique_ptr<const float* []> data( + new const float* [num_input_channels]); + for (size_t i = 0; i < num_input_channels; ++i) { + data[i] = reinterpret_cast<const float*>(msg.input_channel(i).data()); + } + WriteFloatData(data.get(), + input_samples_per_channel, + num_input_channels, + input_wav_file.get(), + input_raw_file.get()); + } + + if (msg.has_output_data()) { + if (FLAG_raw && !output_raw_file) { + output_raw_file.reset(new RawFile(std::string(FLAG_output_file) + + ".pcm")); + } + WriteIntData(reinterpret_cast<const int16_t*>(msg.output_data().data()), + num_output_channels * output_samples_per_channel, + output_wav_file.get(), + output_raw_file.get()); + } else if (msg.output_channel_size() > 0) { + if (FLAG_raw && !output_raw_file) { + output_raw_file.reset(new RawFile(std::string(FLAG_output_file) + + ".float")); + } + std::unique_ptr<const float* []> data( + new const float* [num_output_channels]); + for (size_t i = 0; i < num_output_channels; ++i) { + data[i] = + reinterpret_cast<const float*>(msg.output_channel(i).data()); + } + WriteFloatData(data.get(), + output_samples_per_channel, + num_output_channels, + output_wav_file.get(), + output_raw_file.get()); + } + + if (FLAG_full) { + if (msg.has_delay()) { + static FILE* delay_file = OpenFile(FLAG_delay_file, "wb"); + int32_t delay = msg.delay(); + if (FLAG_text) { + fprintf(delay_file, "%d\n", delay); + } else { + WriteData(&delay, sizeof(delay), delay_file, FLAG_delay_file); + } + } + + if (msg.has_drift()) { + static FILE* drift_file = OpenFile(FLAG_drift_file, "wb"); + int32_t drift = msg.drift(); + if (FLAG_text) { + fprintf(drift_file, "%d\n", drift); + } else { + WriteData(&drift, sizeof(drift), drift_file, FLAG_drift_file); + } + } + + if (msg.has_level()) { + static FILE* level_file = OpenFile(FLAG_level_file, "wb"); + int32_t level = msg.level(); + if (FLAG_text) { + fprintf(level_file, "%d\n", level); + } else { + WriteData(&level, sizeof(level), level_file, FLAG_level_file); + } + } + + if (msg.has_keypress()) { + static FILE* keypress_file = OpenFile(FLAG_keypress_file, "wb"); + bool keypress = msg.keypress(); + if (FLAG_text) { + fprintf(keypress_file, "%d\n", keypress); + } else { + WriteData(&keypress, sizeof(keypress), keypress_file, + FLAG_keypress_file); + } + } + } + } else if (event_msg.type() == Event::CONFIG) { + if (!event_msg.has_config()) { + printf("Corrupt input file: Config missing.\n"); + return 1; + } + const audioproc::Config msg = event_msg.config(); + + 
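+      // Each PRINT_CONFIG(foo) below expands (see the macro definition
+      // above) to roughly:
+      //   if (msg.has_foo()) {
+      //     fprintf(settings_file, "  foo: %d\n", msg.foo());
+      //   }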
fprintf(settings_file, "APM re-config at frame: %d\n", frame_count); + + PRINT_CONFIG(aec_enabled); + PRINT_CONFIG(aec_delay_agnostic_enabled); + PRINT_CONFIG(aec_drift_compensation_enabled); + PRINT_CONFIG(aec_extended_filter_enabled); + PRINT_CONFIG(aec_suppression_level); + PRINT_CONFIG(aecm_enabled); + PRINT_CONFIG(aecm_comfort_noise_enabled); + PRINT_CONFIG(aecm_routing_mode); + PRINT_CONFIG(agc_enabled); + PRINT_CONFIG(agc_mode); + PRINT_CONFIG(agc_limiter_enabled); + PRINT_CONFIG(noise_robust_agc_enabled); + PRINT_CONFIG(hpf_enabled); + PRINT_CONFIG(ns_enabled); + PRINT_CONFIG(ns_level); + PRINT_CONFIG(transient_suppression_enabled); + PRINT_CONFIG(intelligibility_enhancer_enabled); + if (msg.has_experiments_description()) { + fprintf(settings_file, " experiments_description: %s\n", + msg.experiments_description().c_str()); + } + } else if (event_msg.type() == Event::INIT) { + if (!event_msg.has_init()) { + printf("Corrupt input file: Init missing.\n"); + return 1; + } + + const Init msg = event_msg.init(); + // These should print out zeros if they're missing. + fprintf(settings_file, "Init at frame: %d\n", frame_count); + int input_sample_rate = msg.sample_rate(); + fprintf(settings_file, " Input sample rate: %d\n", input_sample_rate); + int output_sample_rate = msg.output_sample_rate(); + fprintf(settings_file, " Output sample rate: %d\n", output_sample_rate); + int reverse_sample_rate = msg.reverse_sample_rate(); + fprintf(settings_file, + " Reverse sample rate: %d\n", + reverse_sample_rate); + num_input_channels = msg.num_input_channels(); + fprintf(settings_file, " Input channels: %" PRIuS "\n", + num_input_channels); + num_output_channels = msg.num_output_channels(); + fprintf(settings_file, " Output channels: %" PRIuS "\n", + num_output_channels); + num_reverse_channels = msg.num_reverse_channels(); + fprintf(settings_file, " Reverse channels: %" PRIuS "\n", + num_reverse_channels); + + fprintf(settings_file, "\n"); + + if (reverse_sample_rate == 0) { + reverse_sample_rate = input_sample_rate; + } + if (output_sample_rate == 0) { + output_sample_rate = input_sample_rate; + } + + reverse_samples_per_channel = + static_cast<size_t>(reverse_sample_rate / 100); + input_samples_per_channel = + static_cast<size_t>(input_sample_rate / 100); + output_samples_per_channel = + static_cast<size_t>(output_sample_rate / 100); + + if (!FLAG_raw) { + // The WAV files need to be reset every time, because they cant change + // their sample rate or number of channels. 
+ std::stringstream reverse_name; + reverse_name << FLAG_reverse_file << frame_count << ".wav"; + reverse_wav_file.reset(new WavWriter(reverse_name.str(), + reverse_sample_rate, + num_reverse_channels)); + std::stringstream input_name; + input_name << FLAG_input_file << frame_count << ".wav"; + input_wav_file.reset(new WavWriter(input_name.str(), + input_sample_rate, + num_input_channels)); + std::stringstream output_name; + output_name << FLAG_output_file << frame_count << ".wav"; + output_wav_file.reset(new WavWriter(output_name.str(), + output_sample_rate, + num_output_channels)); + } + } + } + + return 0; +} + +} // namespace webrtc + +int main(int argc, char* argv[]) { + return webrtc::do_main(argc, argv); +} diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/wav_based_simulator.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/test/wav_based_simulator.cc new file mode 100644 index 0000000000..f53d1e5d8d --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/wav_based_simulator.cc @@ -0,0 +1,185 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/test/wav_based_simulator.h" + +#include <stdio.h> +#include <iostream> + +#include "modules/audio_processing/test/test_utils.h" +#include "rtc_base/checks.h" + +namespace webrtc { +namespace test { + +std::vector<WavBasedSimulator::SimulationEventType> +WavBasedSimulator::GetCustomEventChain(const std::string& filename) { + std::vector<WavBasedSimulator::SimulationEventType> call_chain; + FILE* stream = OpenFile(filename.c_str(), "r"); + + RTC_CHECK(stream) << "Could not open the custom call order file, reverting " + "to using the default call order"; + + char c; + size_t num_read = fread(&c, sizeof(char), 1, stream); + while (num_read > 0) { + switch (c) { + case 'r': + call_chain.push_back(SimulationEventType::kProcessReverseStream); + break; + case 'c': + call_chain.push_back(SimulationEventType::kProcessStream); + break; + case '\n': + break; + default: + FATAL() << "Incorrect custom call order file, reverting to using the " + "default call order"; + fclose(stream); + return WavBasedSimulator::GetDefaultEventChain(); + } + + num_read = fread(&c, sizeof(char), 1, stream); + } + + fclose(stream); + return call_chain; +} + +WavBasedSimulator::WavBasedSimulator(const SimulationSettings& settings) + : AudioProcessingSimulator(settings) {} + +WavBasedSimulator::~WavBasedSimulator() = default; + +std::vector<WavBasedSimulator::SimulationEventType> +WavBasedSimulator::GetDefaultEventChain() { + std::vector<WavBasedSimulator::SimulationEventType> call_chain(2); + call_chain[0] = SimulationEventType::kProcessStream; + call_chain[1] = SimulationEventType::kProcessReverseStream; + return call_chain; +} + +void WavBasedSimulator::PrepareProcessStreamCall() { + if (settings_.fixed_interface) { + CopyToAudioFrame(*in_buf_, &fwd_frame_); + } + ap_->set_stream_key_pressed(settings_.use_ts && (*settings_.use_ts)); + + RTC_CHECK_EQ(AudioProcessing::kNoError, + ap_->set_stream_delay_ms( + settings_.stream_delay ? 
*settings_.stream_delay : 0)); + + ap_->echo_cancellation()->set_stream_drift_samples( + settings_.stream_drift_samples ? *settings_.stream_drift_samples : 0); +} + +void WavBasedSimulator::PrepareReverseProcessStreamCall() { + if (settings_.fixed_interface) { + CopyToAudioFrame(*reverse_in_buf_, &rev_frame_); + } +} + +void WavBasedSimulator::Process() { + if (settings_.custom_call_order_filename) { + call_chain_ = WavBasedSimulator::GetCustomEventChain( + *settings_.custom_call_order_filename); + } else { + call_chain_ = WavBasedSimulator::GetDefaultEventChain(); + } + CreateAudioProcessor(); + + Initialize(); + + bool samples_left_to_process = true; + int call_chain_index = 0; + int num_forward_chunks_processed = 0; + while (samples_left_to_process) { + switch (call_chain_[call_chain_index]) { + case SimulationEventType::kProcessStream: + samples_left_to_process = HandleProcessStreamCall(); + ++num_forward_chunks_processed; + break; + case SimulationEventType::kProcessReverseStream: + if (settings_.reverse_input_filename) { + samples_left_to_process = HandleProcessReverseStreamCall(); + } + break; + default: + RTC_CHECK(false); + } + + call_chain_index = (call_chain_index + 1) % call_chain_.size(); + } + + DestroyAudioProcessor(); +} + +bool WavBasedSimulator::HandleProcessStreamCall() { + bool samples_left_to_process = buffer_reader_->Read(in_buf_.get()); + if (samples_left_to_process) { + PrepareProcessStreamCall(); + ProcessStream(settings_.fixed_interface); + } + return samples_left_to_process; +} + +bool WavBasedSimulator::HandleProcessReverseStreamCall() { + bool samples_left_to_process = + reverse_buffer_reader_->Read(reverse_in_buf_.get()); + if (samples_left_to_process) { + PrepareReverseProcessStreamCall(); + ProcessReverseStream(settings_.fixed_interface); + } + return samples_left_to_process; +} + +void WavBasedSimulator::Initialize() { + std::unique_ptr<WavReader> in_file( + new WavReader(settings_.input_filename->c_str())); + int input_sample_rate_hz = in_file->sample_rate(); + int input_num_channels = in_file->num_channels(); + buffer_reader_.reset(new ChannelBufferWavReader(std::move(in_file))); + + int output_sample_rate_hz = settings_.output_sample_rate_hz + ? *settings_.output_sample_rate_hz + : input_sample_rate_hz; + int output_num_channels = settings_.output_num_channels + ? *settings_.output_num_channels + : input_num_channels; + + int reverse_sample_rate_hz = 48000; + int reverse_num_channels = 1; + int reverse_output_sample_rate_hz = 48000; + int reverse_output_num_channels = 1; + if (settings_.reverse_input_filename) { + std::unique_ptr<WavReader> reverse_in_file( + new WavReader(settings_.reverse_input_filename->c_str())); + reverse_sample_rate_hz = reverse_in_file->sample_rate(); + reverse_num_channels = reverse_in_file->num_channels(); + reverse_buffer_reader_.reset( + new ChannelBufferWavReader(std::move(reverse_in_file))); + + reverse_output_sample_rate_hz = + settings_.reverse_output_sample_rate_hz + ? *settings_.reverse_output_sample_rate_hz + : reverse_sample_rate_hz; + reverse_output_num_channels = settings_.reverse_output_num_channels + ? 
*settings_.reverse_output_num_channels : reverse_num_channels;
+  }
+
+  SetupBuffersConfigsOutputs(
+      input_sample_rate_hz, output_sample_rate_hz, reverse_sample_rate_hz,
+      reverse_output_sample_rate_hz, input_num_channels, output_num_channels,
+      reverse_num_channels, reverse_output_num_channels);
+}
+
+}  // namespace test
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/test/wav_based_simulator.h b/third_party/libwebrtc/webrtc/modules/audio_processing/test/wav_based_simulator.h
new file mode 100644
index 0000000000..febcffb62c
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/test/wav_based_simulator.h
@@ -0,0 +1,55 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TEST_WAV_BASED_SIMULATOR_H_
+#define MODULES_AUDIO_PROCESSING_TEST_WAV_BASED_SIMULATOR_H_
+
+#include <vector>
+
+#include "modules/audio_processing/test/audio_processing_simulator.h"
+
+#include "rtc_base/constructormagic.h"
+
+namespace webrtc {
+namespace test {
+
+// Used to perform an audio processing simulation from wav files.
+class WavBasedSimulator final : public AudioProcessingSimulator {
+ public:
+  explicit WavBasedSimulator(const SimulationSettings& settings);
+  ~WavBasedSimulator() override;
+
+  // Processes the WAV input.
+  void Process() override;
+
+ private:
+  enum SimulationEventType {
+    kProcessStream,
+    kProcessReverseStream,
+  };
+
+  void Initialize();
+  bool HandleProcessStreamCall();
+  bool HandleProcessReverseStreamCall();
+  void PrepareProcessStreamCall();
+  void PrepareReverseProcessStreamCall();
+  static std::vector<SimulationEventType> GetDefaultEventChain();
+  static std::vector<SimulationEventType> GetCustomEventChain(
+      const std::string& filename);
+
+  std::vector<SimulationEventType> call_chain_;
+
+  RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(WavBasedSimulator);
+};
+
+}  // namespace test
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_TEST_WAV_BASED_SIMULATOR_H_
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/three_band_filter_bank.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/three_band_filter_bank.cc
new file mode 100644
index 0000000000..f5a319bc54
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/three_band_filter_bank.cc
@@ -0,0 +1,216 @@
+/*
+ *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+// An implementation of a 3-band FIR filter-bank with DCT modulation, similar
+// to the one proposed in "Multirate Signal Processing for Communication
+// Systems" by Fredric J. Harris.
+//
+// The idea is to take a heterodyne system and change the order of the
+// components to get something which is efficient to implement digitally.
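+//
+// (With the constants defined below, kNumBands = 3, kSparsity = 4 and
+// kNumCoeffs = 4, the delay noted in the comments below works out to
+// 3 * 4 * 4 / 2 = 24 samples at the full rate.)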
+//
+// It is possible to separate the filter using the noble identity as follows:
+//
+// H(z) = H0(z^3) + z^-1 * H1(z^3) + z^-2 * H2(z^3)
+//
+// This is used in the analysis stage to first downsample serial to parallel
+// and then filter each branch with one of these polyphase decompositions of
+// the lowpass prototype. Because each filter is only a modulation of the
+// prototype, it is enough to multiply each coefficient by the respective
+// cosine value to shift it to the desired band. But because the cosine period
+// is 12 samples, it requires separating the prototype even further using the
+// noble identity. After filtering and modulating for each band, the output of
+// all filters is accumulated to get the downsampled bands.
+//
+// A similar logic can be applied to the synthesis stage.
+
+// MSVC++ requires this to be set before any other includes to get M_PI.
+#define _USE_MATH_DEFINES
+
+#include "modules/audio_processing/three_band_filter_bank.h"
+
+#include <cmath>
+
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+namespace {
+
+const size_t kNumBands = 3;
+const size_t kSparsity = 4;
+
+// Factors to take into account when choosing |kNumCoeffs|:
+//   1. A higher |kNumCoeffs| means a faster transition, which ensures less
+//      aliasing. This is especially important when there is non-linear
+//      processing between the splitting and merging.
+//   2. The delay that this filter bank introduces is
+//      |kNumBands| * |kSparsity| * |kNumCoeffs| / 2, so it increases linearly
+//      with |kNumCoeffs|.
+//   3. The computation complexity also increases linearly with |kNumCoeffs|.
+const size_t kNumCoeffs = 4;
+
+// The Matlab code to generate these |kLowpassCoeffs| is:
+//
+// N = kNumBands * kSparsity * kNumCoeffs - 1;
+// h = fir1(N, 1 / (2 * kNumBands), kaiser(N + 1, 3.5));
+// reshape(h, kNumBands * kSparsity, kNumCoeffs);
+//
+// Because the total bandwidth of the lower and higher band is double the
+// middle one (because of the spectrum parity), the low-pass prototype is half
+// the bandwidth of 1 / (2 * |kNumBands|) and is then shifted with cosine
+// modulation to the right places.
+// A Kaiser window is used because of its flexibility and the alpha is set to
+// 3.5, since that sets a stop-band attenuation of 40dB ensuring a fast
+// transition.
+const float kLowpassCoeffs[kNumBands * kSparsity][kNumCoeffs] =
+    {{-0.00047749f, -0.00496888f, +0.16547118f, +0.00425496f},
+     {-0.00173287f, -0.01585778f, +0.14989004f, +0.00994113f},
+     {-0.00304815f, -0.02536082f, +0.12154542f, +0.01157993f},
+     {-0.00383509f, -0.02982767f, +0.08543175f, +0.00983212f},
+     {-0.00346946f, -0.02587886f, +0.04760441f, +0.00607594f},
+     {-0.00154717f, -0.01136076f, +0.01387458f, +0.00186353f},
+     {+0.00186353f, +0.01387458f, -0.01136076f, -0.00154717f},
+     {+0.00607594f, +0.04760441f, -0.02587886f, -0.00346946f},
+     {+0.00983212f, +0.08543175f, -0.02982767f, -0.00383509f},
+     {+0.01157993f, +0.12154542f, -0.02536082f, -0.00304815f},
+     {+0.00994113f, +0.14989004f, -0.01585778f, -0.00173287f},
+     {+0.00425496f, +0.16547118f, -0.00496888f, -0.00047749f}};
+
+// Downsamples |in| into |out|, taking one every |kNumBands| starting from
+// |offset|. |split_length| is the |out| length. |in| has to be at least
+// |kNumBands| * |split_length| long.
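+// For example, with |kNumBands| == 3 and |offset| == 1 the output is
+// in[1], in[4], in[7], ..., i.e. one polyphase branch of the input.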
+void Downsample(const float* in,
+                size_t split_length,
+                size_t offset,
+                float* out) {
+  for (size_t i = 0; i < split_length; ++i) {
+    out[i] = in[kNumBands * i + offset];
+  }
+}
+
+// Upsamples |in| into |out|, scaling by |kNumBands| and accumulating it every
+// |kNumBands| starting from |offset|. |split_length| is the |in| length.
+// |out| has to be at least |kNumBands| * |split_length| long.
+void Upsample(const float* in, size_t split_length, size_t offset, float* out) {
+  for (size_t i = 0; i < split_length; ++i) {
+    out[kNumBands * i + offset] += kNumBands * in[i];
+  }
+}
+
+}  // namespace
+
+// Because the low-pass filter prototype has half bandwidth, it is possible to
+// use a DCT to shift it in both directions at the same time, to the center
+// frequencies [1 / 12, 3 / 12, 5 / 12].
+ThreeBandFilterBank::ThreeBandFilterBank(size_t length)
+    : in_buffer_(rtc::CheckedDivExact(length, kNumBands)),
+      out_buffer_(in_buffer_.size()) {
+  for (size_t i = 0; i < kSparsity; ++i) {
+    for (size_t j = 0; j < kNumBands; ++j) {
+      analysis_filters_.push_back(
+          std::unique_ptr<SparseFIRFilter>(new SparseFIRFilter(
+              kLowpassCoeffs[i * kNumBands + j], kNumCoeffs, kSparsity, i)));
+      synthesis_filters_.push_back(
+          std::unique_ptr<SparseFIRFilter>(new SparseFIRFilter(
+              kLowpassCoeffs[i * kNumBands + j], kNumCoeffs, kSparsity, i)));
+    }
+  }
+  dct_modulation_.resize(kNumBands * kSparsity);
+  for (size_t i = 0; i < dct_modulation_.size(); ++i) {
+    dct_modulation_[i].resize(kNumBands);
+    for (size_t j = 0; j < kNumBands; ++j) {
+      dct_modulation_[i][j] =
+          2.f * cos(2.f * M_PI * i * (2.f * j + 1.f) / dct_modulation_.size());
+    }
+  }
+}
+
+ThreeBandFilterBank::~ThreeBandFilterBank() = default;
+
+// The analysis can be separated into these steps:
+//   1. Serial to parallel downsampling by a factor of |kNumBands|.
+//   2. Filtering of |kSparsity| different delayed signals with polyphase
+//      decomposition of the low-pass prototype filter and upsampled by a
+//      factor of |kSparsity|.
+//   3. Modulating with cosines and accumulating to get the desired band.
+void ThreeBandFilterBank::Analysis(const float* in,
+                                   size_t length,
+                                   float* const* out) {
+  RTC_CHECK_EQ(in_buffer_.size(), rtc::CheckedDivExact(length, kNumBands));
+  for (size_t i = 0; i < kNumBands; ++i) {
+    memset(out[i], 0, in_buffer_.size() * sizeof(*out[i]));
+  }
+  for (size_t i = 0; i < kNumBands; ++i) {
+    Downsample(in, in_buffer_.size(), kNumBands - i - 1, &in_buffer_[0]);
+    for (size_t j = 0; j < kSparsity; ++j) {
+      const size_t offset = i + j * kNumBands;
+      analysis_filters_[offset]->Filter(&in_buffer_[0],
+                                        in_buffer_.size(),
+                                        &out_buffer_[0]);
+      DownModulate(&out_buffer_[0], out_buffer_.size(), offset, out);
+    }
+  }
+}
+
+// The synthesis can be separated into these steps:
+//   1. Modulating with cosines.
+//   2. Filtering each one with a polyphase decomposition of the low-pass
+//      prototype filter upsampled by a factor of |kSparsity| and accumulating
+//      |kSparsity| signals with different delays.
+//   3. Parallel to serial upsampling by a factor of |kNumBands|.
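+//
+// A hypothetical round trip through the filter bank (the 480-sample chunk,
+// i.e. 10 ms at 48 kHz, is an assumption for illustration; any length
+// divisible by 3 works, and band0..band2/in/out are caller-owned buffers):
+//
+//   ThreeBandFilterBank bank(480);
+//   float* bands[3] = {band0, band1, band2};  // Each band holds 160 floats.
+//   bank.Analysis(in, 480, bands);            // Split into 3 bands.
+//   bank.Synthesis(bands, 160, out);          // Merge back into 480 samples.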
+void ThreeBandFilterBank::Synthesis(const float* const* in,
+                                    size_t split_length,
+                                    float* out) {
+  RTC_CHECK_EQ(in_buffer_.size(), split_length);
+  memset(out, 0, kNumBands * in_buffer_.size() * sizeof(*out));
+  for (size_t i = 0; i < kNumBands; ++i) {
+    for (size_t j = 0; j < kSparsity; ++j) {
+      const size_t offset = i + j * kNumBands;
+      UpModulate(in, in_buffer_.size(), offset, &in_buffer_[0]);
+      synthesis_filters_[offset]->Filter(&in_buffer_[0],
+                                         in_buffer_.size(),
+                                         &out_buffer_[0]);
+      Upsample(&out_buffer_[0], out_buffer_.size(), i, out);
+    }
+  }
+}
+
+// Modulates |in| by |dct_modulation_| and accumulates it in each of the
+// |kNumBands| bands of |out|. |offset| is the index in the period of the
+// cosines used for modulation. |split_length| is the length of |in| and each
+// band of |out|.
+void ThreeBandFilterBank::DownModulate(const float* in,
+                                       size_t split_length,
+                                       size_t offset,
+                                       float* const* out) {
+  for (size_t i = 0; i < kNumBands; ++i) {
+    for (size_t j = 0; j < split_length; ++j) {
+      out[i][j] += dct_modulation_[offset][i] * in[j];
+    }
+  }
+}
+
+// Modulates each of the |kNumBands| bands of |in| by |dct_modulation_| and
+// accumulates them in |out|. |out| is cleared before starting to accumulate.
+// |offset| is the index in the period of the cosines used for modulation.
+// |split_length| is the length of each band of |in| and |out|.
+void ThreeBandFilterBank::UpModulate(const float* const* in,
+                                     size_t split_length,
+                                     size_t offset,
+                                     float* out) {
+  memset(out, 0, split_length * sizeof(*out));
+  for (size_t i = 0; i < kNumBands; ++i) {
+    for (size_t j = 0; j < split_length; ++j) {
+      out[j] += dct_modulation_[offset][i] * in[i][j];
+    }
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/three_band_filter_bank.h b/third_party/libwebrtc/webrtc/modules/audio_processing/three_band_filter_bank.h
new file mode 100644
index 0000000000..ccbf2ddf97
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/three_band_filter_bank.h
@@ -0,0 +1,69 @@
+/*
+ *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_THREE_BAND_FILTER_BANK_H_
+#define MODULES_AUDIO_PROCESSING_THREE_BAND_FILTER_BANK_H_
+
+#include <cstring>
+#include <memory>
+#include <vector>
+
+#include "common_audio/sparse_fir_filter.h"
+
+namespace webrtc {
+
+// An implementation of a 3-band FIR filter-bank with DCT modulation, similar
+// to the one proposed in "Multirate Signal Processing for Communication
+// Systems" by Fredric J Harris.
+// The low-pass filter prototype has these characteristics:
+// * Pass-band ripple = 0.3dB
+// * Pass-band frequency = 0.147 (7kHz at 48kHz)
+// * Stop-band attenuation = 40dB
+// * Stop-band frequency = 0.192 (9.2kHz at 48kHz)
+// * Delay = 24 samples (500us at 48kHz)
+// * Linear phase
+// This filter bank does not satisfy perfect reconstruction. The SNR after
+// analysis and synthesis (with no processing in between) is approximately
+// 9.5dB, depending on the input signal, after compensating for the delay.
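+// The 24-sample delay quoted above follows from the formula given in
+// three_band_filter_bank.cc: |kNumBands| * |kSparsity| * |kNumCoeffs| / 2 =
+// 3 * 4 * 4 / 2 = 24.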
+class ThreeBandFilterBank final {
+ public:
+  explicit ThreeBandFilterBank(size_t length);
+  ~ThreeBandFilterBank();
+
+  // Splits |in| into 3 downsampled frequency bands in |out|.
+  // |length| is the |in| length. Each of the 3 bands of |out| has to have a
+  // length of |length| / 3.
+  void Analysis(const float* in, size_t length, float* const* out);
+
+  // Merges the 3 downsampled frequency bands in |in| into |out|.
+  // |split_length| is the length of each band of |in|. |out| has to have at
+  // least a length of 3 * |split_length|.
+  void Synthesis(const float* const* in, size_t split_length, float* out);
+
+ private:
+  void DownModulate(const float* in,
+                    size_t split_length,
+                    size_t offset,
+                    float* const* out);
+  void UpModulate(const float* const* in,
+                  size_t split_length,
+                  size_t offset,
+                  float* out);
+
+  std::vector<float> in_buffer_;
+  std::vector<float> out_buffer_;
+  std::vector<std::unique_ptr<SparseFIRFilter>> analysis_filters_;
+  std::vector<std::unique_ptr<SparseFIRFilter>> synthesis_filters_;
+  std::vector<std::vector<float>> dct_modulation_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_THREE_BAND_FILTER_BANK_H_
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/transient/click_annotate.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/transient/click_annotate.cc
new file mode 100644
index 0000000000..a8b4a307e1
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/transient/click_annotate.cc
@@ -0,0 +1,113 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <cfloat>
+#include <cstdio>
+#include <cstdlib>
+#include <memory>
+#include <vector>
+
+#include "modules/audio_processing/transient/file_utils.h"
+#include "modules/audio_processing/transient/transient_detector.h"
+#include "system_wrappers/include/file_wrapper.h"
+
+using webrtc::FileWrapper;
+using webrtc::TransientDetector;
+
+// Application to generate an RTP timing file.
+// Opens the PCM file and divides the signal into frames.
+// Creates a send times array, one for each step.
+// Each block that contains a transient has an infinite send time.
+// The resultant array is written to a DAT file.
+// Returns -1 on error or |lost_packets| otherwise.
+int main(int argc, char* argv[]) {
+  if (argc != 5) {
+    printf("\n%s - Application to generate an RTP timing file.\n\n", argv[0]);
+    printf("%s PCMfile DATfile chunkSize sampleRate\n\n", argv[0]);
+    printf("Opens the PCMfile with sampleRate in Hertz.\n");
+    printf("Creates a send times array, one for each chunkSize ");
+    printf("milliseconds step.\n");
+    printf("Each block that contains a transient has an infinite send time. 
"); + printf("The resultant array is written to a DATfile.\n\n"); + return 0; + } + + std::unique_ptr<FileWrapper> pcm_file(FileWrapper::Create()); + pcm_file->OpenFile(argv[1], true); + if (!pcm_file->is_open()) { + printf("\nThe %s could not be opened.\n\n", argv[1]); + return -1; + } + + std::unique_ptr<FileWrapper> dat_file(FileWrapper::Create()); + dat_file->OpenFile(argv[2], false); + if (!dat_file->is_open()) { + printf("\nThe %s could not be opened.\n\n", argv[2]); + return -1; + } + + int chunk_size_ms = atoi(argv[3]); + if (chunk_size_ms <= 0) { + printf("\nThe chunkSize must be a positive integer\n\n"); + return -1; + } + + int sample_rate_hz = atoi(argv[4]); + if (sample_rate_hz <= 0) { + printf("\nThe sampleRate must be a positive integer\n\n"); + return -1; + } + + TransientDetector detector(sample_rate_hz); + int lost_packets = 0; + size_t audio_buffer_length = chunk_size_ms * sample_rate_hz / 1000; + std::unique_ptr<float[]> audio_buffer(new float[audio_buffer_length]); + std::vector<float> send_times; + + // Read first buffer from the PCM test file. + size_t file_samples_read = ReadInt16FromFileToFloatBuffer( + pcm_file.get(), + audio_buffer_length, + audio_buffer.get()); + for (int time = 0; file_samples_read > 0; time += chunk_size_ms) { + // Pad the rest of the buffer with zeros. + for (size_t i = file_samples_read; i < audio_buffer_length; ++i) { + audio_buffer[i] = 0.0; + } + float value = + detector.Detect(audio_buffer.get(), audio_buffer_length, NULL, 0); + if (value < 0.5f) { + value = time; + } else { + value = FLT_MAX; + ++lost_packets; + } + send_times.push_back(value); + + // Read next buffer from the PCM test file. + file_samples_read = ReadInt16FromFileToFloatBuffer(pcm_file.get(), + audio_buffer_length, + audio_buffer.get()); + } + + size_t floats_written = WriteFloatBufferToFile(dat_file.get(), + send_times.size(), + &send_times[0]); + + if (floats_written == 0) { + printf("\nThe send times could not be written to DAT file\n\n"); + return -1; + } + + pcm_file->CloseFile(); + dat_file->CloseFile(); + + return lost_packets; +} diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/transient/common.h b/third_party/libwebrtc/webrtc/modules/audio_processing/transient/common.h new file mode 100644 index 0000000000..69546fc57b --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/transient/common.h @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_TRANSIENT_COMMON_H_ +#define MODULES_AUDIO_PROCESSING_TRANSIENT_COMMON_H_ +namespace webrtc { +namespace ts { + +static const float kPi = 3.14159265358979323846f; +static const int kChunkSizeMs = 10; +enum { + kSampleRate8kHz = 8000, + kSampleRate16kHz = 16000, + kSampleRate32kHz = 32000, + kSampleRate48kHz = 48000 +}; + +} // namespace ts +} // namespace webrtc +#endif // MODULES_AUDIO_PROCESSING_TRANSIENT_COMMON_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/transient/daubechies_8_wavelet_coeffs.h b/third_party/libwebrtc/webrtc/modules/audio_processing/transient/daubechies_8_wavelet_coeffs.h new file mode 100644 index 0000000000..4de24e0fcb --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/transient/daubechies_8_wavelet_coeffs.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// This header file defines the coefficients of the FIR based approximation of +// the Meyer Wavelet +#ifndef MODULES_AUDIO_PROCESSING_TRANSIENT_DAUBECHIES_8_WAVELET_COEFFS_H_ +#define MODULES_AUDIO_PROCESSING_TRANSIENT_DAUBECHIES_8_WAVELET_COEFFS_H_ + +// Decomposition coefficients Daubechies 8. + +namespace webrtc { + +const int kDaubechies8CoefficientsLength = 16; + +const float kDaubechies8HighPassCoefficients[kDaubechies8CoefficientsLength] + = { + -5.44158422430816093862e-02f, + 3.12871590914465924627e-01f, + -6.75630736298012846142e-01f, + 5.85354683654869090148e-01f, + 1.58291052560238926228e-02f, + -2.84015542962428091389e-01f, + -4.72484573997972536787e-04f, + 1.28747426620186011803e-01f, + 1.73693010020221083600e-02f, + -4.40882539310647192377e-02f, + -1.39810279170155156436e-02f, + 8.74609404701565465445e-03f, + 4.87035299301066034600e-03f, + -3.91740372995977108837e-04f, + -6.75449405998556772109e-04f, + -1.17476784002281916305e-04f +}; + +const float kDaubechies8LowPassCoefficients[kDaubechies8CoefficientsLength] = { + -1.17476784002281916305e-04f, + 6.75449405998556772109e-04f, + -3.91740372995977108837e-04f, + -4.87035299301066034600e-03f, + 8.74609404701565465445e-03f, + 1.39810279170155156436e-02f, + -4.40882539310647192377e-02f, + -1.73693010020221083600e-02f, + 1.28747426620186011803e-01f, + 4.72484573997972536787e-04f, + -2.84015542962428091389e-01f, + -1.58291052560238926228e-02f, + 5.85354683654869090148e-01f, + 6.75630736298012846142e-01f, + 3.12871590914465924627e-01f, + 5.44158422430816093862e-02f +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_TRANSIENT_DAUBECHIES_8_WAVELET_COEFFS_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/transient/dyadic_decimator.h b/third_party/libwebrtc/webrtc/modules/audio_processing/transient/dyadic_decimator.h new file mode 100644 index 0000000000..104f95d3af --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/transient/dyadic_decimator.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TRANSIENT_DYADIC_DECIMATOR_H_
+#define MODULES_AUDIO_PROCESSING_TRANSIENT_DYADIC_DECIMATOR_H_
+
+#include <cstdlib>
+
+#include "typedefs.h"  // NOLINT(build/include)
+
+// Provides a set of static methods to perform dyadic decimations.
+
+namespace webrtc {
+
+// Returns the proper length of the output buffer that you should use for the
+// given |in_length| and decimation |odd_sequence|.
+// This function cannot fail.
+inline size_t GetOutLengthToDyadicDecimate(size_t in_length,
+                                           bool odd_sequence) {
+  size_t out_length = in_length / 2;
+
+  if (in_length % 2 == 1 && !odd_sequence) {
+    ++out_length;
+  }
+
+  return out_length;
+}
+
+// Performs a dyadic decimation: removes every odd/even member of a sequence,
+// halving its overall length.
+// Arguments:
+//    in: array of |in_length|.
+//    odd_sequence: If false, the odd members will be removed (1, 3, 5, ...);
+//                  if true, the even members will be removed (0, 2, 4, ...).
+//    out: array of |out_length|. |out_length| must be large enough to
+//         hold the decimated output. The necessary length can be provided by
+//         GetOutLengthToDyadicDecimate().
+//         Must be previously allocated.
+// Returns the number of output samples, or 0 on error.
+template<typename T>
+static size_t DyadicDecimate(const T* in,
+                             size_t in_length,
+                             bool odd_sequence,
+                             T* out,
+                             size_t out_length) {
+  size_t half_length = GetOutLengthToDyadicDecimate(in_length, odd_sequence);
+
+  if (!in || !out || in_length <= 0 || out_length < half_length) {
+    return 0;
+  }
+
+  size_t output_samples = 0;
+  size_t index_adjustment = odd_sequence ? 1 : 0;
+  for (output_samples = 0; output_samples < half_length; ++output_samples) {
+    out[output_samples] = in[output_samples * 2 + index_adjustment];
+  }
+
+  return output_samples;
+}
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_TRANSIENT_DYADIC_DECIMATOR_H_
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/transient/dyadic_decimator_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/transient/dyadic_decimator_unittest.cc
new file mode 100644
index 0000000000..c407f47ffb
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/transient/dyadic_decimator_unittest.cc
@@ -0,0 +1,126 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */ + +#include "modules/audio_processing/transient/dyadic_decimator.h" + +#include "test/gtest.h" + +namespace webrtc { + +static const size_t kEvenBufferLength = 6; +static const size_t kOddBufferLength = 5; +static const size_t kOutBufferLength = 3; + +int16_t const test_buffer_even_len[] = {0, 1, 2, 3, 4, 5}; +int16_t const test_buffer_odd_len[] = {0, 1, 2, 3, 4}; +int16_t test_buffer_out[kOutBufferLength]; + +TEST(DyadicDecimatorTest, GetOutLengthToDyadicDecimate) { + EXPECT_EQ(3u, GetOutLengthToDyadicDecimate(6, false)); + EXPECT_EQ(3u, GetOutLengthToDyadicDecimate(6, true)); + EXPECT_EQ(3u, GetOutLengthToDyadicDecimate(5, false)); + EXPECT_EQ(2u, GetOutLengthToDyadicDecimate(5, true)); +} + + +TEST(DyadicDecimatorTest, DyadicDecimateErrorValues) { + size_t out_samples = 0; + + out_samples = DyadicDecimate(static_cast<int16_t*>(NULL), + kEvenBufferLength, + false, // Even sequence. + test_buffer_out, + kOutBufferLength); + EXPECT_EQ(0u, out_samples); + + out_samples = DyadicDecimate(test_buffer_even_len, + kEvenBufferLength, + false, // Even sequence. + static_cast<int16_t*>(NULL), + kOutBufferLength); + EXPECT_EQ(0u, out_samples); + + // Less than required |out_length|. + out_samples = DyadicDecimate(test_buffer_even_len, + kEvenBufferLength, + false, // Even sequence. + test_buffer_out, + 2); + EXPECT_EQ(0u, out_samples); +} + +TEST(DyadicDecimatorTest, DyadicDecimateEvenLengthEvenSequence) { + size_t expected_out_samples = + GetOutLengthToDyadicDecimate(kEvenBufferLength, false); + + size_t out_samples = DyadicDecimate(test_buffer_even_len, + kEvenBufferLength, + false, // Even sequence. + test_buffer_out, + kOutBufferLength); + + EXPECT_EQ(expected_out_samples, out_samples); + + EXPECT_EQ(0, test_buffer_out[0]); + EXPECT_EQ(2, test_buffer_out[1]); + EXPECT_EQ(4, test_buffer_out[2]); +} + +TEST(DyadicDecimatorTest, DyadicDecimateEvenLengthOddSequence) { + size_t expected_out_samples = + GetOutLengthToDyadicDecimate(kEvenBufferLength, true); + + size_t out_samples = DyadicDecimate(test_buffer_even_len, + kEvenBufferLength, + true, // Odd sequence. + test_buffer_out, + kOutBufferLength); + + EXPECT_EQ(expected_out_samples, out_samples); + + EXPECT_EQ(1, test_buffer_out[0]); + EXPECT_EQ(3, test_buffer_out[1]); + EXPECT_EQ(5, test_buffer_out[2]); +} + +TEST(DyadicDecimatorTest, DyadicDecimateOddLengthEvenSequence) { + size_t expected_out_samples = + GetOutLengthToDyadicDecimate(kOddBufferLength, false); + + size_t out_samples = DyadicDecimate(test_buffer_odd_len, + kOddBufferLength, + false, // Even sequence. + test_buffer_out, + kOutBufferLength); + + EXPECT_EQ(expected_out_samples, out_samples); + + EXPECT_EQ(0, test_buffer_out[0]); + EXPECT_EQ(2, test_buffer_out[1]); + EXPECT_EQ(4, test_buffer_out[2]); +} + +TEST(DyadicDecimatorTest, DyadicDecimateOddLengthOddSequence) { + size_t expected_out_samples = + GetOutLengthToDyadicDecimate(kOddBufferLength, true); + + size_t out_samples = DyadicDecimate(test_buffer_odd_len, + kOddBufferLength, + true, // Odd sequence. 
+ test_buffer_out, + kOutBufferLength); + + EXPECT_EQ(expected_out_samples, out_samples); + + EXPECT_EQ(1, test_buffer_out[0]); + EXPECT_EQ(3, test_buffer_out[1]); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/transient/file_utils.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/transient/file_utils.cc new file mode 100644 index 0000000000..7bf2e084f8 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/transient/file_utils.cc @@ -0,0 +1,258 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/transient/file_utils.h" + +#include <memory> + +#include "system_wrappers/include/file_wrapper.h" +#include "typedefs.h" // NOLINT(build/include) + +namespace webrtc { + +int ConvertByteArrayToFloat(const uint8_t bytes[4], float* out) { + if (!bytes || !out) { + return -1; + } + + uint32_t binary_value = 0; + for (int i = 3; i >= 0; --i) { + binary_value <<= 8; + binary_value += bytes[i]; + } + + *out = bit_cast<float>(binary_value); + + return 0; +} + +int ConvertByteArrayToDouble(const uint8_t bytes[8], double* out) { + if (!bytes || !out) { + return -1; + } + + uint64_t binary_value = 0; + for (int i = 7; i >= 0; --i) { + binary_value <<= 8; + binary_value += bytes[i]; + } + + *out = bit_cast<double>(binary_value); + + return 0; +} + +int ConvertFloatToByteArray(float value, uint8_t out_bytes[4]) { + if (!out_bytes) { + return -1; + } + + uint32_t binary_value = bit_cast<uint32_t>(value); + for (size_t i = 0; i < 4; ++i) { + out_bytes[i] = binary_value; + binary_value >>= 8; + } + + return 0; +} + +int ConvertDoubleToByteArray(double value, uint8_t out_bytes[8]) { + if (!out_bytes) { + return -1; + } + + uint64_t binary_value = bit_cast<uint64_t>(value); + for (size_t i = 0; i < 8; ++i) { + out_bytes[i] = binary_value; + binary_value >>= 8; + } + + return 0; +} + +size_t ReadInt16BufferFromFile(FileWrapper* file, + size_t length, + int16_t* buffer) { + if (!file || !file->is_open() || !buffer || length <= 0) { + return 0; + } + + std::unique_ptr<uint8_t[]> byte_array(new uint8_t[2]); + + size_t int16s_read = 0; + + while (int16s_read < length) { + size_t bytes_read = file->Read(byte_array.get(), 2); + if (bytes_read < 2) { + break; + } + int16_t value = byte_array[1]; + value <<= 8; + value += byte_array[0]; + buffer[int16s_read] = value; + ++int16s_read; + } + + return int16s_read; +} + +size_t ReadInt16FromFileToFloatBuffer(FileWrapper* file, + size_t length, + float* buffer) { + if (!file || !file->is_open() || !buffer || length <= 0) { + return 0; + } + + std::unique_ptr<int16_t[]> buffer16(new int16_t[length]); + + size_t int16s_read = ReadInt16BufferFromFile(file, length, buffer16.get()); + + for (size_t i = 0; i < int16s_read; ++i) { + buffer[i] = buffer16[i]; + } + + return int16s_read; +} + +size_t ReadInt16FromFileToDoubleBuffer(FileWrapper* file, + size_t length, + double* buffer) { + if (!file || !file->is_open() || !buffer || length <= 0) { + return 0; + } + + std::unique_ptr<int16_t[]> buffer16(new int16_t[length]); + + size_t int16s_read = ReadInt16BufferFromFile(file, length, buffer16.get()); + + for (size_t 
i = 0; i < int16s_read; ++i) { + buffer[i] = buffer16[i]; + } + + return int16s_read; +} + +size_t ReadFloatBufferFromFile(FileWrapper* file, + size_t length, + float* buffer) { + if (!file || !file->is_open() || !buffer || length <= 0) { + return 0; + } + + std::unique_ptr<uint8_t[]> byte_array(new uint8_t[4]); + + size_t floats_read = 0; + + while (floats_read < length) { + size_t bytes_read = file->Read(byte_array.get(), 4); + if (bytes_read < 4) { + break; + } + ConvertByteArrayToFloat(byte_array.get(), &buffer[floats_read]); + ++floats_read; + } + + return floats_read; +} + +size_t ReadDoubleBufferFromFile(FileWrapper* file, + size_t length, + double* buffer) { + if (!file || !file->is_open() || !buffer || length <= 0) { + return 0; + } + + std::unique_ptr<uint8_t[]> byte_array(new uint8_t[8]); + + size_t doubles_read = 0; + + while (doubles_read < length) { + size_t bytes_read = file->Read(byte_array.get(), 8); + if (bytes_read < 8) { + break; + } + ConvertByteArrayToDouble(byte_array.get(), &buffer[doubles_read]); + ++doubles_read; + } + + return doubles_read; +} + +size_t WriteInt16BufferToFile(FileWrapper* file, + size_t length, + const int16_t* buffer) { + if (!file || !file->is_open() || !buffer || length <= 0) { + return 0; + } + + std::unique_ptr<uint8_t[]> byte_array(new uint8_t[2]); + + size_t int16s_written = 0; + + for (int16s_written = 0; int16s_written < length; ++int16s_written) { + // Get byte representation. + byte_array[0] = buffer[int16s_written] & 0xFF; + byte_array[1] = (buffer[int16s_written] >> 8) & 0xFF; + + file->Write(byte_array.get(), 2); + } + + file->Flush(); + + return int16s_written; +} + +size_t WriteFloatBufferToFile(FileWrapper* file, + size_t length, + const float* buffer) { + if (!file || !file->is_open() || !buffer || length <= 0) { + return 0; + } + + std::unique_ptr<uint8_t[]> byte_array(new uint8_t[4]); + + size_t floats_written = 0; + + for (floats_written = 0; floats_written < length; ++floats_written) { + // Get byte representation. + ConvertFloatToByteArray(buffer[floats_written], byte_array.get()); + + file->Write(byte_array.get(), 4); + } + + file->Flush(); + + return floats_written; +} + +size_t WriteDoubleBufferToFile(FileWrapper* file, + size_t length, + const double* buffer) { + if (!file || !file->is_open() || !buffer || length <= 0) { + return 0; + } + + std::unique_ptr<uint8_t[]> byte_array(new uint8_t[8]); + + size_t doubles_written = 0; + + for (doubles_written = 0; doubles_written < length; ++doubles_written) { + // Get byte representation. + ConvertDoubleToByteArray(buffer[doubles_written], byte_array.get()); + + file->Write(byte_array.get(), 8); + } + + file->Flush(); + + return doubles_written; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/transient/file_utils.h b/third_party/libwebrtc/webrtc/modules/audio_processing/transient/file_utils.h new file mode 100644 index 0000000000..3f05c1dd52 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/transient/file_utils.h @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TRANSIENT_FILE_UTILS_H_
+#define MODULES_AUDIO_PROCESSING_TRANSIENT_FILE_UTILS_H_
+
+#include <string.h>
+
+#include "system_wrappers/include/file_wrapper.h"
+#include "typedefs.h"  // NOLINT(build/include)
+
+namespace webrtc {
+
+// This is a copy of the cast included in the Chromium codebase here:
+// http://cs.chromium.org/src/third_party/cld/base/casts.h
+template <class Dest, class Source>
+inline Dest bit_cast(const Source& source) {
+  // A compile error here means your Dest and Source have different sizes.
+  static_assert(sizeof(Dest) == sizeof(Source),
+                "Dest and Source have different sizes");
+
+  Dest dest;
+  memcpy(&dest, &source, sizeof(dest));
+  return dest;
+}
+
+// Converts the byte array with binary float representation to float.
+// Bytes must be in little-endian order.
+// Returns 0 if correct, -1 on error.
+int ConvertByteArrayToFloat(const uint8_t bytes[4], float* out);
+
+// Converts the byte array with binary double representation to double.
+// Bytes must be in little-endian order.
+// Returns 0 if correct, -1 on error.
+int ConvertByteArrayToDouble(const uint8_t bytes[8], double* out);
+
+// Converts a float to a byte array with binary float representation.
+// Bytes will be in little-endian order.
+// Returns 0 if correct, -1 on error.
+int ConvertFloatToByteArray(float value, uint8_t out_bytes[4]);
+
+// Converts a double to a byte array with binary double representation.
+// Bytes will be in little-endian order.
+// Returns 0 if correct, -1 on error.
+int ConvertDoubleToByteArray(double value, uint8_t out_bytes[8]);
+
+// Reads |length| 16-bit integers from |file| to |buffer|.
+// |file| must be previously opened.
+// Returns the number of 16-bit integers read, or 0 on error.
+size_t ReadInt16BufferFromFile(FileWrapper* file,
+                               size_t length,
+                               int16_t* buffer);
+
+// Reads |length| 16-bit integers from |file| and stores those values
+// (converting them) in |buffer|.
+// |file| must be previously opened.
+// Returns the number of 16-bit integers read, or 0 on error.
+size_t ReadInt16FromFileToFloatBuffer(FileWrapper* file,
+                                      size_t length,
+                                      float* buffer);
+
+// Reads |length| 16-bit integers from |file| and stores those values
+// (converting them) in |buffer|.
+// |file| must be previously opened.
+// Returns the number of 16-bit integers read, or 0 on error.
+size_t ReadInt16FromFileToDoubleBuffer(FileWrapper* file,
+                                       size_t length,
+                                       double* buffer);
+
+// Reads |length| floats in binary representation (4 bytes) from |file| to
+// |buffer|.
+// |file| must be previously opened.
+// Returns the number of floats read, or 0 on error.
+size_t ReadFloatBufferFromFile(FileWrapper* file, size_t length, float* buffer);
+
+// Reads |length| doubles in binary representation (8 bytes) from |file| to
+// |buffer|.
+// |file| must be previously opened.
+// Returns the number of doubles read, or 0 on error.
+size_t ReadDoubleBufferFromFile(FileWrapper* file,
+                                size_t length,
+                                double* buffer);
+
+// Writes |length| 16-bit integers from |buffer| in binary representation (2
+// bytes) to |file|. It flushes |file|, so after this call there are no
+// pending writes.
+// |file| must be previously opened.
+// Returns the number of 16-bit integers written, or 0 on error.
+size_t WriteInt16BufferToFile(FileWrapper* file,
+                              size_t length,
+                              const int16_t* buffer);
+
+// Writes |length| floats from |buffer| in binary representation (4 bytes) to
+// |file|. It flushes |file|, so after this call there are no pending writes.
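+// (Worked example: the float 1.0f has IEEE-754 bits 0x3F800000, so it is
+// stored on disk as the little-endian byte sequence {0x00, 0x00, 0x80, 0x3F};
+// ConvertFloatToByteArray() above produces exactly this layout.)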
+// |file| must be previously opened.
+// Returns the number of floats written, or 0 on error.
+size_t WriteFloatBufferToFile(FileWrapper* file,
+                              size_t length,
+                              const float* buffer);
+
+// Writes |length| doubles from |buffer| in binary representation (8 bytes) to
+// |file|. It flushes |file|, so after this call there are no pending writes.
+// |file| must be previously opened.
+// Returns the number of doubles written, or 0 on error.
+size_t WriteDoubleBufferToFile(FileWrapper* file,
+                               size_t length,
+                               const double* buffer);
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_TRANSIENT_FILE_UTILS_H_
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/transient/file_utils_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/transient/file_utils_unittest.cc
new file mode 100644
index 0000000000..c5e03990b5
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/transient/file_utils_unittest.cc
@@ -0,0 +1,539 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/transient/file_utils.h"
+
+#include <string.h>
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "system_wrappers/include/file_wrapper.h"
+#include "test/gtest.h"
+#include "test/testsupport/fileutils.h"
+#include "typedefs.h"  // NOLINT(build/include)
+
+namespace webrtc {
+
+static const uint8_t kPiBytesf[4] = {0xDB, 0x0F, 0x49, 0x40};
+static const uint8_t kEBytesf[4] = {0x54, 0xF8, 0x2D, 0x40};
+static const uint8_t kAvogadroBytesf[4] = {0x2F, 0x0C, 0xFF, 0x66};
+
+static const uint8_t kPiBytes[8] =
+    {0x18, 0x2D, 0x44, 0x54, 0xFB, 0x21, 0x09, 0x40};
+static const uint8_t kEBytes[8] =
+    {0x69, 0x57, 0x14, 0x8B, 0x0A, 0xBF, 0x05, 0x40};
+static const uint8_t kAvogadroBytes[8] =
+    {0xF4, 0xBC, 0xA8, 0xDF, 0x85, 0xE1, 0xDF, 0x44};
+
+static const double kPi = 3.14159265358979323846;
+static const double kE = 2.71828182845904523536;
+static const double kAvogadro = 602214100000000000000000.0;
+
+class TransientFileUtilsTest : public ::testing::Test {
+ protected:
+  TransientFileUtilsTest()
+      : kTestFileName(
+            test::ResourcePath("audio_processing/transient/double-utils",
+                               "dat")),
+        kTestFileNamef(
+            test::ResourcePath("audio_processing/transient/float-utils",
+                               "dat")) {}
+
+  ~TransientFileUtilsTest() override {
+    CleanupTempFiles();
+  }
+
+  std::string CreateTempFilename(const std::string& dir,
+                                 const std::string& prefix) {
+    std::string filename = test::TempFilename(dir, prefix);
+    temp_filenames_.push_back(filename);
+    return filename;
+  }
+
+  void CleanupTempFiles() {
+    for (const std::string& filename : temp_filenames_) {
+      remove(filename.c_str());
+    }
+    temp_filenames_.clear();
+  }
+
+  // This file (used in some tests) contains binary data. The data correspond
+  // to the double representation of the constants Pi, E, and Avogadro's
+  // number, appended in that order.
+  const std::string kTestFileName;
+
+  // This file (used in some tests) contains binary data. The data correspond
+  // to the float representation of the constants Pi, E, and Avogadro's
+  // number, appended in that order.
+ const std::string kTestFileNamef; + + // List of temporary filenames created by CreateTempFilename. + std::vector<std::string> temp_filenames_; +}; + +#if defined(WEBRTC_IOS) +#define MAYBE_ConvertByteArrayToFloat DISABLED_ConvertByteArrayToFloat +#else +#define MAYBE_ConvertByteArrayToFloat ConvertByteArrayToFloat +#endif +TEST_F(TransientFileUtilsTest, MAYBE_ConvertByteArrayToFloat) { + float value = 0.0; + + EXPECT_EQ(0, ConvertByteArrayToFloat(kPiBytesf, &value)); + EXPECT_FLOAT_EQ(kPi, value); + + EXPECT_EQ(0, ConvertByteArrayToFloat(kEBytesf, &value)); + EXPECT_FLOAT_EQ(kE, value); + + EXPECT_EQ(0, ConvertByteArrayToFloat(kAvogadroBytesf, &value)); + EXPECT_FLOAT_EQ(kAvogadro, value); +} + +#if defined(WEBRTC_IOS) +#define MAYBE_ConvertByteArrayToDouble DISABLED_ConvertByteArrayToDouble +#else +#define MAYBE_ConvertByteArrayToDouble ConvertByteArrayToDouble +#endif +TEST_F(TransientFileUtilsTest, MAYBE_ConvertByteArrayToDouble) { + double value = 0.0; + + EXPECT_EQ(0, ConvertByteArrayToDouble(kPiBytes, &value)); + EXPECT_DOUBLE_EQ(kPi, value); + + EXPECT_EQ(0, ConvertByteArrayToDouble(kEBytes, &value)); + EXPECT_DOUBLE_EQ(kE, value); + + EXPECT_EQ(0, ConvertByteArrayToDouble(kAvogadroBytes, &value)); + EXPECT_DOUBLE_EQ(kAvogadro, value); +} + +#if defined(WEBRTC_IOS) +#define MAYBE_ConvertFloatToByteArray DISABLED_ConvertFloatToByteArray +#else +#define MAYBE_ConvertFloatToByteArray ConvertFloatToByteArray +#endif +TEST_F(TransientFileUtilsTest, MAYBE_ConvertFloatToByteArray) { + std::unique_ptr<uint8_t[]> bytes(new uint8_t[4]); + + EXPECT_EQ(0, ConvertFloatToByteArray(kPi, bytes.get())); + EXPECT_EQ(0, memcmp(bytes.get(), kPiBytesf, 4)); + + EXPECT_EQ(0, ConvertFloatToByteArray(kE, bytes.get())); + EXPECT_EQ(0, memcmp(bytes.get(), kEBytesf, 4)); + + EXPECT_EQ(0, ConvertFloatToByteArray(kAvogadro, bytes.get())); + EXPECT_EQ(0, memcmp(bytes.get(), kAvogadroBytesf, 4)); +} + +#if defined(WEBRTC_IOS) +#define MAYBE_ConvertDoubleToByteArray DISABLED_ConvertDoubleToByteArray +#else +#define MAYBE_ConvertDoubleToByteArray ConvertDoubleToByteArray +#endif +TEST_F(TransientFileUtilsTest, MAYBE_ConvertDoubleToByteArray) { + std::unique_ptr<uint8_t[]> bytes(new uint8_t[8]); + + EXPECT_EQ(0, ConvertDoubleToByteArray(kPi, bytes.get())); + EXPECT_EQ(0, memcmp(bytes.get(), kPiBytes, 8)); + + EXPECT_EQ(0, ConvertDoubleToByteArray(kE, bytes.get())); + EXPECT_EQ(0, memcmp(bytes.get(), kEBytes, 8)); + + EXPECT_EQ(0, ConvertDoubleToByteArray(kAvogadro, bytes.get())); + EXPECT_EQ(0, memcmp(bytes.get(), kAvogadroBytes, 8)); +} + +#if defined(WEBRTC_IOS) +#define MAYBE_ReadInt16BufferFromFile DISABLED_ReadInt16BufferFromFile +#else +#define MAYBE_ReadInt16BufferFromFile ReadInt16BufferFromFile +#endif +TEST_F(TransientFileUtilsTest, MAYBE_ReadInt16BufferFromFile) { + std::string test_filename = kTestFileName; + + std::unique_ptr<FileWrapper> file(FileWrapper::Create()); + + file->OpenFile(test_filename.c_str(), true); // Read only. 
+ ASSERT_TRUE(file->is_open()) << "File could not be opened:\n" + << kTestFileName.c_str(); + + const size_t kBufferLength = 12; + std::unique_ptr<int16_t[]> buffer(new int16_t[kBufferLength]); + + EXPECT_EQ(kBufferLength, ReadInt16BufferFromFile(file.get(), + kBufferLength, + buffer.get())); + EXPECT_EQ(22377, buffer[4]); + EXPECT_EQ(16389, buffer[7]); + EXPECT_EQ(17631, buffer[kBufferLength - 1]); + + file->Rewind(); + + // The next test is for checking the case where there are not as much data as + // needed in the file, but reads to the end, and it returns the number of + // int16s read. + const size_t kBufferLenghtLargerThanFile = kBufferLength * 2; + buffer.reset(new int16_t[kBufferLenghtLargerThanFile]); + EXPECT_EQ(kBufferLength, ReadInt16BufferFromFile(file.get(), + kBufferLenghtLargerThanFile, + buffer.get())); + EXPECT_EQ(11544, buffer[0]); + EXPECT_EQ(22377, buffer[4]); + EXPECT_EQ(16389, buffer[7]); + EXPECT_EQ(17631, buffer[kBufferLength - 1]); +} + +#if defined(WEBRTC_IOS) +#define MAYBE_ReadInt16FromFileToFloatBuffer \ + DISABLED_ReadInt16FromFileToFloatBuffer +#else +#define MAYBE_ReadInt16FromFileToFloatBuffer ReadInt16FromFileToFloatBuffer +#endif +TEST_F(TransientFileUtilsTest, MAYBE_ReadInt16FromFileToFloatBuffer) { + std::string test_filename = kTestFileName; + + std::unique_ptr<FileWrapper> file(FileWrapper::Create()); + + file->OpenFile(test_filename.c_str(), true); // Read only. + ASSERT_TRUE(file->is_open()) << "File could not be opened:\n" + << kTestFileName.c_str(); + + const size_t kBufferLength = 12; + std::unique_ptr<float[]> buffer(new float[kBufferLength]); + + EXPECT_EQ(kBufferLength, ReadInt16FromFileToFloatBuffer(file.get(), + kBufferLength, + buffer.get())); + + EXPECT_DOUBLE_EQ(11544, buffer[0]); + EXPECT_DOUBLE_EQ(22377, buffer[4]); + EXPECT_DOUBLE_EQ(16389, buffer[7]); + EXPECT_DOUBLE_EQ(17631, buffer[kBufferLength - 1]); + + file->Rewind(); + + // The next test is for checking the case where there are not as much data as + // needed in the file, but reads to the end, and it returns the number of + // int16s read. + const size_t kBufferLenghtLargerThanFile = kBufferLength * 2; + buffer.reset(new float[kBufferLenghtLargerThanFile]); + EXPECT_EQ(kBufferLength, + ReadInt16FromFileToFloatBuffer(file.get(), + kBufferLenghtLargerThanFile, + buffer.get())); + EXPECT_DOUBLE_EQ(11544, buffer[0]); + EXPECT_DOUBLE_EQ(22377, buffer[4]); + EXPECT_DOUBLE_EQ(16389, buffer[7]); + EXPECT_DOUBLE_EQ(17631, buffer[kBufferLength - 1]); +} + +#if defined(WEBRTC_IOS) +#define MAYBE_ReadInt16FromFileToDoubleBuffer \ + DISABLED_ReadInt16FromFileToDoubleBuffer +#else +#define MAYBE_ReadInt16FromFileToDoubleBuffer ReadInt16FromFileToDoubleBuffer +#endif +TEST_F(TransientFileUtilsTest, MAYBE_ReadInt16FromFileToDoubleBuffer) { + std::string test_filename = kTestFileName; + + std::unique_ptr<FileWrapper> file(FileWrapper::Create()); + + file->OpenFile(test_filename.c_str(), true); // Read only. 
+ ASSERT_TRUE(file->is_open()) << "File could not be opened:\n" + << kTestFileName.c_str(); + + const size_t kBufferLength = 12; + std::unique_ptr<double[]> buffer(new double[kBufferLength]); + + EXPECT_EQ(kBufferLength, ReadInt16FromFileToDoubleBuffer(file.get(), + kBufferLength, + buffer.get())); + EXPECT_DOUBLE_EQ(11544, buffer[0]); + EXPECT_DOUBLE_EQ(22377, buffer[4]); + EXPECT_DOUBLE_EQ(16389, buffer[7]); + EXPECT_DOUBLE_EQ(17631, buffer[kBufferLength - 1]); + + file->Rewind(); + + // The next test is for checking the case where there are not as much data as + // needed in the file, but reads to the end, and it returns the number of + // int16s read. + const size_t kBufferLenghtLargerThanFile = kBufferLength * 2; + buffer.reset(new double[kBufferLenghtLargerThanFile]); + EXPECT_EQ(kBufferLength, + ReadInt16FromFileToDoubleBuffer(file.get(), + kBufferLenghtLargerThanFile, + buffer.get())); + EXPECT_DOUBLE_EQ(11544, buffer[0]); + EXPECT_DOUBLE_EQ(22377, buffer[4]); + EXPECT_DOUBLE_EQ(16389, buffer[7]); + EXPECT_DOUBLE_EQ(17631, buffer[kBufferLength - 1]); +} + +#if defined(WEBRTC_IOS) +#define MAYBE_ReadFloatBufferFromFile DISABLED_ReadFloatBufferFromFile +#else +#define MAYBE_ReadFloatBufferFromFile ReadFloatBufferFromFile +#endif +TEST_F(TransientFileUtilsTest, MAYBE_ReadFloatBufferFromFile) { + std::string test_filename = kTestFileNamef; + + std::unique_ptr<FileWrapper> file(FileWrapper::Create()); + + file->OpenFile(test_filename.c_str(), true); // Read only. + ASSERT_TRUE(file->is_open()) << "File could not be opened:\n" + << kTestFileNamef.c_str(); + + const size_t kBufferLength = 3; + std::unique_ptr<float[]> buffer(new float[kBufferLength]); + + EXPECT_EQ(kBufferLength, ReadFloatBufferFromFile(file.get(), + kBufferLength, + buffer.get())); + EXPECT_FLOAT_EQ(kPi, buffer[0]); + EXPECT_FLOAT_EQ(kE, buffer[1]); + EXPECT_FLOAT_EQ(kAvogadro, buffer[2]); + + file->Rewind(); + + // The next test is for checking the case where there are not as much data as + // needed in the file, but reads to the end, and it returns the number of + // doubles read. + const size_t kBufferLenghtLargerThanFile = kBufferLength * 2; + buffer.reset(new float[kBufferLenghtLargerThanFile]); + EXPECT_EQ(kBufferLength, ReadFloatBufferFromFile(file.get(), + kBufferLenghtLargerThanFile, + buffer.get())); + EXPECT_FLOAT_EQ(kPi, buffer[0]); + EXPECT_FLOAT_EQ(kE, buffer[1]); + EXPECT_FLOAT_EQ(kAvogadro, buffer[2]); +} + +#if defined(WEBRTC_IOS) +#define MAYBE_ReadDoubleBufferFromFile DISABLED_ReadDoubleBufferFromFile +#else +#define MAYBE_ReadDoubleBufferFromFile ReadDoubleBufferFromFile +#endif +TEST_F(TransientFileUtilsTest, MAYBE_ReadDoubleBufferFromFile) { + std::string test_filename = kTestFileName; + + std::unique_ptr<FileWrapper> file(FileWrapper::Create()); + + file->OpenFile(test_filename.c_str(), true); // Read only. + ASSERT_TRUE(file->is_open()) << "File could not be opened:\n" + << kTestFileName.c_str(); + + const size_t kBufferLength = 3; + std::unique_ptr<double[]> buffer(new double[kBufferLength]); + + EXPECT_EQ(kBufferLength, ReadDoubleBufferFromFile(file.get(), + kBufferLength, + buffer.get())); + EXPECT_DOUBLE_EQ(kPi, buffer[0]); + EXPECT_DOUBLE_EQ(kE, buffer[1]); + EXPECT_DOUBLE_EQ(kAvogadro, buffer[2]); + + file->Rewind(); + + // The next test is for checking the case where there are not as much data as + // needed in the file, but reads to the end, and it returns the number of + // doubles read. 
+ const size_t kBufferLenghtLargerThanFile = kBufferLength * 2; + buffer.reset(new double[kBufferLenghtLargerThanFile]); + EXPECT_EQ(kBufferLength, ReadDoubleBufferFromFile(file.get(), + kBufferLenghtLargerThanFile, + buffer.get())); + EXPECT_DOUBLE_EQ(kPi, buffer[0]); + EXPECT_DOUBLE_EQ(kE, buffer[1]); + EXPECT_DOUBLE_EQ(kAvogadro, buffer[2]); +} + +#if defined(WEBRTC_IOS) +#define MAYBE_WriteInt16BufferToFile DISABLED_WriteInt16BufferToFile +#else +#define MAYBE_WriteInt16BufferToFile WriteInt16BufferToFile +#endif +TEST_F(TransientFileUtilsTest, MAYBE_WriteInt16BufferToFile) { + std::unique_ptr<FileWrapper> file(FileWrapper::Create()); + + std::string kOutFileName = CreateTempFilename(test::OutputPath(), + "utils_test"); + + file->OpenFile(kOutFileName.c_str(), false); // Write mode. + ASSERT_TRUE(file->is_open()) << "File could not be opened:\n" + << kOutFileName.c_str(); + + const size_t kBufferLength = 3; + std::unique_ptr<int16_t[]> written_buffer(new int16_t[kBufferLength]); + std::unique_ptr<int16_t[]> read_buffer(new int16_t[kBufferLength]); + + written_buffer[0] = 1; + written_buffer[1] = 2; + written_buffer[2] = 3; + + EXPECT_EQ(kBufferLength, WriteInt16BufferToFile(file.get(), + kBufferLength, + written_buffer.get())); + + file->CloseFile(); + + file->OpenFile(kOutFileName.c_str(), true); // Read only. + ASSERT_TRUE(file->is_open()) << "File could not be opened:\n" + << kOutFileName.c_str(); + + EXPECT_EQ(kBufferLength, ReadInt16BufferFromFile(file.get(), + kBufferLength, + read_buffer.get())); + EXPECT_EQ(0, memcmp(written_buffer.get(), + read_buffer.get(), + kBufferLength * sizeof(written_buffer[0]))); +} + +#if defined(WEBRTC_IOS) +#define MAYBE_WriteFloatBufferToFile DISABLED_WriteFloatBufferToFile +#else +#define MAYBE_WriteFloatBufferToFile WriteFloatBufferToFile +#endif +TEST_F(TransientFileUtilsTest, MAYBE_WriteFloatBufferToFile) { + std::unique_ptr<FileWrapper> file(FileWrapper::Create()); + + std::string kOutFileName = CreateTempFilename(test::OutputPath(), + "utils_test"); + + file->OpenFile(kOutFileName.c_str(), false); // Write mode. + ASSERT_TRUE(file->is_open()) << "File could not be opened:\n" + << kOutFileName.c_str(); + + const size_t kBufferLength = 3; + std::unique_ptr<float[]> written_buffer(new float[kBufferLength]); + std::unique_ptr<float[]> read_buffer(new float[kBufferLength]); + + written_buffer[0] = static_cast<float>(kPi); + written_buffer[1] = static_cast<float>(kE); + written_buffer[2] = static_cast<float>(kAvogadro); + + EXPECT_EQ(kBufferLength, WriteFloatBufferToFile(file.get(), + kBufferLength, + written_buffer.get())); + + file->CloseFile(); + + file->OpenFile(kOutFileName.c_str(), true); // Read only. + ASSERT_TRUE(file->is_open()) << "File could not be opened:\n" + << kOutFileName.c_str(); + + EXPECT_EQ(kBufferLength, ReadFloatBufferFromFile(file.get(), + kBufferLength, + read_buffer.get())); + EXPECT_EQ(0, memcmp(written_buffer.get(), + read_buffer.get(), + kBufferLength * sizeof(written_buffer[0]))); +} + +#if defined(WEBRTC_IOS) +#define MAYBE_WriteDoubleBufferToFile DISABLED_WriteDoubleBufferToFile +#else +#define MAYBE_WriteDoubleBufferToFile WriteDoubleBufferToFile +#endif +TEST_F(TransientFileUtilsTest, MAYBE_WriteDoubleBufferToFile) { + std::unique_ptr<FileWrapper> file(FileWrapper::Create()); + + std::string kOutFileName = CreateTempFilename(test::OutputPath(), + "utils_test"); + + file->OpenFile(kOutFileName.c_str(), false); // Write mode. 
+ ASSERT_TRUE(file->is_open()) << "File could not be opened:\n" + << kOutFileName.c_str(); + + const size_t kBufferLength = 3; + std::unique_ptr<double[]> written_buffer(new double[kBufferLength]); + std::unique_ptr<double[]> read_buffer(new double[kBufferLength]); + + written_buffer[0] = kPi; + written_buffer[1] = kE; + written_buffer[2] = kAvogadro; + + EXPECT_EQ(kBufferLength, WriteDoubleBufferToFile(file.get(), + kBufferLength, + written_buffer.get())); + + file->CloseFile(); + + file->OpenFile(kOutFileName.c_str(), true); // Read only. + ASSERT_TRUE(file->is_open()) << "File could not be opened:\n" + << kOutFileName.c_str(); + + EXPECT_EQ(kBufferLength, ReadDoubleBufferFromFile(file.get(), + kBufferLength, + read_buffer.get())); + EXPECT_EQ(0, memcmp(written_buffer.get(), + read_buffer.get(), + kBufferLength * sizeof(written_buffer[0]))); +} + +#if defined(WEBRTC_IOS) +#define MAYBE_ExpectedErrorReturnValues DISABLED_ExpectedErrorReturnValues +#else +#define MAYBE_ExpectedErrorReturnValues ExpectedErrorReturnValues +#endif +TEST_F(TransientFileUtilsTest, MAYBE_ExpectedErrorReturnValues) { + std::string test_filename = kTestFileName; + + double value; + std::unique_ptr<int16_t[]> int16_buffer(new int16_t[1]); + std::unique_ptr<double[]> double_buffer(new double[1]); + std::unique_ptr<FileWrapper> file(FileWrapper::Create()); + + EXPECT_EQ(-1, ConvertByteArrayToDouble(NULL, &value)); + EXPECT_EQ(-1, ConvertByteArrayToDouble(kPiBytes, NULL)); + + EXPECT_EQ(-1, ConvertDoubleToByteArray(kPi, NULL)); + + // Tests with file not opened. + EXPECT_EQ(0u, ReadInt16BufferFromFile(file.get(), 1, int16_buffer.get())); + EXPECT_EQ(0u, ReadInt16FromFileToDoubleBuffer(file.get(), + 1, + double_buffer.get())); + EXPECT_EQ(0u, ReadDoubleBufferFromFile(file.get(), 1, double_buffer.get())); + EXPECT_EQ(0u, WriteInt16BufferToFile(file.get(), 1, int16_buffer.get())); + EXPECT_EQ(0u, WriteDoubleBufferToFile(file.get(), 1, double_buffer.get())); + + file->OpenFile(test_filename.c_str(), true); // Read only. 
+ ASSERT_TRUE(file->is_open()) << "File could not be opened:\n" + << kTestFileName.c_str(); + + EXPECT_EQ(0u, ReadInt16BufferFromFile(NULL, 1, int16_buffer.get())); + EXPECT_EQ(0u, ReadInt16BufferFromFile(file.get(), 1, NULL)); + EXPECT_EQ(0u, ReadInt16BufferFromFile(file.get(), 0, int16_buffer.get())); + + EXPECT_EQ(0u, ReadInt16FromFileToDoubleBuffer(NULL, 1, double_buffer.get())); + EXPECT_EQ(0u, ReadInt16FromFileToDoubleBuffer(file.get(), 1, NULL)); + EXPECT_EQ(0u, ReadInt16FromFileToDoubleBuffer(file.get(), + 0, + double_buffer.get())); + + EXPECT_EQ(0u, ReadDoubleBufferFromFile(NULL, 1, double_buffer.get())); + EXPECT_EQ(0u, ReadDoubleBufferFromFile(file.get(), 1, NULL)); + EXPECT_EQ(0u, ReadDoubleBufferFromFile(file.get(), 0, double_buffer.get())); + + EXPECT_EQ(0u, WriteInt16BufferToFile(NULL, 1, int16_buffer.get())); + EXPECT_EQ(0u, WriteInt16BufferToFile(file.get(), 1, NULL)); + EXPECT_EQ(0u, WriteInt16BufferToFile(file.get(), 0, int16_buffer.get())); + + EXPECT_EQ(0u, WriteDoubleBufferToFile(NULL, 1, double_buffer.get())); + EXPECT_EQ(0u, WriteDoubleBufferToFile(file.get(), 1, NULL)); + EXPECT_EQ(0u, WriteDoubleBufferToFile(file.get(), 0, double_buffer.get())); +} + +} // namespace webrtc + diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/transient/moving_moments.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/transient/moving_moments.cc new file mode 100644 index 0000000000..46b16b82bc --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/transient/moving_moments.cc @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/transient/moving_moments.h" + +#include <math.h> +#include <string.h> + +#include "rtc_base/checks.h" + +namespace webrtc { + +MovingMoments::MovingMoments(size_t length) + : length_(length), + queue_(), + sum_(0.0), + sum_of_squares_(0.0) { + RTC_DCHECK_GT(length, 0); + for (size_t i = 0; i < length; ++i) { + queue_.push(0.0); + } +} + +MovingMoments::~MovingMoments() {} + +void MovingMoments::CalculateMoments(const float* in, size_t in_length, + float* first, float* second) { + RTC_DCHECK(in); + RTC_DCHECK_GT(in_length, 0); + RTC_DCHECK(first); + RTC_DCHECK(second); + + for (size_t i = 0; i < in_length; ++i) { + const float old_value = queue_.front(); + queue_.pop(); + queue_.push(in[i]); + + sum_ += in[i] - old_value; + sum_of_squares_ += in[i] * in[i] - old_value * old_value; + first[i] = sum_ / length_; + second[i] = sum_of_squares_ / length_; + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/transient/moving_moments.h b/third_party/libwebrtc/webrtc/modules/audio_processing/transient/moving_moments.h new file mode 100644 index 0000000000..f1b3e38cc0 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/transient/moving_moments.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TRANSIENT_MOVING_MOMENTS_H_
+#define MODULES_AUDIO_PROCESSING_TRANSIENT_MOVING_MOMENTS_H_
+
+#include <stddef.h>
+
+#include <queue>
+
+namespace webrtc {
+
+// Calculates the first and second moments for each value of a buffer taking
+// into account a given number of previous values.
+// It preserves its state, so it can be called multiple times.
+// TODO(chadan): When needed, implement a function that takes a buffer of
+// first moments and a buffer of second moments and calculates the variances.
+// TODO(chadan): When needed, add functionality to update with a buffer but
+// output only the last values of the moments.
+class MovingMoments {
+ public:
+  // Creates a MovingMoments object that uses the last |length| values
+  // (including the new value introduced in every new calculation).
+  explicit MovingMoments(size_t length);
+  ~MovingMoments();
+
+  // Calculates the new values using |in|. Results will be in the out buffers.
+  // |first| and |second| must be allocated with at least |in_length|.
+  void CalculateMoments(const float* in, size_t in_length,
+                        float* first, float* second);
+
+ private:
+  size_t length_;
+  // A queue holding the |length_| latest input values.
+  std::queue<float> queue_;
+  // Sum of the values of the queue.
+  float sum_;
+  // Sum of the squares of the values of the queue.
+  float sum_of_squares_;
+};
+
+}  // namespace webrtc
+
+
+#endif  // MODULES_AUDIO_PROCESSING_TRANSIENT_MOVING_MOMENTS_H_
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/transient/moving_moments_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/transient/moving_moments_unittest.cc
new file mode 100644
index 0000000000..057bc454e0
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/transient/moving_moments_unittest.cc
@@ -0,0 +1,207 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/transient/moving_moments.h"
+
+#include <memory>
+
+#include "test/gtest.h"
+
+namespace webrtc {
+
+static const float kTolerance = 0.0001f;
+
+class MovingMomentsTest : public ::testing::Test {
+ protected:
+  static const size_t kMovingMomentsBufferLength = 5;
+  static const size_t kMaxOutputLength = 20;  // Valid for these tests only.
+
+  virtual void SetUp();
+  // Calls CalculateMoments and verifies that it produces the expected
+  // outputs.
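+  // As a worked example of the expected semantics: with a window length of 3
+  // and input {1.f, 2.f, 3.f}, the queue starts filled with zeros, so the
+  // means are {1/3, 1, 2} and the mean squares are {1/3, 5/3, 14/3}.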
+  void CalculateMomentsAndVerify(const float* input, size_t input_length,
+                                 const float* expected_mean,
+                                 const float* expected_mean_squares);
+
+  std::unique_ptr<MovingMoments> moving_moments_;
+  float output_mean_[kMaxOutputLength];
+  float output_mean_squares_[kMaxOutputLength];
+};
+
+const size_t MovingMomentsTest::kMaxOutputLength;
+
+void MovingMomentsTest::SetUp() {
+  moving_moments_.reset(new MovingMoments(kMovingMomentsBufferLength));
+}
+
+void MovingMomentsTest::CalculateMomentsAndVerify(
+    const float* input, size_t input_length,
+    const float* expected_mean,
+    const float* expected_mean_squares) {
+  ASSERT_LE(input_length, kMaxOutputLength);
+
+  moving_moments_->CalculateMoments(input,
+                                    input_length,
+                                    output_mean_,
+                                    output_mean_squares_);
+
+  for (size_t i = 0; i < input_length; ++i) {
+    EXPECT_NEAR(expected_mean[i], output_mean_[i], kTolerance);
+    EXPECT_NEAR(expected_mean_squares[i], output_mean_squares_[i], kTolerance);
+  }
+}
+
+TEST_F(MovingMomentsTest, CorrectMomentsOfAnAllZerosBuffer) {
+  const float kInput[] = {0.f, 0.f, 0.f, 0.f, 0.f};
+  const size_t kInputLength = sizeof(kInput) / sizeof(kInput[0]);
+
+  const float expected_mean[kInputLength] = {0.f, 0.f, 0.f, 0.f, 0.f};
+  const float expected_mean_squares[kInputLength] = {0.f, 0.f, 0.f, 0.f, 0.f};
+
+  CalculateMomentsAndVerify(kInput, kInputLength, expected_mean,
+                            expected_mean_squares);
+}
+
+TEST_F(MovingMomentsTest, CorrectMomentsOfAConstantBuffer) {
+  const float kInput[] = {5.f, 5.f, 5.f, 5.f, 5.f, 5.f, 5.f, 5.f, 5.f, 5.f};
+  const size_t kInputLength = sizeof(kInput) / sizeof(kInput[0]);
+
+  const float expected_mean[kInputLength] =
+      {1.f, 2.f, 3.f, 4.f, 5.f, 5.f, 5.f, 5.f, 5.f, 5.f};
+  const float expected_mean_squares[kInputLength] =
+      {5.f, 10.f, 15.f, 20.f, 25.f, 25.f, 25.f, 25.f, 25.f, 25.f};
+
+  CalculateMomentsAndVerify(kInput, kInputLength, expected_mean,
+                            expected_mean_squares);
+}
+
+TEST_F(MovingMomentsTest, CorrectMomentsOfAnIncreasingBuffer) {
+  const float kInput[] = {1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f};
+  const size_t kInputLength = sizeof(kInput) / sizeof(kInput[0]);
+
+  const float expected_mean[kInputLength] =
+      {0.2f, 0.6f, 1.2f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f};
+  const float expected_mean_squares[kInputLength] =
+      {0.2f, 1.f, 2.8f, 6.f, 11.f, 18.f, 27.f, 38.f, 51.f};
+
+  CalculateMomentsAndVerify(kInput, kInputLength, expected_mean,
+                            expected_mean_squares);
+}
+
+TEST_F(MovingMomentsTest, CorrectMomentsOfADecreasingBuffer) {
+  const float kInput[] =
+      {-1.f, -2.f, -3.f, -4.f, -5.f, -6.f, -7.f, -8.f, -9.f};
+  const size_t kInputLength = sizeof(kInput) / sizeof(kInput[0]);
+
+  const float expected_mean[kInputLength] =
+      {-0.2f, -0.6f, -1.2f, -2.f, -3.f, -4.f, -5.f, -6.f, -7.f};
+  const float expected_mean_squares[kInputLength] =
+      {0.2f, 1.f, 2.8f, 6.f, 11.f, 18.f, 27.f, 38.f, 51.f};
+
+  CalculateMomentsAndVerify(kInput, kInputLength, expected_mean,
+                            expected_mean_squares);
+}
+
+TEST_F(MovingMomentsTest, CorrectMomentsOfAZeroMeanSequence) {
+  const size_t kMovingMomentsBufferLength = 4;
+  moving_moments_.reset(new MovingMoments(kMovingMomentsBufferLength));
+  const float kInput[] =
+      {1.f, -1.f, 1.f, -1.f, 1.f, -1.f, 1.f, -1.f, 1.f, -1.f};
+  const size_t kInputLength = sizeof(kInput) / sizeof(kInput[0]);
+
+  const float expected_mean[kInputLength] =
+      {0.25f, 0.f, 0.25f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f};
+  const float expected_mean_squares[kInputLength] =
+      {0.25f, 0.5f, 0.75f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f};
+
+  CalculateMomentsAndVerify(kInput, kInputLength, expected_mean,
+                            expected_mean_squares);
+}
+
+TEST_F(MovingMomentsTest, CorrectMomentsOfAnArbitraryBuffer) {
+  const float kInput[] =
+      {0.2f, 0.3f, 0.5f, 0.7f, 0.11f, 0.13f, 0.17f, 0.19f, 0.23f};
+  const size_t kInputLength = sizeof(kInput) / sizeof(kInput[0]);
+
+  const float expected_mean[kInputLength] =
+      {0.04f, 0.1f, 0.2f, 0.34f, 0.362f, 0.348f, 0.322f, 0.26f, 0.166f};
+  const float expected_mean_squares[kInputLength] =
+      {0.008f, 0.026f, 0.076f, 0.174f, 0.1764f, 0.1718f, 0.1596f, 0.1168f,
+       0.0294f};
+
+  CalculateMomentsAndVerify(kInput, kInputLength, expected_mean,
+                            expected_mean_squares);
+}
+
+TEST_F(MovingMomentsTest, MultipleCalculateMomentsCalls) {
+  const float kInputFirstCall[] =
+      {0.2f, 0.3f, 0.5f, 0.7f, 0.11f, 0.13f, 0.17f, 0.19f, 0.23f};
+  const size_t kInputFirstCallLength = sizeof(kInputFirstCall) /
+      sizeof(kInputFirstCall[0]);
+  const float kInputSecondCall[] = {0.29f, 0.31f};
+  const size_t kInputSecondCallLength = sizeof(kInputSecondCall) /
+      sizeof(kInputSecondCall[0]);
+  const float kInputThirdCall[] = {0.37f, 0.41f, 0.43f, 0.47f};
+  const size_t kInputThirdCallLength = sizeof(kInputThirdCall) /
+      sizeof(kInputThirdCall[0]);
+
+  const float expected_mean_first_call[kInputFirstCallLength] =
+      {0.04f, 0.1f, 0.2f, 0.34f, 0.362f, 0.348f, 0.322f, 0.26f, 0.166f};
+  const float expected_mean_squares_first_call[kInputFirstCallLength] =
+      {0.008f, 0.026f, 0.076f, 0.174f, 0.1764f, 0.1718f, 0.1596f, 0.1168f,
+       0.0294f};
+
+  const float expected_mean_second_call[kInputSecondCallLength] =
+      {0.202f, 0.238f};
+  const float expected_mean_squares_second_call[kInputSecondCallLength] =
+      {0.0438f, 0.0596f};
+
+  const float expected_mean_third_call[kInputThirdCallLength] =
+      {0.278f, 0.322f, 0.362f, 0.398f};
+  const float expected_mean_squares_third_call[kInputThirdCallLength] =
+      {0.0812f, 0.1076f, 0.134f, 0.1614f};
+
+  CalculateMomentsAndVerify(kInputFirstCall, kInputFirstCallLength,
+      expected_mean_first_call, expected_mean_squares_first_call);
+
+  CalculateMomentsAndVerify(kInputSecondCall, kInputSecondCallLength,
+      expected_mean_second_call, expected_mean_squares_second_call);
+
+  CalculateMomentsAndVerify(kInputThirdCall, kInputThirdCallLength,
+      expected_mean_third_call, expected_mean_squares_third_call);
}
+
+TEST_F(MovingMomentsTest,
+       VerifySampleBasedVsBlockBasedCalculation) {
+  const float kInput[] =
+      {0.2f, 0.3f, 0.5f, 0.7f, 0.11f, 0.13f, 0.17f, 0.19f, 0.23f};
+  const size_t kInputLength = sizeof(kInput) / sizeof(kInput[0]);
+
+  float output_mean_block_based[kInputLength];
+  float output_mean_squares_block_based[kInputLength];
+
+  float output_mean_sample_based;
+  float output_mean_squares_sample_based;
+
+  moving_moments_->CalculateMoments(
+      kInput, kInputLength, output_mean_block_based,
+      output_mean_squares_block_based);
+  moving_moments_.reset(new MovingMoments(kMovingMomentsBufferLength));
+  for (size_t i = 0; i < kInputLength; ++i) {
+    moving_moments_->CalculateMoments(
+        &kInput[i], 1, &output_mean_sample_based,
+        &output_mean_squares_sample_based);
+    EXPECT_FLOAT_EQ(output_mean_block_based[i], output_mean_sample_based);
+    EXPECT_FLOAT_EQ(output_mean_squares_block_based[i],
+                    output_mean_squares_sample_based);
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/transient/test/plotDetection.m b/third_party/libwebrtc/webrtc/modules/audio_processing/transient/test/plotDetection.m
new file mode 100644
index 0000000000..8e12ab920b
--- /dev/null
+++ 
b/third_party/libwebrtc/webrtc/modules/audio_processing/transient/test/plotDetection.m @@ -0,0 +1,22 @@ +% +% Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. +% +% Use of this source code is governed by a BSD-style license +% that can be found in the LICENSE file in the root of the source +% tree. An additional intellectual property rights grant can be found +% in the file PATENTS. All contributing project authors may +% be found in the AUTHORS file in the root of the source tree. +% + +function [] = plotDetection(PCMfile, DATfile, fs, chunkSize) +%[] = plotDetection(PCMfile, DATfile, fs, chunkSize) +% +%Plots the signal alongside the detection values. +% +%PCMfile: The file of the input signal in PCM format. +%DATfile: The file containing the detection values in binary float format. +%fs: The sample rate of the signal in Hertz. +%chunkSize: The chunk size used to compute the detection values in seconds. +[x, tx] = readPCM(PCMfile, fs); +[d, td] = readDetection(DATfile, fs, chunkSize); +plot(tx, x, td, d); diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/transient/test/readDetection.m b/third_party/libwebrtc/webrtc/modules/audio_processing/transient/test/readDetection.m new file mode 100644 index 0000000000..832bf31ec8 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/transient/test/readDetection.m @@ -0,0 +1,26 @@ +% +% Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. +% +% Use of this source code is governed by a BSD-style license +% that can be found in the LICENSE file in the root of the source +% tree. An additional intellectual property rights grant can be found +% in the file PATENTS. All contributing project authors may +% be found in the AUTHORS file in the root of the source tree. +% + +function [d, t] = readDetection(file, fs, chunkSize) +%[d, t] = readDetection(file, fs, chunkSize) +% +%Reads a detection signal from a DAT file. +% +%d: The detection signal. +%t: The respective time vector. +% +%file: The DAT file where the detection signal is stored in float format. +%fs: The signal sample rate in Hertz. +%chunkSize: The chunk size used for the detection in seconds. +fid = fopen(file); +d = fread(fid, inf, 'float'); +fclose(fid); +t = 0:(1 / fs):(length(d) * chunkSize - 1 / fs); +d = d(floor(t / chunkSize) + 1); diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/transient/test/readPCM.m b/third_party/libwebrtc/webrtc/modules/audio_processing/transient/test/readPCM.m new file mode 100644 index 0000000000..cd3cef8a3c --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/transient/test/readPCM.m @@ -0,0 +1,26 @@ +% +% Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. +% +% Use of this source code is governed by a BSD-style license +% that can be found in the LICENSE file in the root of the source +% tree. An additional intellectual property rights grant can be found +% in the file PATENTS. All contributing project authors may +% be found in the AUTHORS file in the root of the source tree. +% + +function [x, t] = readPCM(file, fs) +%[x, t] = readPCM(file, fs) +% +%Reads a signal from a PCM file. +% +%x: The read signal after normalization. +%t: The respective time vector. +% +%file: The PCM file where the signal is stored in int16 format. +%fs: The signal sample rate in Hertz. 
+fid = fopen(file); +x = fread(fid, inf, 'int16'); +fclose(fid); +x = x - mean(x); +x = x / max(abs(x)); +t = 0:(1 / fs):((length(x) - 1) / fs); diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/transient/transient_detector.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/transient/transient_detector.cc new file mode 100644 index 0000000000..1bb6f9f10a --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/transient/transient_detector.cc @@ -0,0 +1,176 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/transient/transient_detector.h" + +#include <float.h> +#include <math.h> +#include <string.h> + +#include <algorithm> + +#include "modules/audio_processing/transient/common.h" +#include "modules/audio_processing/transient/daubechies_8_wavelet_coeffs.h" +#include "modules/audio_processing/transient/moving_moments.h" +#include "modules/audio_processing/transient/wpd_tree.h" +#include "rtc_base/checks.h" + +namespace webrtc { + +static const int kTransientLengthMs = 30; +static const int kChunksAtStartupLeftToDelete = + kTransientLengthMs / ts::kChunkSizeMs; +static const float kDetectThreshold = 16.f; + +TransientDetector::TransientDetector(int sample_rate_hz) + : samples_per_chunk_(sample_rate_hz * ts::kChunkSizeMs / 1000), + last_first_moment_(), + last_second_moment_(), + chunks_at_startup_left_to_delete_(kChunksAtStartupLeftToDelete), + reference_energy_(1.f), + using_reference_(false) { + RTC_DCHECK(sample_rate_hz == ts::kSampleRate8kHz || + sample_rate_hz == ts::kSampleRate16kHz || + sample_rate_hz == ts::kSampleRate32kHz || + sample_rate_hz == ts::kSampleRate48kHz); + int samples_per_transient = sample_rate_hz * kTransientLengthMs / 1000; + // Adjustment to avoid data loss while downsampling, making + // |samples_per_chunk_| and |samples_per_transient| always divisible by + // |kLeaves|. + samples_per_chunk_ -= samples_per_chunk_ % kLeaves; + samples_per_transient -= samples_per_transient % kLeaves; + + tree_leaves_data_length_ = samples_per_chunk_ / kLeaves; + wpd_tree_.reset(new WPDTree(samples_per_chunk_, + kDaubechies8HighPassCoefficients, + kDaubechies8LowPassCoefficients, + kDaubechies8CoefficientsLength, + kLevels)); + for (size_t i = 0; i < kLeaves; ++i) { + moving_moments_[i].reset( + new MovingMoments(samples_per_transient / kLeaves)); + } + + first_moments_.reset(new float[tree_leaves_data_length_]); + second_moments_.reset(new float[tree_leaves_data_length_]); + + for (int i = 0; i < kChunksAtStartupLeftToDelete; ++i) { + previous_results_.push_back(0.f); + } +} + +TransientDetector::~TransientDetector() {} + +float TransientDetector::Detect(const float* data, + size_t data_length, + const float* reference_data, + size_t reference_length) { + RTC_DCHECK(data); + RTC_DCHECK_EQ(samples_per_chunk_, data_length); + + // TODO(aluebs): Check if these errors can logically happen and if not assert + // on them. 
+  if (wpd_tree_->Update(data, samples_per_chunk_) != 0) {
+    return -1.f;
+  }
+
+  float result = 0.f;
+
+  for (size_t i = 0; i < kLeaves; ++i) {
+    WPDNode* leaf = wpd_tree_->NodeAt(kLevels, i);
+
+    moving_moments_[i]->CalculateMoments(leaf->data(),
+                                         tree_leaves_data_length_,
+                                         first_moments_.get(),
+                                         second_moments_.get());
+
+    // Add the delayed value (use the last moments from the previous call to
+    // Detect).
+    float unbiased_data = leaf->data()[0] - last_first_moment_[i];
+    result +=
+        unbiased_data * unbiased_data / (last_second_moment_[i] + FLT_MIN);
+
+    // Add new values.
+    for (size_t j = 1; j < tree_leaves_data_length_; ++j) {
+      unbiased_data = leaf->data()[j] - first_moments_[j - 1];
+      result +=
+          unbiased_data * unbiased_data / (second_moments_[j - 1] + FLT_MIN);
+    }
+
+    last_first_moment_[i] = first_moments_[tree_leaves_data_length_ - 1];
+    last_second_moment_[i] = second_moments_[tree_leaves_data_length_ - 1];
+  }
+
+  result /= tree_leaves_data_length_;
+
+  result *= ReferenceDetectionValue(reference_data, reference_length);
+
+  if (chunks_at_startup_left_to_delete_ > 0) {
+    chunks_at_startup_left_to_delete_--;
+    result = 0.f;
+  }
+
+  if (result >= kDetectThreshold) {
+    result = 1.f;
+  } else {
+    // Get a proportional value.
+    // The proportion is achieved with a squared raised cosine function with
+    // domain [0, kDetectThreshold) and range [0, 1); it is always increasing.
+    const float kHorizontalScaling = ts::kPi / kDetectThreshold;
+    const float kHorizontalShift = ts::kPi;
+    const float kVerticalScaling = 0.5f;
+    const float kVerticalShift = 1.f;
+
+    result = (cos(result * kHorizontalScaling + kHorizontalShift) +
+              kVerticalShift) * kVerticalScaling;
+    result *= result;
+  }
+
+  previous_results_.pop_front();
+  previous_results_.push_back(result);
+
+  // In the current implementation we return the max of the current result and
+  // the previous results, so the high results have a width equal to
+  // |transient_length|.
+  return *std::max_element(previous_results_.begin(), previous_results_.end());
+}
+
+// Compares the energy of the reference signal to a running average of its
+// previous energy. A sigmoid maps the energy ratio to the [0, 1] range. This
+// value is multiplied by the detection result to avoid false positives.
+float TransientDetector::ReferenceDetectionValue(const float* data,
+                                                 size_t length) {
+  if (data == NULL) {
+    using_reference_ = false;
+    return 1.f;
+  }
+  static const float kEnergyRatioThreshold = 0.2f;
+  static const float kReferenceNonLinearity = 20.f;
+  static const float kMemory = 0.99f;
+  float reference_energy = 0.f;
+  for (size_t i = 1; i < length; ++i) {
+    reference_energy += data[i] * data[i];
+  }
+  if (reference_energy == 0.f) {
+    using_reference_ = false;
+    return 1.f;
+  }
+  RTC_DCHECK_NE(0, reference_energy_);
+  float result = 1.f / (1.f + exp(kReferenceNonLinearity *
+                                  (kEnergyRatioThreshold -
+                                   reference_energy / reference_energy_)));
+  reference_energy_ =
+      kMemory * reference_energy_ + (1.f - kMemory) * reference_energy;
+
+  using_reference_ = true;
+
+  return result;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/transient/transient_detector.h b/third_party/libwebrtc/webrtc/modules/audio_processing/transient/transient_detector.h
new file mode 100644
index 0000000000..3267b3a6cd
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/transient/transient_detector.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_DETECTOR_H_
+#define MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_DETECTOR_H_
+
+#include <deque>
+#include <memory>
+
+#include "modules/audio_processing/transient/moving_moments.h"
+#include "modules/audio_processing/transient/wpd_tree.h"
+
+namespace webrtc {
+
+// This is an implementation of the transient detector described in "Causal
+// Wavelet based transient detector".
+// Calculates the log-likelihood of a transient to happen on a signal at any
+// given time based on the previous samples; it uses a WPD tree to analyze the
+// signal. It preserves its state, so it can be called multiple times.
+class TransientDetector {
+ public:
+  // TODO(chadan): The only supported wavelet is Daubechies 8 using a WPD tree
+  // of 3 levels. When needed, add an overloaded constructor to allow
+  // different wavelets and tree depths.
+
+  // Creates a wavelet-based transient detector.
+  explicit TransientDetector(int sample_rate_hz);
+
+  ~TransientDetector();
+
+  // Calculates the log-likelihood of the existence of a transient in |data|.
+  // |data_length| has to be equal to |samples_per_chunk_|.
+  // Returns a value between 0 and 1, as a non-linear representation of this
+  // likelihood.
+  // Returns a negative value on error.
+  float Detect(const float* data,
+               size_t data_length,
+               const float* reference_data,
+               size_t reference_length);
+
+  bool using_reference() { return using_reference_; }
+
+ private:
+  float ReferenceDetectionValue(const float* data, size_t length);
+
+  static const size_t kLevels = 3;
+  static const size_t kLeaves = 1 << kLevels;
+
+  size_t samples_per_chunk_;
+
+  std::unique_ptr<WPDTree> wpd_tree_;
+  size_t tree_leaves_data_length_;
+
+  // A MovingMoments object is needed for each leaf in the WPD tree.
+  std::unique_ptr<MovingMoments> moving_moments_[kLeaves];
+
+  std::unique_ptr<float[]> first_moments_;
+  std::unique_ptr<float[]> second_moments_;
+
+  // Stores the last calculated moments from the previous detection.
+  float last_first_moment_[kLeaves];
+  float last_second_moment_[kLeaves];
+
+  // We keep track of the results from the previous chunks, so they can be
+  // used to effectively widen the detection results to |transient_length|.
+  std::deque<float> previous_results_;
+
+  // Number of chunks that are going to return only zeros at the beginning of
+  // the detection. It helps to avoid Infs and NaNs due to the lack of
+  // information.
+  int chunks_at_startup_left_to_delete_;
+
+  float reference_energy_;
+
+  bool using_reference_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_DETECTOR_H_
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/transient/transient_detector_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/transient/transient_detector_unittest.cc
new file mode 100644
index 0000000000..96af179c42
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/transient/transient_detector_unittest.cc
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/transient/transient_detector.h"
+
+#include <memory>
+#include <sstream>
+#include <string>
+
+#include "modules/audio_processing/transient/common.h"
+#include "modules/audio_processing/transient/file_utils.h"
+#include "system_wrappers/include/file_wrapper.h"
+#include "test/gtest.h"
+#include "test/testsupport/fileutils.h"
+#include "typedefs.h"  // NOLINT(build/include)
+
+namespace webrtc {
+
+static const int kSampleRatesHz[] = {ts::kSampleRate8kHz,
+                                     ts::kSampleRate16kHz,
+                                     ts::kSampleRate32kHz,
+                                     ts::kSampleRate48kHz};
+static const size_t kNumberOfSampleRates =
+    sizeof(kSampleRatesHz) / sizeof(*kSampleRatesHz);
+
+// This test verifies the correctness of the transient detector by comparing
+// its results with the ones stored in the detect files in the directory
+// resources/audio_processing/transient/.
+// The files contain all the results in double precision (little endian).
+// The audio files used with different sample rates are stored in the same
+// directory.
+#if defined(WEBRTC_IOS)
+TEST(TransientDetectorTest, DISABLED_CorrectnessBasedOnFiles) {
+#else
+TEST(TransientDetectorTest, CorrectnessBasedOnFiles) {
+#endif
+  for (size_t i = 0; i < kNumberOfSampleRates; ++i) {
+    int sample_rate_hz = kSampleRatesHz[i];
+
+    // Prepare detect file.
+    std::stringstream detect_file_name;
+    detect_file_name << "audio_processing/transient/detect"
+                     << (sample_rate_hz / 1000) << "kHz";
+
+    std::unique_ptr<FileWrapper> detect_file(FileWrapper::Create());
+
+    detect_file->OpenFile(
+        test::ResourcePath(detect_file_name.str(), "dat").c_str(),
+        true);  // Read only.
+
+    bool file_opened = detect_file->is_open();
+    ASSERT_TRUE(file_opened) << "File could not be opened.\n"
+                             << detect_file_name.str().c_str();
+
+    // Prepare audio file.
+    std::stringstream audio_file_name;
+    audio_file_name << "audio_processing/transient/audio"
+                    << (sample_rate_hz / 1000) << "kHz";
+
+    std::unique_ptr<FileWrapper> audio_file(FileWrapper::Create());
+
+    audio_file->OpenFile(
+        test::ResourcePath(audio_file_name.str(), "pcm").c_str(),
+        true);  // Read only.
+
+    // Create detector.
+    TransientDetector detector(sample_rate_hz);
+
+    const size_t buffer_length = sample_rate_hz * ts::kChunkSizeMs / 1000;
+    std::unique_ptr<float[]> buffer(new float[buffer_length]);
+
+    const float kTolerance = 0.02f;
+
+    size_t frames_read = 0;
+
+    while (ReadInt16FromFileToFloatBuffer(audio_file.get(),
+                                          buffer_length,
+                                          buffer.get()) == buffer_length) {
+      ++frames_read;
+
+      float detector_value =
+          detector.Detect(buffer.get(), buffer_length, NULL, 0);
+      double file_value;
+      ASSERT_EQ(1u, ReadDoubleBufferFromFile(detect_file.get(), 1, &file_value))
+          << "Detect test file is malformed.\n";
+
+      // Compare results with data from the Matlab test file.
+      EXPECT_NEAR(file_value, detector_value, kTolerance) << "Frame: "
+                                                          << frames_read;
+    }
+
+    detect_file->CloseFile();
+    audio_file->CloseFile();
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/transient/transient_suppression_test.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/transient/transient_suppression_test.cc
new file mode 100644
index 0000000000..3442ee0af6
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/transient/transient_suppression_test.cc
@@ -0,0 +1,252 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/transient/transient_suppressor.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <memory>
+#include <string>
+
+#include "common_audio/include/audio_util.h"
+#include "modules/audio_processing/agc/agc.h"
+#include "modules/include/module_common_types.h"
+#include "rtc_base/flags.h"
+#include "test/gtest.h"
+#include "test/testsupport/fileutils.h"
+#include "typedefs.h"  // NOLINT(build/include)
+
+DEFINE_string(in_file_name, "", "PCM file that contains the signal.");
+DEFINE_string(detection_file_name,
+              "",
+              "PCM file that contains the detection signal.");
+DEFINE_string(reference_file_name,
+              "",
+              "PCM file that contains the reference signal.");
+
+DEFINE_int(chunk_size_ms,
+           10,
+           "Time between each chunk of samples in milliseconds.");
+
+DEFINE_int(sample_rate_hz,
+           16000,
+           "Sampling frequency of the signal in Hertz.");
+DEFINE_int(detection_rate_hz,
+           0,
+           "Sampling frequency of the detection signal in Hertz.");
+
+DEFINE_int(num_channels, 1, "Number of channels.");
+
+DEFINE_bool(help, false, "Print this message.");
+
+namespace webrtc {
+
+const char kUsage[] =
+    "\nDetects and suppresses transients from file.\n\n"
+    "This application loads the signal from the in_file_name with a specific\n"
+    "num_channels and sample_rate_hz, the detection signal from the\n"
+    "detection_file_name with a specific detection_rate_hz, and the reference\n"
+    "signal from the reference_file_name with sample_rate_hz, divides them\n"
+    "into chunk_size_ms blocks, computes the voice value of each chunk and,\n"
+    "depending on the voice_threshold, does the respective restoration. You\n"
+    "can always get the all-voiced or all-unvoiced cases by setting the\n"
+    "voice_threshold to 0 or 1 respectively.\n\n";
+
+// Read next buffers from the test files (signed 16-bit host-endian PCM
+// format). audio_buffer has int16 samples, detection_buffer has float samples
+// with range [-32768,32767], and reference_buffer has float samples with range
+// [-1,1]. Return true iff all the buffers were filled completely.
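+// The samples in in_file are assumed to be interleaved; they are
+// de-interleaved into audio_buffer so that each channel is contiguous.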
+bool ReadBuffers(FILE* in_file,
+                 size_t audio_buffer_size,
+                 int num_channels,
+                 int16_t* audio_buffer,
+                 FILE* detection_file,
+                 size_t detection_buffer_size,
+                 float* detection_buffer,
+                 FILE* reference_file,
+                 float* reference_buffer) {
+  std::unique_ptr<int16_t[]> tmpbuf;
+  int16_t* read_ptr = audio_buffer;
+  if (num_channels > 1) {
+    tmpbuf.reset(new int16_t[num_channels * audio_buffer_size]);
+    read_ptr = tmpbuf.get();
+  }
+  if (fread(read_ptr,
+            sizeof(*read_ptr),
+            num_channels * audio_buffer_size,
+            in_file) != num_channels * audio_buffer_size) {
+    return false;
+  }
+  // De-interleave.
+  if (num_channels > 1) {
+    for (int i = 0; i < num_channels; ++i) {
+      for (size_t j = 0; j < audio_buffer_size; ++j) {
+        audio_buffer[i * audio_buffer_size + j] =
+            read_ptr[i + j * num_channels];
+      }
+    }
+  }
+  if (detection_file) {
+    std::unique_ptr<int16_t[]> ibuf(new int16_t[detection_buffer_size]);
+    if (fread(ibuf.get(), sizeof(ibuf[0]), detection_buffer_size,
+              detection_file) != detection_buffer_size)
+      return false;
+    for (size_t i = 0; i < detection_buffer_size; ++i)
+      detection_buffer[i] = ibuf[i];
+  }
+  if (reference_file) {
+    std::unique_ptr<int16_t[]> ibuf(new int16_t[audio_buffer_size]);
+    if (fread(ibuf.get(), sizeof(ibuf[0]), audio_buffer_size, reference_file)
+        != audio_buffer_size)
+      return false;
+    S16ToFloat(ibuf.get(), audio_buffer_size, reference_buffer);
+  }
+  return true;
+}
+
+// Write a number of samples to an open signed 16-bit host-endian PCM file.
+static void WritePCM(FILE* f,
+                     size_t num_samples,
+                     int num_channels,
+                     const float* buffer) {
+  std::unique_ptr<int16_t[]> ibuf(new int16_t[num_channels * num_samples]);
+  // Interleave.
+  for (int i = 0; i < num_channels; ++i) {
+    for (size_t j = 0; j < num_samples; ++j) {
+      ibuf[i + j * num_channels] = FloatS16ToS16(buffer[i * num_samples + j]);
+    }
+  }
+  fwrite(ibuf.get(), sizeof(ibuf[0]), num_channels * num_samples, f);
+}
+
+// This application tests the transient suppression by providing a processed
+// PCM file, which has to be listened to in order to evaluate the
+// performance.
+// It reads an audio file and its voice gain information, and the suppressor
+// processes it, producing the output file "suppressed_keystrokes.pcm".
+void void_main() {
+  // TODO(aluebs): Remove all FileWrappers.
+  // Prepare the input file.
+  FILE* in_file = fopen(FLAG_in_file_name, "rb");
+  ASSERT_TRUE(in_file != NULL);
+
+  // Prepare the detection file.
+  FILE* detection_file = NULL;
+  if (strlen(FLAG_detection_file_name) > 0) {
+    detection_file = fopen(FLAG_detection_file_name, "rb");
+  }
+
+  // Prepare the reference file.
+  FILE* reference_file = NULL;
+  if (strlen(FLAG_reference_file_name) > 0) {
+    reference_file = fopen(FLAG_reference_file_name, "rb");
+  }
+
+  // Prepare the output file.
+  std::string out_file_name = test::OutputPath() + "suppressed_keystrokes.pcm";
+  FILE* out_file = fopen(out_file_name.c_str(), "wb");
+  ASSERT_TRUE(out_file != NULL);
+
+  int detection_rate_hz = FLAG_detection_rate_hz;
+  if (detection_rate_hz == 0) {
+    detection_rate_hz = FLAG_sample_rate_hz;
+  }
+
+  Agc agc;
+
+  TransientSuppressor suppressor;
+  suppressor.Initialize(
+      FLAG_sample_rate_hz, detection_rate_hz, FLAG_num_channels);
+
+  const size_t audio_buffer_size =
+      FLAG_chunk_size_ms * FLAG_sample_rate_hz / 1000;
+  const size_t detection_buffer_size =
+      FLAG_chunk_size_ms * detection_rate_hz / 1000;
+
+  // int16 and float variants of the same data.
+ std::unique_ptr<int16_t[]> audio_buffer_i( + new int16_t[FLAG_num_channels * audio_buffer_size]); + std::unique_ptr<float[]> audio_buffer_f( + new float[FLAG_num_channels * audio_buffer_size]); + + std::unique_ptr<float[]> detection_buffer, reference_buffer; + + if (detection_file) + detection_buffer.reset(new float[detection_buffer_size]); + if (reference_file) + reference_buffer.reset(new float[audio_buffer_size]); + + while (ReadBuffers(in_file, + audio_buffer_size, + FLAG_num_channels, + audio_buffer_i.get(), + detection_file, + detection_buffer_size, + detection_buffer.get(), + reference_file, + reference_buffer.get())) { + ASSERT_EQ(0, + agc.Process(audio_buffer_i.get(), + static_cast<int>(audio_buffer_size), + FLAG_sample_rate_hz)) + << "The AGC could not process the frame"; + + for (size_t i = 0; i < FLAG_num_channels * audio_buffer_size; ++i) { + audio_buffer_f[i] = audio_buffer_i[i]; + } + + ASSERT_EQ(0, + suppressor.Suppress(audio_buffer_f.get(), + audio_buffer_size, + FLAG_num_channels, + detection_buffer.get(), + detection_buffer_size, + reference_buffer.get(), + audio_buffer_size, + agc.voice_probability(), + true)) + << "The transient suppressor could not suppress the frame"; + + // Write result to out file. + WritePCM( + out_file, audio_buffer_size, FLAG_num_channels, audio_buffer_f.get()); + } + + fclose(in_file); + if (detection_file) { + fclose(detection_file); + } + if (reference_file) { + fclose(reference_file); + } + fclose(out_file); +} + +} // namespace webrtc + +int main(int argc, char* argv[]) { + if (rtc::FlagList::SetFlagsFromCommandLine(&argc, argv, true) || + FLAG_help || argc != 1) { + printf("%s", webrtc::kUsage); + if (FLAG_help) { + rtc::FlagList::Print(nullptr, false); + return 0; + } + return 1; + } + RTC_CHECK_GT(FLAG_chunk_size_ms, 0); + RTC_CHECK_GT(FLAG_sample_rate_hz, 0); + RTC_CHECK_GT(FLAG_num_channels, 0); + + webrtc::void_main(); + return 0; +} diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/transient/transient_suppressor.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/transient/transient_suppressor.cc new file mode 100644 index 0000000000..9bbd7d9500 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/transient/transient_suppressor.cc @@ -0,0 +1,424 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/transient/transient_suppressor.h" + +#include <math.h> +#include <string.h> +#include <cmath> +#include <complex> +#include <deque> +#include <set> + +#include "common_audio/fft4g.h" +#include "common_audio/include/audio_util.h" +#include "common_audio/signal_processing/include/signal_processing_library.h" +#include "modules/audio_processing/ns/windows_private.h" +#include "modules/audio_processing/transient/common.h" +#include "modules/audio_processing/transient/transient_detector.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" +#include "typedefs.h" // NOLINT(build/include) + +namespace webrtc { + +static const float kMeanIIRCoefficient = 0.5f; +static const float kVoiceThreshold = 0.02f; + +// TODO(aluebs): Check if these values work also for 48kHz. 
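+// With the analysis lengths chosen in Initialize(), a bin is 62.5 Hz wide at
+// 8, 16 and 32 kHz, so bins [3, 60] cover roughly 190 Hz to 3.75 kHz; at
+// 48 kHz a bin is about 46.9 Hz wide, which shifts that range (hence the TODO
+// above).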
+static const size_t kMinVoiceBin = 3; +static const size_t kMaxVoiceBin = 60; + +namespace { + +float ComplexMagnitude(float a, float b) { + return std::abs(a) + std::abs(b); +} + +} // namespace + +TransientSuppressor::TransientSuppressor() + : data_length_(0), + detection_length_(0), + analysis_length_(0), + buffer_delay_(0), + complex_analysis_length_(0), + num_channels_(0), + window_(NULL), + detector_smoothed_(0.f), + keypress_counter_(0), + chunks_since_keypress_(0), + detection_enabled_(false), + suppression_enabled_(false), + use_hard_restoration_(false), + chunks_since_voice_change_(0), + seed_(182), + using_reference_(false) { +} + +TransientSuppressor::~TransientSuppressor() {} + +int TransientSuppressor::Initialize(int sample_rate_hz, + int detection_rate_hz, + int num_channels) { + switch (sample_rate_hz) { + case ts::kSampleRate8kHz: + analysis_length_ = 128u; + window_ = kBlocks80w128; + break; + case ts::kSampleRate16kHz: + analysis_length_ = 256u; + window_ = kBlocks160w256; + break; + case ts::kSampleRate32kHz: + analysis_length_ = 512u; + window_ = kBlocks320w512; + break; + case ts::kSampleRate48kHz: + analysis_length_ = 1024u; + window_ = kBlocks480w1024; + break; + default: + return -1; + } + if (detection_rate_hz != ts::kSampleRate8kHz && + detection_rate_hz != ts::kSampleRate16kHz && + detection_rate_hz != ts::kSampleRate32kHz && + detection_rate_hz != ts::kSampleRate48kHz) { + return -1; + } + if (num_channels <= 0) { + return -1; + } + + detector_.reset(new TransientDetector(detection_rate_hz)); + data_length_ = sample_rate_hz * ts::kChunkSizeMs / 1000; + if (data_length_ > analysis_length_) { + RTC_NOTREACHED(); + return -1; + } + buffer_delay_ = analysis_length_ - data_length_; + + complex_analysis_length_ = analysis_length_ / 2 + 1; + RTC_DCHECK_GE(complex_analysis_length_, kMaxVoiceBin); + num_channels_ = num_channels; + in_buffer_.reset(new float[analysis_length_ * num_channels_]); + memset(in_buffer_.get(), + 0, + analysis_length_ * num_channels_ * sizeof(in_buffer_[0])); + detection_length_ = detection_rate_hz * ts::kChunkSizeMs / 1000; + detection_buffer_.reset(new float[detection_length_]); + memset(detection_buffer_.get(), + 0, + detection_length_ * sizeof(detection_buffer_[0])); + out_buffer_.reset(new float[analysis_length_ * num_channels_]); + memset(out_buffer_.get(), + 0, + analysis_length_ * num_channels_ * sizeof(out_buffer_[0])); + // ip[0] must be zero to trigger initialization using rdft(). 
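+  // Work areas for Ooura's rdft (fft4g): |ip_| is the bit-reversal work area
+  // (2 + sqrt(n) entries, more than the required 2 + sqrt(n / 2)), and
+  // |wfft_| holds the n / 2-entry trigonometric table, where n is
+  // |analysis_length_|.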
+  size_t ip_length = 2 + sqrtf(analysis_length_);
+  ip_.reset(new size_t[ip_length]());
+  memset(ip_.get(), 0, ip_length * sizeof(ip_[0]));
+  wfft_.reset(new float[complex_analysis_length_ - 1]);
+  memset(wfft_.get(), 0, (complex_analysis_length_ - 1) * sizeof(wfft_[0]));
+  spectral_mean_.reset(new float[complex_analysis_length_ * num_channels_]);
+  memset(spectral_mean_.get(),
+         0,
+         complex_analysis_length_ * num_channels_ * sizeof(spectral_mean_[0]));
+  fft_buffer_.reset(new float[analysis_length_ + 2]);
+  memset(fft_buffer_.get(), 0, (analysis_length_ + 2) * sizeof(fft_buffer_[0]));
+  magnitudes_.reset(new float[complex_analysis_length_]);
+  memset(magnitudes_.get(),
+         0,
+         complex_analysis_length_ * sizeof(magnitudes_[0]));
+  mean_factor_.reset(new float[complex_analysis_length_]);
+
+  static const float kFactorHeight = 10.f;
+  static const float kLowSlope = 1.f;
+  static const float kHighSlope = 0.3f;
+  for (size_t i = 0; i < complex_analysis_length_; ++i) {
+    mean_factor_[i] =
+        kFactorHeight /
+            (1.f + exp(kLowSlope * static_cast<int>(i - kMinVoiceBin))) +
+        kFactorHeight /
+            (1.f + exp(kHighSlope * static_cast<int>(kMaxVoiceBin - i)));
+  }
+  detector_smoothed_ = 0.f;
+  keypress_counter_ = 0;
+  chunks_since_keypress_ = 0;
+  detection_enabled_ = false;
+  suppression_enabled_ = false;
+  use_hard_restoration_ = false;
+  chunks_since_voice_change_ = 0;
+  seed_ = 182;
+  using_reference_ = false;
+  return 0;
+}
+
+int TransientSuppressor::Suppress(float* data,
+                                  size_t data_length,
+                                  int num_channels,
+                                  const float* detection_data,
+                                  size_t detection_length,
+                                  const float* reference_data,
+                                  size_t reference_length,
+                                  float voice_probability,
+                                  bool key_pressed) {
+  if (!data || data_length != data_length_ || num_channels != num_channels_ ||
+      detection_length != detection_length_ || voice_probability < 0 ||
+      voice_probability > 1) {
+    return -1;
+  }
+
+  UpdateKeypress(key_pressed);
+  UpdateBuffers(data);
+
+  int result = 0;
+  if (detection_enabled_) {
+    UpdateRestoration(voice_probability);
+
+    if (!detection_data) {
+      // Use the input data of the first channel if special detection data is
+      // not supplied.
+      detection_data = &in_buffer_[buffer_delay_];
+    }
+
+    float detector_result = detector_->Detect(
+        detection_data, detection_length, reference_data, reference_length);
+    if (detector_result < 0) {
+      return -1;
+    }
+
+    using_reference_ = detector_->using_reference();
+
+    // |detector_smoothed_| follows |detector_result| when the latter is
+    // increasing, but decays exponentially otherwise, in order to suppress
+    // the ringing of keyclicks.
+    float smooth_factor = using_reference_ ? 0.6f : 0.1f;
+    detector_smoothed_ = detector_result >= detector_smoothed_
+                             ? detector_result
+                             : smooth_factor * detector_smoothed_ +
+                                   (1 - smooth_factor) * detector_result;
+
+    for (int i = 0; i < num_channels_; ++i) {
+      Suppress(&in_buffer_[i * analysis_length_],
+               &spectral_mean_[i * complex_analysis_length_],
+               &out_buffer_[i * analysis_length_]);
+    }
+  }
+
+  // If suppression isn't enabled, we use the in buffer to delay the signal
+  // appropriately. This also gives time for the out buffer to be refreshed
+  // with new data between detection and suppression becoming enabled.
+  for (int i = 0; i < num_channels_; ++i) {
+    memcpy(&data[i * data_length_],
+           suppression_enabled_ ? &out_buffer_[i * analysis_length_]
+                                : &in_buffer_[i * analysis_length_],
+           data_length_ * sizeof(*data));
+  }
+  return result;
+}
+
+// This should only be called when detection is enabled.
UpdateBuffers() must +// have been called. At return, |out_buffer_| will be filled with the +// processed output. +void TransientSuppressor::Suppress(float* in_ptr, + float* spectral_mean, + float* out_ptr) { + // Go to frequency domain. + for (size_t i = 0; i < analysis_length_; ++i) { + // TODO(aluebs): Rename windows + fft_buffer_[i] = in_ptr[i] * window_[i]; + } + + WebRtc_rdft(analysis_length_, 1, fft_buffer_.get(), ip_.get(), wfft_.get()); + + // Since WebRtc_rdft puts R[n/2] in fft_buffer_[1], we move it to the end + // for convenience. + fft_buffer_[analysis_length_] = fft_buffer_[1]; + fft_buffer_[analysis_length_ + 1] = 0.f; + fft_buffer_[1] = 0.f; + + for (size_t i = 0; i < complex_analysis_length_; ++i) { + magnitudes_[i] = ComplexMagnitude(fft_buffer_[i * 2], + fft_buffer_[i * 2 + 1]); + } + // Restore audio if necessary. + if (suppression_enabled_) { + if (use_hard_restoration_) { + HardRestoration(spectral_mean); + } else { + SoftRestoration(spectral_mean); + } + } + + // Update the spectral mean. + for (size_t i = 0; i < complex_analysis_length_; ++i) { + spectral_mean[i] = (1 - kMeanIIRCoefficient) * spectral_mean[i] + + kMeanIIRCoefficient * magnitudes_[i]; + } + + // Back to time domain. + // Put R[n/2] back in fft_buffer_[1]. + fft_buffer_[1] = fft_buffer_[analysis_length_]; + + WebRtc_rdft(analysis_length_, + -1, + fft_buffer_.get(), + ip_.get(), + wfft_.get()); + const float fft_scaling = 2.f / analysis_length_; + + for (size_t i = 0; i < analysis_length_; ++i) { + out_ptr[i] += fft_buffer_[i] * window_[i] * fft_scaling; + } +} + +void TransientSuppressor::UpdateKeypress(bool key_pressed) { + const int kKeypressPenalty = 1000 / ts::kChunkSizeMs; + const int kIsTypingThreshold = 1000 / ts::kChunkSizeMs; + const int kChunksUntilNotTyping = 4000 / ts::kChunkSizeMs; // 4 seconds. + + if (key_pressed) { + keypress_counter_ += kKeypressPenalty; + chunks_since_keypress_ = 0; + detection_enabled_ = true; + } + keypress_counter_ = std::max(0, keypress_counter_ - 1); + + if (keypress_counter_ > kIsTypingThreshold) { + if (!suppression_enabled_) { + RTC_LOG(LS_INFO) << "[ts] Transient suppression is now enabled."; + } + suppression_enabled_ = true; + keypress_counter_ = 0; + } + + if (detection_enabled_ && + ++chunks_since_keypress_ > kChunksUntilNotTyping) { + if (suppression_enabled_) { + RTC_LOG(LS_INFO) << "[ts] Transient suppression is now disabled."; + } + detection_enabled_ = false; + suppression_enabled_ = false; + keypress_counter_ = 0; + } +} + +void TransientSuppressor::UpdateRestoration(float voice_probability) { + const int kHardRestorationOffsetDelay = 3; + const int kHardRestorationOnsetDelay = 80; + + bool not_voiced = voice_probability < kVoiceThreshold; + + if (not_voiced == use_hard_restoration_) { + chunks_since_voice_change_ = 0; + } else { + ++chunks_since_voice_change_; + + if ((use_hard_restoration_ && + chunks_since_voice_change_ > kHardRestorationOffsetDelay) || + (!use_hard_restoration_ && + chunks_since_voice_change_ > kHardRestorationOnsetDelay)) { + use_hard_restoration_ = not_voiced; + chunks_since_voice_change_ = 0; + } + } +} + +// Shift buffers to make way for new data. Must be called after +// |detection_enabled_| is updated by UpdateKeypress(). +void TransientSuppressor::UpdateBuffers(float* data) { + // TODO(aluebs): Change to ring buffer. + memmove(in_buffer_.get(), + &in_buffer_[data_length_], + (buffer_delay_ + (num_channels_ - 1) * analysis_length_) * + sizeof(in_buffer_[0])); + // Copy new chunk to buffer. 
+ for (int i = 0; i < num_channels_; ++i) { + memcpy(&in_buffer_[buffer_delay_ + i * analysis_length_], + &data[i * data_length_], + data_length_ * sizeof(*data)); + } + if (detection_enabled_) { + // Shift previous chunk in out buffer. + memmove(out_buffer_.get(), + &out_buffer_[data_length_], + (buffer_delay_ + (num_channels_ - 1) * analysis_length_) * + sizeof(out_buffer_[0])); + // Initialize new chunk in out buffer. + for (int i = 0; i < num_channels_; ++i) { + memset(&out_buffer_[buffer_delay_ + i * analysis_length_], + 0, + data_length_ * sizeof(out_buffer_[0])); + } + } +} + +// Restores the unvoiced signal if a click is present. +// Attenuates by a certain factor every peak in the |fft_buffer_| that exceeds +// the spectral mean. The attenuation depends on |detector_smoothed_|. +// If a restoration takes place, the |magnitudes_| are updated to the new value. +void TransientSuppressor::HardRestoration(float* spectral_mean) { + const float detector_result = + 1.f - pow(1.f - detector_smoothed_, using_reference_ ? 200.f : 50.f); + // To restore, we get the peaks in the spectrum. If higher than the previous + // spectral mean we adjust them. + for (size_t i = 0; i < complex_analysis_length_; ++i) { + if (magnitudes_[i] > spectral_mean[i] && magnitudes_[i] > 0) { + // RandU() generates values on [0, int16::max()] + const float phase = 2 * ts::kPi * WebRtcSpl_RandU(&seed_) / + std::numeric_limits<int16_t>::max(); + const float scaled_mean = detector_result * spectral_mean[i]; + + fft_buffer_[i * 2] = (1 - detector_result) * fft_buffer_[i * 2] + + scaled_mean * cosf(phase); + fft_buffer_[i * 2 + 1] = (1 - detector_result) * fft_buffer_[i * 2 + 1] + + scaled_mean * sinf(phase); + magnitudes_[i] = magnitudes_[i] - + detector_result * (magnitudes_[i] - spectral_mean[i]); + } + } +} + +// Restores the voiced signal if a click is present. +// Attenuates by a certain factor every peak in the |fft_buffer_| that exceeds +// the spectral mean and that is lower than some function of the current block +// frequency mean. The attenuation depends on |detector_smoothed_|. +// If a restoration takes place, the |magnitudes_| are updated to the new value. +void TransientSuppressor::SoftRestoration(float* spectral_mean) { + // Get the spectral magnitude mean of the current block. + float block_frequency_mean = 0; + for (size_t i = kMinVoiceBin; i < kMaxVoiceBin; ++i) { + block_frequency_mean += magnitudes_[i]; + } + block_frequency_mean /= (kMaxVoiceBin - kMinVoiceBin); + + // To restore, we get the peaks in the spectrum. If higher than the + // previous spectral mean and lower than a factor of the block mean + // we adjust them. The factor is a double sigmoid that has a minimum in the + // voice frequency range (300Hz - 3kHz). 
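+  // Each qualifying bin is pulled towards the running spectral mean by the
+  // fraction |detector_smoothed_|, and the complex FFT bin is scaled by the
+  // resulting magnitude ratio so that its phase is preserved.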
+  for (size_t i = 0; i < complex_analysis_length_; ++i) {
+    if (magnitudes_[i] > spectral_mean[i] && magnitudes_[i] > 0 &&
+        (using_reference_ ||
+         magnitudes_[i] < block_frequency_mean * mean_factor_[i])) {
+      const float new_magnitude =
+          magnitudes_[i] -
+          detector_smoothed_ * (magnitudes_[i] - spectral_mean[i]);
+      const float magnitude_ratio = new_magnitude / magnitudes_[i];
+
+      fft_buffer_[i * 2] *= magnitude_ratio;
+      fft_buffer_[i * 2 + 1] *= magnitude_ratio;
+      magnitudes_[i] = new_magnitude;
+    }
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/transient/transient_suppressor.h b/third_party/libwebrtc/webrtc/modules/audio_processing/transient/transient_suppressor.h
new file mode 100644
index 0000000000..27b096c795
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/transient/transient_suppressor.h
@@ -0,0 +1,120 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_H_
+#define MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_H_
+
+#include <deque>
+#include <memory>
+#include <set>
+
+#include "rtc_base/gtest_prod_util.h"
+#include "typedefs.h"  // NOLINT(build/include)
+
+namespace webrtc {
+
+class TransientDetector;
+
+// Detects transients in an audio stream and suppresses them using a simple
+// restoration algorithm that attenuates unexpected spikes in the spectrum.
+class TransientSuppressor {
+ public:
+  TransientSuppressor();
+  ~TransientSuppressor();
+
+  int Initialize(int sample_rate_hz, int detection_rate_hz, int num_channels);
+
+  // Processes a |data| chunk and returns it with keystrokes suppressed. The
+  // float format is assumed to be int16 ranged. If there is more than one
+  // channel, the channels are concatenated one after the other in |data|.
+  // |data_length| must be equal to |data_length_|.
+  // |num_channels| must be equal to |num_channels_|.
+  // A sub-band, ideally the higher one, can be used as |detection_data|. If
+  // it is NULL, |data| is used for the detection too. The |detection_data| is
+  // always assumed mono.
+  // If a reference signal (e.g. keyboard microphone) is available, it can be
+  // passed in as |reference_data|. It is assumed mono and must have the same
+  // length as |data|. NULL is accepted if unavailable.
+  // This suppressor performs better if voice information is available.
+  // |voice_probability| is the probability of voice being present in this
+  // chunk of audio. If voice information is not available,
+  // |voice_probability| must always be set to 1.
+  // |key_pressed| determines if a key was pressed on this audio chunk.
+  // Returns 0 on success and -1 otherwise.
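+  // A minimal usage sketch (illustrative only; the rates and lengths are
+  // example values for 10 ms chunks of 16 kHz mono audio):
+  //   TransientSuppressor ts;
+  //   ts.Initialize(16000, 16000, 1);  // 16 kHz audio and detection, mono.
+  //   float chunk[160];                // One 10 ms chunk, int16-ranged.
+  //   // ... fill |chunk|, then suppress in place (no reference signal,
+  //   // no voice information, no key pressed):
+  //   ts.Suppress(chunk, 160, 1, NULL, 160, NULL, 160, 1.f, false);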
+  int Suppress(float* data,
+               size_t data_length,
+               int num_channels,
+               const float* detection_data,
+               size_t detection_length,
+               const float* reference_data,
+               size_t reference_length,
+               float voice_probability,
+               bool key_pressed);
+
+ private:
+  FRIEND_TEST_ALL_PREFIXES(TransientSuppressorTest,
+                           TypingDetectionLogicWorksAsExpectedForMono);
+  void Suppress(float* in_ptr, float* spectral_mean, float* out_ptr);
+
+  void UpdateKeypress(bool key_pressed);
+  void UpdateRestoration(float voice_probability);
+
+  void UpdateBuffers(float* data);
+
+  void HardRestoration(float* spectral_mean);
+  void SoftRestoration(float* spectral_mean);
+
+  std::unique_ptr<TransientDetector> detector_;
+
+  size_t data_length_;
+  size_t detection_length_;
+  size_t analysis_length_;
+  size_t buffer_delay_;
+  size_t complex_analysis_length_;
+  int num_channels_;
+  // Input buffer where the original samples are stored.
+  std::unique_ptr<float[]> in_buffer_;
+  std::unique_ptr<float[]> detection_buffer_;
+  // Output buffer where the restored samples are stored.
+  std::unique_ptr<float[]> out_buffer_;
+
+  // Arrays for fft.
+  std::unique_ptr<size_t[]> ip_;
+  std::unique_ptr<float[]> wfft_;
+
+  std::unique_ptr<float[]> spectral_mean_;
+
+  // Stores the data for the fft.
+  std::unique_ptr<float[]> fft_buffer_;
+
+  std::unique_ptr<float[]> magnitudes_;
+
+  const float* window_;
+
+  std::unique_ptr<float[]> mean_factor_;
+
+  float detector_smoothed_;
+
+  int keypress_counter_;
+  int chunks_since_keypress_;
+  bool detection_enabled_;
+  bool suppression_enabled_;
+
+  bool use_hard_restoration_;
+  int chunks_since_voice_change_;
+
+  uint32_t seed_;
+
+  bool using_reference_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_H_
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/transient/transient_suppressor_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/transient/transient_suppressor_unittest.cc
new file mode 100644
index 0000000000..32d9858c64
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/transient/transient_suppressor_unittest.cc
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/transient/transient_suppressor.h"
+
+#include "modules/audio_processing/transient/common.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+
+TEST(TransientSuppressorTest, TypingDetectionLogicWorksAsExpectedForMono) {
+  static const int kNumChannels = 1;
+
+  TransientSuppressor ts;
+  ts.Initialize(ts::kSampleRate16kHz, ts::kSampleRate16kHz, kNumChannels);
+
+  // Each key-press enables detection.
+  EXPECT_FALSE(ts.detection_enabled_);
+  ts.UpdateKeypress(true);
+  EXPECT_TRUE(ts.detection_enabled_);
+
+  // It takes four seconds without any key-press to disable the detection.
+  for (int time_ms = 0; time_ms < 3990; time_ms += ts::kChunkSizeMs) {
+    ts.UpdateKeypress(false);
+    EXPECT_TRUE(ts.detection_enabled_);
+  }
+  ts.UpdateKeypress(false);
+  EXPECT_FALSE(ts.detection_enabled_);
+
+  // Key-presses that are more than a second apart from each other don't
+  // enable suppression.
+  for (int i = 0; i < 100; ++i) {
+    EXPECT_FALSE(ts.suppression_enabled_);
+    ts.UpdateKeypress(true);
+    EXPECT_TRUE(ts.detection_enabled_);
+    EXPECT_FALSE(ts.suppression_enabled_);
+    for (int time_ms = 0; time_ms < 990; time_ms += ts::kChunkSizeMs) {
+      ts.UpdateKeypress(false);
+      EXPECT_TRUE(ts.detection_enabled_);
+      EXPECT_FALSE(ts.suppression_enabled_);
+    }
+    ts.UpdateKeypress(false);
+  }
+
+  // Two consecutive key-presses are enough to enable the suppression.
+  ts.UpdateKeypress(true);
+  EXPECT_FALSE(ts.suppression_enabled_);
+  ts.UpdateKeypress(true);
+  EXPECT_TRUE(ts.suppression_enabled_);
+
+  // Key-presses that are less than a second apart from each other don't
+  // disable detection or suppression.
+  for (int i = 0; i < 100; ++i) {
+    for (int time_ms = 0; time_ms < 1000; time_ms += ts::kChunkSizeMs) {
+      ts.UpdateKeypress(false);
+      EXPECT_TRUE(ts.detection_enabled_);
+      EXPECT_TRUE(ts.suppression_enabled_);
+    }
+    ts.UpdateKeypress(true);
+    EXPECT_TRUE(ts.detection_enabled_);
+    EXPECT_TRUE(ts.suppression_enabled_);
+  }
+
+  // It takes four seconds without any key-press to disable the detection and
+  // suppression.
+  for (int time_ms = 0; time_ms < 3990; time_ms += ts::kChunkSizeMs) {
+    ts.UpdateKeypress(false);
+    EXPECT_TRUE(ts.detection_enabled_);
+    EXPECT_TRUE(ts.suppression_enabled_);
+  }
+  for (int time_ms = 0; time_ms < 1000; time_ms += ts::kChunkSizeMs) {
+    ts.UpdateKeypress(false);
+    EXPECT_FALSE(ts.detection_enabled_);
+    EXPECT_FALSE(ts.suppression_enabled_);
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/transient/wpd_node.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/transient/wpd_node.cc
new file mode 100644
index 0000000000..20d6a9013e
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/transient/wpd_node.cc
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/transient/wpd_node.h"
+
+#include <math.h>
+#include <string.h>
+
+#include "common_audio/fir_filter.h"
+#include "common_audio/fir_filter_factory.h"
+#include "modules/audio_processing/transient/dyadic_decimator.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+WPDNode::WPDNode(size_t length,
+                 const float* coefficients,
+                 size_t coefficients_length)
+    : // The data buffer has the parent data length to be able to contain and
+      // filter it.
+      data_(new float[2 * length + 1]),
+      length_(length),
+      filter_(CreateFirFilter(coefficients,
+                              coefficients_length,
+                              2 * length + 1)) {
+  RTC_DCHECK_GT(length, 0);
+  RTC_DCHECK(coefficients);
+  RTC_DCHECK_GT(coefficients_length, 0);
+  memset(data_.get(), 0, (2 * length + 1) * sizeof(data_[0]));
+}
+
+WPDNode::~WPDNode() {}
+
+int WPDNode::Update(const float* parent_data, size_t parent_data_length) {
+  if (!parent_data || (parent_data_length / 2) != length_) {
+    return -1;
+  }
+
+  // Filter data.
+  filter_->Filter(parent_data, parent_data_length, data_.get());
+
+  // Decimate data.
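+  // Halve the rate by keeping only the odd-indexed output samples; filtering
+  // followed by dyadic decimation implements one level of the wavelet packet
+  // decomposition.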
+  const bool kOddSequence = true;
+  size_t output_samples = DyadicDecimate(
+      data_.get(), parent_data_length, kOddSequence, data_.get(), length_);
+  if (output_samples != length_) {
+    return -1;
+  }
+
+  // Take the absolute value of every sample.
+  for (size_t i = 0; i < length_; ++i) {
+    data_[i] = fabs(data_[i]);
+  }
+
+  return 0;
+}
+
+int WPDNode::set_data(const float* new_data, size_t length) {
+  if (!new_data || length != length_) {
+    return -1;
+  }
+  memcpy(data_.get(), new_data, length * sizeof(data_[0]));
+  return 0;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/transient/wpd_node.h b/third_party/libwebrtc/webrtc/modules/audio_processing/transient/wpd_node.h
new file mode 100644
index 0000000000..117a6338a0
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/transient/wpd_node.h
@@ -0,0 +1,47 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TRANSIENT_WPD_NODE_H_
+#define MODULES_AUDIO_PROCESSING_TRANSIENT_WPD_NODE_H_
+
+#include <memory>
+
+#include "typedefs.h"  // NOLINT(build/include)
+
+namespace webrtc {
+
+class FIRFilter;
+
+// A single node of a Wavelet Packet Decomposition (WPD) tree.
+class WPDNode {
+ public:
+  // Creates a WPDNode. The data vector will contain zeros. The filter will
+  // have the coefficients provided.
+  WPDNode(size_t length, const float* coefficients, size_t coefficients_length);
+  ~WPDNode();
+
+  // Updates the node data. |parent_data_length| / 2 must be equal to
+  // |length_|.
+  // Returns 0 if correct, and -1 otherwise.
+  int Update(const float* parent_data, size_t parent_data_length);
+
+  const float* data() const { return data_.get(); }
+  // Returns 0 if correct, and -1 otherwise.
+  int set_data(const float* new_data, size_t length);
+  size_t length() const { return length_; }
+
+ private:
+  std::unique_ptr<float[]> data_;
+  size_t length_;
+  std::unique_ptr<FIRFilter> filter_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_TRANSIENT_WPD_NODE_H_
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/transient/wpd_node_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/transient/wpd_node_unittest.cc
new file mode 100644
index 0000000000..1929361055
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/transient/wpd_node_unittest.cc
@@ -0,0 +1,65 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/transient/wpd_node.h"
+
+#include <string.h>
+
+#include "test/gtest.h"
+
+namespace webrtc {
+
+static const size_t kDataLength = 5;
+static const float kTolerance = 0.0001f;
+
+static const size_t kParentDataLength = kDataLength * 2;
+static const float kParentData[kParentDataLength] =
+    {1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f};
+
+static const float kCoefficients[] = {0.2f, -0.3f, 0.5f, -0.7f, 0.11f};
+static const size_t kCoefficientsLength = sizeof(kCoefficients) /
+    sizeof(kCoefficients[0]);
+
+TEST(WPDNodeTest, Accessors) {
+  WPDNode node(kDataLength, kCoefficients, kCoefficientsLength);
+  EXPECT_EQ(0, node.set_data(kParentData, kDataLength));
+  EXPECT_EQ(0, memcmp(node.data(),
+                      kParentData,
+                      kDataLength * sizeof(node.data()[0])));
+}
+
+TEST(WPDNodeTest, UpdateThatOnlyDecimates) {
+  const float kIdentityCoefficient = 1.f;
+  WPDNode node(kDataLength, &kIdentityCoefficient, 1);
+  EXPECT_EQ(0, node.Update(kParentData, kParentDataLength));
+  for (size_t i = 0; i < kDataLength; ++i) {
+    EXPECT_FLOAT_EQ(kParentData[i * 2 + 1], node.data()[i]);
+  }
+}
+
+TEST(WPDNodeTest, UpdateWithArbitraryDataAndArbitraryFilter) {
+  WPDNode node(kDataLength, kCoefficients, kCoefficientsLength);
+  EXPECT_EQ(0, node.Update(kParentData, kParentDataLength));
+  EXPECT_NEAR(0.1f, node.data()[0], kTolerance);
+  EXPECT_NEAR(0.2f, node.data()[1], kTolerance);
+  EXPECT_NEAR(0.18f, node.data()[2], kTolerance);
+  EXPECT_NEAR(0.56f, node.data()[3], kTolerance);
+  EXPECT_NEAR(0.94f, node.data()[4], kTolerance);
+}
+
+TEST(WPDNodeTest, ExpectedErrorReturnValue) {
+  WPDNode node(kDataLength, kCoefficients, kCoefficientsLength);
+  EXPECT_EQ(-1, node.Update(kParentData, kParentDataLength - 1));
+  EXPECT_EQ(-1, node.Update(NULL, kParentDataLength));
+  EXPECT_EQ(-1, node.set_data(kParentData, kDataLength - 1));
+  EXPECT_EQ(-1, node.set_data(NULL, kDataLength));
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/transient/wpd_tree.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/transient/wpd_tree.cc
new file mode 100644
index 0000000000..a01b816daf
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/transient/wpd_tree.cc
@@ -0,0 +1,118 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/transient/wpd_tree.h"
+
+#include <math.h>
+#include <string.h>
+
+#include "modules/audio_processing/transient/dyadic_decimator.h"
+#include "modules/audio_processing/transient/wpd_node.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+WPDTree::WPDTree(size_t data_length, const float* high_pass_coefficients,
+                 const float* low_pass_coefficients, size_t coefficients_length,
+                 int levels)
+    : data_length_(data_length),
+      levels_(levels),
+      num_nodes_((1 << (levels + 1)) - 1) {
+  RTC_DCHECK_GT(data_length, (static_cast<size_t>(1) << levels));
+  RTC_DCHECK(high_pass_coefficients);
+  RTC_DCHECK(low_pass_coefficients);
+  RTC_DCHECK_GT(levels, 0);
+  // Size is 1 more, so we can use the array as 1-based. nodes_[0] is never
+  // allocated.
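+  // With 1-based indexing the array forms an implicit complete binary tree,
+  // as in a binary heap: node i has children 2 * i and 2 * i + 1, and parent
+  // i / 2, so no per-node pointers are needed.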
+ nodes_.reset(new std::unique_ptr<WPDNode>[num_nodes_ + 1]); + + // Create the first node + const float kRootCoefficient = 1.f; // Identity Coefficient. + nodes_[1].reset(new WPDNode(data_length, &kRootCoefficient, 1)); + // Variables used to create the rest of the nodes. + size_t index = 1; + size_t index_left_child = 0; + size_t index_right_child = 0; + + int num_nodes_at_curr_level = 0; + + // Branching each node in each level to create its children. The last level is + // not branched (all the nodes of that level are leaves). + for (int current_level = 0; current_level < levels; ++current_level) { + num_nodes_at_curr_level = 1 << current_level; + for (int i = 0; i < num_nodes_at_curr_level; ++i) { + index = (1 << current_level) + i; + // Obtain the index of the current node children. + index_left_child = index * 2; + index_right_child = index_left_child + 1; + nodes_[index_left_child].reset(new WPDNode(nodes_[index]->length() / 2, + low_pass_coefficients, + coefficients_length)); + nodes_[index_right_child].reset(new WPDNode(nodes_[index]->length() / 2, + high_pass_coefficients, + coefficients_length)); + } + } +} + +WPDTree::~WPDTree() {} + +WPDNode* WPDTree::NodeAt(int level, int index) { + if (level < 0 || level > levels_ || index < 0 || index >= 1 << level) { + return NULL; + } + + return nodes_[(1 << level) + index].get(); +} + +int WPDTree::Update(const float* data, size_t data_length) { + if (!data || data_length != data_length_) { + return -1; + } + + // Update the root node. + int update_result = nodes_[1]->set_data(data, data_length); + if (update_result != 0) { + return -1; + } + + // Variables used to update the rest of the nodes. + size_t index = 1; + size_t index_left_child = 0; + size_t index_right_child = 0; + + int num_nodes_at_curr_level = 0; + + for (int current_level = 0; current_level < levels_; ++current_level) { + num_nodes_at_curr_level = 1 << current_level; + for (int i = 0; i < num_nodes_at_curr_level; ++i) { + index = (1 << current_level) + i; + // Obtain the index of the current node children. + index_left_child = index * 2; + index_right_child = index_left_child + 1; + + update_result = nodes_[index_left_child]->Update( + nodes_[index]->data(), nodes_[index]->length()); + if (update_result != 0) { + return -1; + } + + update_result = nodes_[index_right_child]->Update( + nodes_[index]->data(), nodes_[index]->length()); + if (update_result != 0) { + return -1; + } + } + } + + return 0; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/transient/wpd_tree.h b/third_party/libwebrtc/webrtc/modules/audio_processing/transient/wpd_tree.h new file mode 100644 index 0000000000..53fc06b294 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/transient/wpd_tree.h @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_TRANSIENT_WPD_TREE_H_ +#define MODULES_AUDIO_PROCESSING_TRANSIENT_WPD_TREE_H_ + +#include <memory> + +#include "modules/audio_processing/transient/wpd_node.h" + +namespace webrtc { + +// Tree of a Wavelet Packet Decomposition (WPD). 
+//
+// The root node contains all the data provided; for each node in the tree, the
+// left child contains the approximation coefficients extracted from the node,
+// and the right child contains the detail coefficients.
+// It preserves its state, so it can be called multiple times.
+//
+// The number of nodes in the tree will be 2 ^ (levels + 1) - 1.
+//
+// Implementation details: Since the tree will always be a complete binary
+// tree, it is implemented using a single linear array instead of managing the
+// relationships in each node. For convenience, it is better to use an array
+// that starts at 1 (instead of 0). Taking that into account, the following
+// formulas apply:
+// Root node index: 1.
+// Node(Level, Index in that level): 2 ^ Level + (Index in that level).
+// Left Child: Current node index * 2.
+// Right Child: Current node index * 2 + 1.
+// Parent: Current Node Index / 2 (Integer division).
+class WPDTree {
+ public:
+  // Creates a WPD tree using the data length and coefficients provided.
+  WPDTree(size_t data_length,
+          const float* high_pass_coefficients,
+          const float* low_pass_coefficients,
+          size_t coefficients_length,
+          int levels);
+  ~WPDTree();
+
+  // Returns the number of nodes at any given level.
+  static int NumberOfNodesAtLevel(int level) {
+    return 1 << level;
+  }
+
+  // Returns a pointer to the node at the given level and index (of that
+  // level).
+  // Level goes from 0 to levels().
+  // Index goes from 0 to NumberOfNodesAtLevel(level) - 1.
+  //
+  // You can use the following formulas to get any node within the tree:
+  //   Notation: (Level, Index of node in that level).
+  //   Root node: (0, 0).
+  //   Left Child: (Current node level + 1, Current node index * 2).
+  //   Right Child: (Current node level + 1, Current node index * 2 + 1).
+  //   Parent: (Current node level - 1, Current node index / 2) (Integer
+  //           division).
+  //
+  // If level or index is out of bounds, the function returns NULL.
+  WPDNode* NodeAt(int level, int index);
+
+  // Updates all the nodes of the tree with the new data. |data_length| must be
+  // the same as was used for the creation of the tree.
+  // Returns 0 if correct, and -1 otherwise.
+  int Update(const float* data, size_t data_length);
+
+  // Returns the total number of levels below the root. Root is considered
+  // level 0.
+  int levels() const { return levels_; }
+
+  // Returns the total number of nodes.
+  int num_nodes() const { return num_nodes_; }
+
+  // Returns the total number of leaves.
+  int num_leaves() const { return 1 << levels_; }
+
+ private:
+  size_t data_length_;
+  int levels_;
+  int num_nodes_;
+  std::unique_ptr<std::unique_ptr<WPDNode>[]> nodes_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_TRANSIENT_WPD_TREE_H_
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/transient/wpd_tree_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/transient/wpd_tree_unittest.cc
new file mode 100644
index 0000000000..a90af7766b
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/transient/wpd_tree_unittest.cc
@@ -0,0 +1,193 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/transient/wpd_tree.h"
+
+#include <memory>
+#include <sstream>
+#include <string>
+
+#include "modules/audio_processing/transient/daubechies_8_wavelet_coeffs.h"
+#include "modules/audio_processing/transient/file_utils.h"
+#include "system_wrappers/include/file_wrapper.h"
+#include "test/gtest.h"
+#include "test/testsupport/fileutils.h"
+
+namespace webrtc {
+
+TEST(WPDTreeTest, Construction) {
+  const size_t kTestBufferSize = 100;
+  const int kLevels = 5;
+  const int kExpectedNumberOfNodes = (1 << (kLevels + 1)) - 1;
+
+  float test_buffer[kTestBufferSize];
+  memset(test_buffer, 0, kTestBufferSize * sizeof(*test_buffer));
+  float test_coefficients[] = {1.f, 2.f, 3.f, 4.f, 5.f};
+  const size_t kTestCoefficientsLength = sizeof(test_coefficients) /
+      sizeof(test_coefficients[0]);
+  WPDTree tree(kTestBufferSize,
+               test_coefficients,
+               test_coefficients,
+               kTestCoefficientsLength,
+               kLevels);
+  ASSERT_EQ(kExpectedNumberOfNodes, tree.num_nodes());
+  // Checks for NodeAt(level, index).
+  int nodes_at_level = 0;
+  for (int level = 0; level <= kLevels; ++level) {
+    nodes_at_level = 1 << level;
+    for (int i = 0; i < nodes_at_level; ++i) {
+      ASSERT_TRUE(NULL != tree.NodeAt(level, i));
+    }
+    // Out of bounds.
+    EXPECT_EQ(NULL, tree.NodeAt(level, -1));
+    EXPECT_EQ(NULL, tree.NodeAt(level, -12));
+    EXPECT_EQ(NULL, tree.NodeAt(level, nodes_at_level));
+    EXPECT_EQ(NULL, tree.NodeAt(level, nodes_at_level + 5));
+  }
+  // Out of bounds.
+  EXPECT_EQ(NULL, tree.NodeAt(-1, 0));
+  EXPECT_EQ(NULL, tree.NodeAt(-12, 0));
+  EXPECT_EQ(NULL, tree.NodeAt(kLevels + 1, 0));
+  EXPECT_EQ(NULL, tree.NodeAt(kLevels + 5, 0));
+  // Checks for Update().
+  EXPECT_EQ(0, tree.Update(test_buffer, kTestBufferSize));
+  EXPECT_EQ(-1, tree.Update(NULL, kTestBufferSize));
+  EXPECT_EQ(-1, tree.Update(test_buffer, kTestBufferSize - 1));
+}
+
+// This test checks the correctness of the tree against its Matlab equivalent
+// by comparing the tree's output with reference results stored in files
+// generated by Matlab.
+// It also writes the results to its own set of files in the out directory.
+// Both the Matlab and the output files contain all the results appended in
+// double precision (little endian).
+#if defined(WEBRTC_IOS)
+TEST(WPDTreeTest, DISABLED_CorrectnessBasedOnMatlabFiles) {
+#else
+TEST(WPDTreeTest, CorrectnessBasedOnMatlabFiles) {
+#endif
+  // 10 ms at 16000 Hz.
+  const size_t kTestBufferSize = 160;
+  const int kLevels = 3;
+  const int kLeaves = 1 << kLevels;
+  const size_t kLeavesSamples = kTestBufferSize >> kLevels;
+  // Create a tree with Daubechies 8 wavelet coefficients.
+  WPDTree tree(kTestBufferSize,
+               kDaubechies8HighPassCoefficients,
+               kDaubechies8LowPassCoefficients,
+               kDaubechies8CoefficientsLength,
+               kLevels);
+  // Allocate and open all matlab and out files.
+  std::unique_ptr<FileWrapper> matlab_files_data[kLeaves];
+  std::unique_ptr<FileWrapper> out_files_data[kLeaves];
+
+  for (int i = 0; i < kLeaves; ++i) {
+    // Matlab files.
+    matlab_files_data[i].reset(FileWrapper::Create());
+
+    std::ostringstream matlab_stream;
+    matlab_stream << "audio_processing/transient/wpd" << i;
+    std::string matlab_string = test::ResourcePath(matlab_stream.str(), "dat");
+    matlab_files_data[i]->OpenFile(matlab_string.c_str(), true);  // Read only.
+
+    bool file_opened = matlab_files_data[i]->is_open();
+    ASSERT_TRUE(file_opened) << "File could not be opened.\n" << matlab_string;
+
+    // Out files.
+    out_files_data[i].reset(FileWrapper::Create());
+
+    std::ostringstream out_stream;
+    out_stream << test::OutputPath() << "wpd_" << i << ".out";
+    std::string out_string = out_stream.str();
+
+    out_files_data[i]->OpenFile(out_string.c_str(), false);  // Write mode.
+
+    file_opened = out_files_data[i]->is_open();
+    ASSERT_TRUE(file_opened) << "File could not be opened.\n" << out_string;
+  }
+
+  // Prepare the test file.
+  std::string test_file_name = test::ResourcePath(
+      "audio_processing/transient/ajm-macbook-1-spke16m", "pcm");
+
+  std::unique_ptr<FileWrapper> test_file(FileWrapper::Create());
+
+  test_file->OpenFile(test_file_name.c_str(), true);  // Read only.
+
+  bool file_opened = test_file->is_open();
+  ASSERT_TRUE(file_opened) << "File could not be opened.\n" << test_file_name;
+
+  float test_buffer[kTestBufferSize];
+
+  // Only the first frames of the audio file are tested. The matlab files also
+  // only contain information about the first frames.
+  const size_t kMaxFramesToTest = 100;
+  const float kTolerance = 0.03f;
+
+  size_t frames_read = 0;
+
+  // Read first buffer from the PCM test file.
+  size_t file_samples_read = ReadInt16FromFileToFloatBuffer(test_file.get(),
+                                                            kTestBufferSize,
+                                                            test_buffer);
+  while (file_samples_read > 0 && frames_read < kMaxFramesToTest) {
+    ++frames_read;
+
+    if (file_samples_read < kTestBufferSize) {
+      // Pad the rest of the buffer with zeros.
+      for (size_t i = file_samples_read; i < kTestBufferSize; ++i) {
+        test_buffer[i] = 0.0;
+      }
+    }
+    tree.Update(test_buffer, kTestBufferSize);
+    double matlab_buffer[kTestBufferSize];
+
+    // Compare results with data from the matlab test files.
+    for (int i = 0; i < kLeaves; ++i) {
+      // Compare data values.
+      size_t matlab_samples_read =
+          ReadDoubleBufferFromFile(matlab_files_data[i].get(),
+                                   kLeavesSamples,
+                                   matlab_buffer);
+
+      ASSERT_EQ(kLeavesSamples, matlab_samples_read)
+          << "Matlab test files are malformed.\n"
+          << "File: 3_" << i;
+      // Get output data from the corresponding node.
+      const float* node_data = tree.NodeAt(kLevels, i)->data();
+      // Compare with matlab files.
+      for (size_t j = 0; j < kLeavesSamples; ++j) {
+        EXPECT_NEAR(matlab_buffer[j], node_data[j], kTolerance)
+            << "\nLeaf: " << i << "\nSample: " << j
+            << "\nFrame: " << frames_read - 1;
+      }
+
+      // Write results to out files.
+      WriteFloatBufferToFile(out_files_data[i].get(),
+                             kLeavesSamples,
+                             node_data);
+    }
+
+    // Read next buffer from the PCM test file.
+    file_samples_read = ReadInt16FromFileToFloatBuffer(test_file.get(),
+                                                       kTestBufferSize,
+                                                       test_buffer);
+  }
+
+  // Close all matlab and out files.
+  for (int i = 0; i < kLeaves; ++i) {
+    matlab_files_data[i]->CloseFile();
+    out_files_data[i]->CloseFile();
+  }
+
+  test_file->CloseFile();
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/typing_detection.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/typing_detection.cc
new file mode 100644
index 0000000000..6e18124aad
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/typing_detection.cc
@@ -0,0 +1,90 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/typing_detection.h"
+
+namespace webrtc {
+
+TypingDetection::TypingDetection()
+    : time_active_(0),
+      time_since_last_typing_(0),
+      penalty_counter_(0),
+      counter_since_last_detection_update_(0),
+      detection_to_report_(false),
+      new_detection_to_report_(false),
+      time_window_(10),
+      cost_per_typing_(100),
+      reporting_threshold_(300),
+      penalty_decay_(1),
+      type_event_delay_(2),
+      report_detection_update_period_(1) {
+}
+
+TypingDetection::~TypingDetection() {}
+
+bool TypingDetection::Process(bool key_pressed, bool vad_activity) {
+  if (vad_activity)
+    time_active_++;
+  else
+    time_active_ = 0;
+
+  // Keep track of the time since the last typing event.
+  if (key_pressed)
+    time_since_last_typing_ = 0;
+  else
+    ++time_since_last_typing_;
+
+  if (time_since_last_typing_ < type_event_delay_ &&
+      vad_activity &&
+      time_active_ < time_window_) {
+    penalty_counter_ += cost_per_typing_;
+    if (penalty_counter_ > reporting_threshold_)
+      new_detection_to_report_ = true;
+  }
+
+  if (penalty_counter_ > 0)
+    penalty_counter_ -= penalty_decay_;
+
+  if (++counter_since_last_detection_update_ ==
+      report_detection_update_period_) {
+    detection_to_report_ = new_detection_to_report_;
+    new_detection_to_report_ = false;
+    counter_since_last_detection_update_ = 0;
+  }
+
+  return detection_to_report_;
+}
+
+int TypingDetection::TimeSinceLastDetectionInSeconds() {
+  // Round to whole seconds.
+  return (time_since_last_typing_ + 50) / 100;
+}
+
+void TypingDetection::SetParameters(int time_window,
+                                    int cost_per_typing,
+                                    int reporting_threshold,
+                                    int penalty_decay,
+                                    int type_event_delay,
+                                    int report_detection_update_period) {
+  if (time_window) time_window_ = time_window;
+
+  if (cost_per_typing) cost_per_typing_ = cost_per_typing;
+
+  if (reporting_threshold) reporting_threshold_ = reporting_threshold;
+
+  if (penalty_decay) penalty_decay_ = penalty_decay;
+
+  if (type_event_delay) type_event_delay_ = type_event_delay;
+
+  if (report_detection_update_period)
+    report_detection_update_period_ = report_detection_update_period;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/typing_detection.h b/third_party/libwebrtc/webrtc/modules/audio_processing/typing_detection.h
new file mode 100644
index 0000000000..fe74a5956e
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/typing_detection.h
@@ -0,0 +1,93 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_TYPING_DETECTION_H_
+#define MODULES_AUDIO_PROCESSING_TYPING_DETECTION_H_
+
+#include "modules/include/module_common_types.h"
+#include "typedefs.h"  // NOLINT(build/include)
+
+namespace webrtc {
+
+class TypingDetection {
+ public:
+  TypingDetection();
+  virtual ~TypingDetection();
+
+  // Runs the detection algorithm. Shall be called every 10 ms. Returns true if
+  // typing is detected, or false if not, based on the update period as set
+  // with SetParameters(). See |report_detection_update_period_| below.
+  bool Process(bool key_pressed, bool vad_activity);
+
+  // Gets the time in seconds since the last detection.
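+  // The internal counter advances once per 10 ms call to Process(), so the
+  // result is rounded to whole seconds as (counter + 50) / 100.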
+  int TimeSinceLastDetectionInSeconds();
+
+  // Sets the algorithm parameters. A parameter value of 0 leaves it unchanged.
+  // See the corresponding member variables below for descriptions.
+  void SetParameters(int time_window,
+                     int cost_per_typing,
+                     int reporting_threshold,
+                     int penalty_decay,
+                     int type_event_delay,
+                     int report_detection_update_period);
+
+ private:
+  int time_active_;
+  int time_since_last_typing_;
+  int penalty_counter_;
+
+  // Counter since last time the detection status reported by Process() was
+  // updated. See also |report_detection_update_period_|.
+  int counter_since_last_detection_update_;
+
+  // The detection status to report. Updated every
+  // |report_detection_update_period_| call to Process().
+  bool detection_to_report_;
+
+  // What |detection_to_report_| should be set to next time it is updated.
+  bool new_detection_to_report_;
+
+  // Settable threshold values.
+
+  // Number of 10 ms slots accepted to count as a hit.
+  int time_window_;
+
+  // Penalty added when a typing event coincides with voice activity.
+  int cost_per_typing_;
+
+  // Threshold for |penalty_counter_|.
+  int reporting_threshold_;
+
+  // How much we reduce |penalty_counter_| every 10 ms.
+  int penalty_decay_;
+
+  // The maximum age (in 10 ms slots) of a typing event that still counts.
+  int type_event_delay_;
+
+  // Settable update period.
+
+  // Number of 10 ms slots between each update of the detection status returned
+  // by Process(). This inertia added to the algorithm is usually desirable and
+  // provided so that consumers of the class don't have to implement that
+  // themselves if they don't wish.
+  // If set to 1, each call to Process() will return the detection status for
+  // that 10 ms slot.
+  // If set to N (where N > 1), the detection status returned from Process()
+  // will remain the same until Process() has been called N times. Then, if
+  // none of the last N calls to Process() have detected typing for their
+  // respective 10 ms slots, Process() will return false. If at least one of
+  // the last N calls has detected typing, Process() will return true. And that
+  // returned status will then remain the same until the next N calls have been
+  // done.
+  int report_detection_update_period_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_TYPING_DETECTION_H_
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/utility/block_mean_calculator.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/utility/block_mean_calculator.cc
new file mode 100644
index 0000000000..3d766929c6
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/utility/block_mean_calculator.cc
@@ -0,0 +1,53 @@
+/*
+ *  Copyright 2016 The WebRTC Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */ + +#include "modules/audio_processing/utility/block_mean_calculator.h" + +#include "rtc_base/checks.h" + +namespace webrtc { + +BlockMeanCalculator::BlockMeanCalculator(size_t block_length) + : block_length_(block_length), + count_(0), + sum_(0.0), + mean_(0.0) { + RTC_DCHECK(block_length_ != 0); +} + +void BlockMeanCalculator::Reset() { + Clear(); + mean_ = 0.0; +} + +void BlockMeanCalculator::AddValue(float value) { + sum_ += value; + ++count_; + if (count_ == block_length_) { + mean_ = sum_ / block_length_; + Clear(); + } +} + +bool BlockMeanCalculator::EndOfBlock() const { + return count_ == 0; +} + +float BlockMeanCalculator::GetLatestMean() const { + return mean_; +} + +// Flush all samples added. +void BlockMeanCalculator::Clear() { + count_ = 0; + sum_ = 0.0; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/utility/block_mean_calculator.h b/third_party/libwebrtc/webrtc/modules/audio_processing/utility/block_mean_calculator.h new file mode 100644 index 0000000000..cfa7cfbeba --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/utility/block_mean_calculator.h @@ -0,0 +1,52 @@ +/* + * Copyright 2016 The WebRTC Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_UTILITY_BLOCK_MEAN_CALCULATOR_H_ +#define MODULES_AUDIO_PROCESSING_UTILITY_BLOCK_MEAN_CALCULATOR_H_ + +#include <stddef.h> + +#include "rtc_base/constructormagic.h" + +namespace webrtc { + +// BlockMeanCalculator calculates the mean of a block of values. Values are +// added one after another, and the mean is updated at the end of every block. +class BlockMeanCalculator { + public: + explicit BlockMeanCalculator(size_t block_length); + + // Reset. + void Reset(); + + // Add one value to the sequence. + void AddValue(float value); + + // Return whether the latest added value was at the end of a block. + bool EndOfBlock() const; + + // Return the latest mean. + float GetLatestMean() const; + + private: + // Clear all values added. + void Clear(); + + const size_t block_length_; + size_t count_; + float sum_; + float mean_; + + RTC_DISALLOW_COPY_AND_ASSIGN(BlockMeanCalculator); +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_UTILITY_BLOCK_MEAN_CALCULATOR_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/utility/block_mean_calculator_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/utility/block_mean_calculator_unittest.cc new file mode 100644 index 0000000000..1f4ebf1b67 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/utility/block_mean_calculator_unittest.cc @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "modules/audio_processing/utility/block_mean_calculator.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+
+TEST(MeanCalculatorTest, Correctness) {
+  const size_t kBlockLength = 10;
+  BlockMeanCalculator mean_calculator(kBlockLength);
+  size_t i = 0;
+  float reference = 0.0;
+
+  for (; i < kBlockLength - 1; ++i) {
+    mean_calculator.AddValue(static_cast<float>(i));
+    EXPECT_FALSE(mean_calculator.EndOfBlock());
+  }
+  mean_calculator.AddValue(static_cast<float>(i++));
+  EXPECT_TRUE(mean_calculator.EndOfBlock());
+
+  for (; i < 3 * kBlockLength; ++i) {
+    const bool end_of_block = i % kBlockLength == 0;
+    if (end_of_block) {
+      // Sum of (i - kBlockLength) ... (i - 1).
+      reference = i - 0.5 * (1 + kBlockLength);
+    }
+    EXPECT_EQ(mean_calculator.EndOfBlock(), end_of_block);
+    EXPECT_EQ(reference, mean_calculator.GetLatestMean());
+    mean_calculator.AddValue(static_cast<float>(i));
+  }
+}
+
+TEST(MeanCalculatorTest, Reset) {
+  const size_t kBlockLength = 10;
+  BlockMeanCalculator mean_calculator(kBlockLength);
+  for (size_t i = 0; i < kBlockLength - 1; ++i) {
+    mean_calculator.AddValue(static_cast<float>(i));
+  }
+  mean_calculator.Reset();
+  size_t i = 0;
+  for (; i < kBlockLength - 1; ++i) {
+    mean_calculator.AddValue(static_cast<float>(i));
+    EXPECT_FALSE(mean_calculator.EndOfBlock());
+  }
+  mean_calculator.AddValue(static_cast<float>(i));
+  EXPECT_TRUE(mean_calculator.EndOfBlock());
+  EXPECT_EQ(mean_calculator.GetLatestMean(), 0.5 * (kBlockLength - 1));
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/utility/delay_estimator.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/utility/delay_estimator.cc
new file mode 100644
index 0000000000..871b541651
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/utility/delay_estimator.cc
@@ -0,0 +1,703 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/utility/delay_estimator.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <algorithm>
+
+#include "rtc_base/checks.h"
+
+// The number of right shifts for scaling depends linearly on the number of
+// bits in the far-end binary spectrum.
+static const int kShiftsAtZero = 13;  // Right shifts at zero binary spectrum.
+static const int kShiftsLinearSlope = 3;
+
+static const int32_t kProbabilityOffset = 1024;  // 2 in Q9.
+static const int32_t kProbabilityLowerLimit = 8704;  // 17 in Q9.
+static const int32_t kProbabilityMinSpread = 2816;  // 5.5 in Q9.
+
+// Robust validation settings.
+static const float kHistogramMax = 3000.f;
+static const float kLastHistogramMax = 250.f;
+static const float kMinHistogramThreshold = 1.5f;
+static const int kMinRequiredHits = 10;
+static const int kMaxHitsWhenPossiblyNonCausal = 10;
+static const int kMaxHitsWhenPossiblyCausal = 1000;
+static const float kQ14Scaling = 1.f / (1 << 14);  // Scaling by 2^14 to get Q0.
+static const float kFractionSlope = 0.05f;
+static const float kMinFractionWhenPossiblyCausal = 0.5f;
+static const float kMinFractionWhenPossiblyNonCausal = 0.25f;
+
+// Counts and returns the number of set bits in a 32-bit word.
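+// This is the classic parallel (SWAR) bit count written with octal masks: the
+// two subtractions leave a count per 3-bit group, the masked add merges them
+// into 6-bit groups, and the final folds accumulate all groups into the low
+// bits (& 077 keeps the result, which is at most 32).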
+static int BitCount(uint32_t u32) { + uint32_t tmp = u32 - ((u32 >> 1) & 033333333333) - + ((u32 >> 2) & 011111111111); + tmp = ((tmp + (tmp >> 3)) & 030707070707); + tmp = (tmp + (tmp >> 6)); + tmp = (tmp + (tmp >> 12) + (tmp >> 24)) & 077; + + return ((int) tmp); +} + +// Compares the |binary_vector| with all rows of the |binary_matrix| and counts +// per row the number of times they have the same value. +// +// Inputs: +// - binary_vector : binary "vector" stored in a long +// - binary_matrix : binary "matrix" stored as a vector of long +// - matrix_size : size of binary "matrix" +// +// Output: +// - bit_counts : "Vector" stored as a long, containing for each +// row the number of times the matrix row and the +// input vector have the same value +// +static void BitCountComparison(uint32_t binary_vector, + const uint32_t* binary_matrix, + int matrix_size, + int32_t* bit_counts) { + int n = 0; + + // Compare |binary_vector| with all rows of the |binary_matrix| + for (; n < matrix_size; n++) { + bit_counts[n] = (int32_t) BitCount(binary_vector ^ binary_matrix[n]); + } +} + +// Collects necessary statistics for the HistogramBasedValidation(). This +// function has to be called prior to calling HistogramBasedValidation(). The +// statistics updated and used by the HistogramBasedValidation() are: +// 1. the number of |candidate_hits|, which states for how long we have had the +// same |candidate_delay| +// 2. the |histogram| of candidate delays over time. This histogram is +// weighted with respect to a reliability measure and time-varying to cope +// with possible delay shifts. +// For further description see commented code. +// +// Inputs: +// - candidate_delay : The delay to validate. +// - valley_depth_q14 : The cost function has a valley/minimum at the +// |candidate_delay| location. |valley_depth_q14| is the +// cost function difference between the minimum and +// maximum locations. The value is in the Q14 domain. +// - valley_level_q14 : Is the cost function value at the minimum, in Q14. +static void UpdateRobustValidationStatistics(BinaryDelayEstimator* self, + int candidate_delay, + int32_t valley_depth_q14, + int32_t valley_level_q14) { + const float valley_depth = valley_depth_q14 * kQ14Scaling; + float decrease_in_last_set = valley_depth; + const int max_hits_for_slow_change = (candidate_delay < self->last_delay) ? + kMaxHitsWhenPossiblyNonCausal : kMaxHitsWhenPossiblyCausal; + int i = 0; + + RTC_DCHECK_EQ(self->history_size, self->farend->history_size); + // Reset |candidate_hits| if we have a new candidate. + if (candidate_delay != self->last_candidate_delay) { + self->candidate_hits = 0; + self->last_candidate_delay = candidate_delay; + } + self->candidate_hits++; + + // The |histogram| is updated differently across the bins. + // 1. The |candidate_delay| histogram bin is increased with the + // |valley_depth|, which is a simple measure of how reliable the + // |candidate_delay| is. The histogram is not increased above + // |kHistogramMax|. + self->histogram[candidate_delay] += valley_depth; + if (self->histogram[candidate_delay] > kHistogramMax) { + self->histogram[candidate_delay] = kHistogramMax; + } + // 2. The histogram bins in the neighborhood of |candidate_delay| are + // unaffected. The neighborhood is defined as x + {-2, -1, 0, 1}. + // 3. The histogram bins in the neighborhood of |last_delay| are decreased + // with |decrease_in_last_set|. 
This value equals the difference between
+  //    the cost function values at the locations |candidate_delay| and
+  //    |last_delay| until we reach |max_hits_for_slow_change| consecutive hits
+  //    at the |candidate_delay|. If we exceed this amount of hits the
+  //    |candidate_delay| is a "potential" candidate and we start decreasing
+  //    these histogram bins more rapidly with |valley_depth|.
+  if (self->candidate_hits < max_hits_for_slow_change) {
+    decrease_in_last_set = (self->mean_bit_counts[self->compare_delay] -
+        valley_level_q14) * kQ14Scaling;
+  }
+  // 4. All other bins are decreased with |valley_depth|.
+  // TODO(bjornv): Investigate how to make this loop more efficient. Split up
+  // the loop? Remove parts that don't add too much.
+  for (i = 0; i < self->history_size; ++i) {
+    int is_in_last_set = (i >= self->last_delay - 2) &&
+        (i <= self->last_delay + 1) && (i != candidate_delay);
+    int is_in_candidate_set = (i >= candidate_delay - 2) &&
+        (i <= candidate_delay + 1);
+    self->histogram[i] -= decrease_in_last_set * is_in_last_set +
+        valley_depth * (!is_in_last_set && !is_in_candidate_set);
+    // 5. No histogram bin can go below 0.
+    if (self->histogram[i] < 0) {
+      self->histogram[i] = 0;
+    }
+  }
+}
+
+// Validates the |candidate_delay|, estimated in WebRtc_ProcessBinarySpectrum(),
+// based on a mix of counting concurring hits with a modified histogram
+// of recent delay estimates. In brief, a candidate is valid (returns 1) if it
+// is the most likely according to the histogram. There are a couple of
+// exceptions that are worth mentioning:
+// 1. If the |candidate_delay| < |last_delay| it can be that we are in a
+//    non-causal state, breaking a possible echo control algorithm. Hence, we
+//    open up for a quicker change by allowing the change even if the
+//    |candidate_delay| is not the most likely one according to the histogram.
+// 2. There's a minimum number of hits (kMinRequiredHits) and the histogram
+//    value has to have reached a minimum (kMinHistogramThreshold) to be valid.
+// 3. The action also depends on the filter length used for echo control. If
+//    the delay difference is larger than what the filter can capture, we also
+//    move quicker towards a change.
+// For further description see commented code.
+//
+// Input:
+//   - candidate_delay     : The delay to validate.
+//
+// Return value:
+//   - is_histogram_valid  : 1 - The |candidate_delay| is valid.
+//                           0 - Otherwise.
+static int HistogramBasedValidation(const BinaryDelayEstimator* self,
+                                    int candidate_delay) {
+  float fraction = 1.f;
+  float histogram_threshold = self->histogram[self->compare_delay];
+  const int delay_difference = candidate_delay - self->last_delay;
+  int is_histogram_valid = 0;
+
+  // The histogram based validation of |candidate_delay| is done by comparing
+  // the |histogram| at bin |candidate_delay| with a |histogram_threshold|.
+  // This |histogram_threshold| equals a |fraction| of the |histogram| at bin
+  // |last_delay|. The |fraction| is a piecewise linear function of the
+  // |delay_difference| between the |candidate_delay| and the |last_delay|
+  // allowing for a quicker move if
+  //   i) a potential echo control filter cannot handle these large differences.
+  //  ii) keeping |last_delay| instead of updating to |candidate_delay| could
+  //      force an echo control into a non-causal state.
+  // We further require the histogram to have reached a minimum value of
+  // |kMinHistogramThreshold|.
In addition, we also require the number of
+  // |candidate_hits| to be more than |kMinRequiredHits| to remove spurious
+  // values.
+
+  // Calculate a comparison histogram value (|histogram_threshold|) that
+  // depends on the distance between the |candidate_delay| and |last_delay|.
+  // TODO(bjornv): How much can we gain by turning the fraction calculation
+  // into tables?
+  if (delay_difference > self->allowed_offset) {
+    fraction = 1.f - kFractionSlope * (delay_difference - self->allowed_offset);
+    fraction = (fraction > kMinFractionWhenPossiblyCausal ? fraction :
+        kMinFractionWhenPossiblyCausal);
+  } else if (delay_difference < 0) {
+    fraction = kMinFractionWhenPossiblyNonCausal -
+        kFractionSlope * delay_difference;
+    fraction = (fraction > 1.f ? 1.f : fraction);
+  }
+  histogram_threshold *= fraction;
+  histogram_threshold = (histogram_threshold > kMinHistogramThreshold ?
+      histogram_threshold : kMinHistogramThreshold);
+
+  is_histogram_valid =
+      (self->histogram[candidate_delay] >= histogram_threshold) &&
+      (self->candidate_hits > kMinRequiredHits);
+
+  return is_histogram_valid;
+}
+
+// Performs a robust validation of the |candidate_delay| estimated in
+// WebRtc_ProcessBinarySpectrum(). The algorithm takes the
+// |is_instantaneous_valid| and the |is_histogram_valid| and combines them
+// into a robust validation. The HistogramBasedValidation() has to be called
+// prior to this call.
+// For further description on how the combination is done, see commented code.
+//
+// Inputs:
+//  - candidate_delay         : The delay to validate.
+//  - is_instantaneous_valid  : The instantaneous validation performed in
+//                              WebRtc_ProcessBinarySpectrum().
+//  - is_histogram_valid      : The histogram based validation.
+//
+// Return value:
+//  - is_robust               : 1 - The candidate_delay is valid according to a
+//                                  combination of the two inputs.
+//                            : 0 - Otherwise.
+static int RobustValidation(const BinaryDelayEstimator* self,
+                            int candidate_delay,
+                            int is_instantaneous_valid,
+                            int is_histogram_valid) {
+  int is_robust = 0;
+
+  // The final robust validation is based on the two algorithms; 1) the
+  // |is_instantaneous_valid| and 2) the histogram based one, with its result
+  // stored in |is_histogram_valid|.
+  //   i) Before we actually have a valid estimate (|last_delay| == -2), we say
+  //      a candidate is valid if either algorithm states so
+  //      (|is_instantaneous_valid| OR |is_histogram_valid|).
+  is_robust = (self->last_delay < 0) &&
+      (is_instantaneous_valid || is_histogram_valid);
+  //  ii) Otherwise, we need both algorithms to be certain
+  //      (|is_instantaneous_valid| AND |is_histogram_valid|).
+  is_robust |= is_instantaneous_valid && is_histogram_valid;
+  // iii) With one exception, i.e., the histogram based algorithm can overrule
+  //      the instantaneous one if |is_histogram_valid| = 1 and the histogram
+  //      is significantly strong.
+  is_robust |= is_histogram_valid &&
+      (self->histogram[candidate_delay] > self->last_delay_histogram);
+
+  return is_robust;
+}
+
+void WebRtc_FreeBinaryDelayEstimatorFarend(BinaryDelayEstimatorFarend* self) {
+
+  if (self == NULL) {
+    return;
+  }
+
+  free(self->binary_far_history);
+  self->binary_far_history = NULL;
+
+  free(self->far_bit_counts);
+  self->far_bit_counts = NULL;
+
+  free(self);
+}
+
+BinaryDelayEstimatorFarend* WebRtc_CreateBinaryDelayEstimatorFarend(
+    int history_size) {
+  BinaryDelayEstimatorFarend* self = NULL;
+
+  if (history_size > 1) {
+    // Sanity conditions fulfilled.
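+    // A far-end history needs at least two binary spectra for delay
+    // comparisons to be meaningful.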
+ self = static_cast<BinaryDelayEstimatorFarend*>( + malloc(sizeof(BinaryDelayEstimatorFarend))); + } + if (self == NULL) { + return NULL; + } + + self->history_size = 0; + self->binary_far_history = NULL; + self->far_bit_counts = NULL; + if (WebRtc_AllocateFarendBufferMemory(self, history_size) == 0) { + WebRtc_FreeBinaryDelayEstimatorFarend(self); + self = NULL; + } + return self; +} + +int WebRtc_AllocateFarendBufferMemory(BinaryDelayEstimatorFarend* self, + int history_size) { + RTC_DCHECK(self); + // (Re-)Allocate memory for history buffers. + self->binary_far_history = static_cast<uint32_t*>( + realloc(self->binary_far_history, + history_size * sizeof(*self->binary_far_history))); + self->far_bit_counts = static_cast<int*>( + realloc(self->far_bit_counts, + history_size * sizeof(*self->far_bit_counts))); + if ((self->binary_far_history == NULL) || (self->far_bit_counts == NULL)) { + history_size = 0; + } + // Fill with zeros if we have expanded the buffers. + if (history_size > self->history_size) { + int size_diff = history_size - self->history_size; + memset(&self->binary_far_history[self->history_size], + 0, + sizeof(*self->binary_far_history) * size_diff); + memset(&self->far_bit_counts[self->history_size], + 0, + sizeof(*self->far_bit_counts) * size_diff); + } + self->history_size = history_size; + + return self->history_size; +} + +void WebRtc_InitBinaryDelayEstimatorFarend(BinaryDelayEstimatorFarend* self) { + RTC_DCHECK(self); + memset(self->binary_far_history, 0, sizeof(uint32_t) * self->history_size); + memset(self->far_bit_counts, 0, sizeof(int) * self->history_size); +} + +void WebRtc_SoftResetBinaryDelayEstimatorFarend( + BinaryDelayEstimatorFarend* self, int delay_shift) { + int abs_shift = abs(delay_shift); + int shift_size = 0; + int dest_index = 0; + int src_index = 0; + int padding_index = 0; + + RTC_DCHECK(self); + shift_size = self->history_size - abs_shift; + RTC_DCHECK_GT(shift_size, 0); + if (delay_shift == 0) { + return; + } else if (delay_shift > 0) { + dest_index = abs_shift; + } else if (delay_shift < 0) { + src_index = abs_shift; + padding_index = shift_size; + } + + // Shift and zero pad buffers. + memmove(&self->binary_far_history[dest_index], + &self->binary_far_history[src_index], + sizeof(*self->binary_far_history) * shift_size); + memset(&self->binary_far_history[padding_index], 0, + sizeof(*self->binary_far_history) * abs_shift); + memmove(&self->far_bit_counts[dest_index], + &self->far_bit_counts[src_index], + sizeof(*self->far_bit_counts) * shift_size); + memset(&self->far_bit_counts[padding_index], 0, + sizeof(*self->far_bit_counts) * abs_shift); +} + +void WebRtc_AddBinaryFarSpectrum(BinaryDelayEstimatorFarend* handle, + uint32_t binary_far_spectrum) { + RTC_DCHECK(handle); + // Shift binary spectrum history and insert current |binary_far_spectrum|. + memmove(&(handle->binary_far_history[1]), &(handle->binary_far_history[0]), + (handle->history_size - 1) * sizeof(uint32_t)); + handle->binary_far_history[0] = binary_far_spectrum; + + // Shift history of far-end binary spectrum bit counts and insert bit count + // of current |binary_far_spectrum|. 
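+  // Caching the bit count of each stored spectrum avoids recomputing
+  // BitCount() over the whole history on every processed block.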
+ memmove(&(handle->far_bit_counts[1]), &(handle->far_bit_counts[0]), + (handle->history_size - 1) * sizeof(int)); + handle->far_bit_counts[0] = BitCount(binary_far_spectrum); +} + +void WebRtc_FreeBinaryDelayEstimator(BinaryDelayEstimator* self) { + + if (self == NULL) { + return; + } + + free(self->mean_bit_counts); + self->mean_bit_counts = NULL; + + free(self->bit_counts); + self->bit_counts = NULL; + + free(self->binary_near_history); + self->binary_near_history = NULL; + + free(self->histogram); + self->histogram = NULL; + + // BinaryDelayEstimator does not have ownership of |farend|, hence we do not + // free the memory here. That should be handled separately by the user. + self->farend = NULL; + + free(self); +} + +BinaryDelayEstimator* WebRtc_CreateBinaryDelayEstimator( + BinaryDelayEstimatorFarend* farend, int max_lookahead) { + BinaryDelayEstimator* self = NULL; + + if ((farend != NULL) && (max_lookahead >= 0)) { + // Sanity conditions fulfilled. + self = static_cast<BinaryDelayEstimator*>( + malloc(sizeof(BinaryDelayEstimator))); + } + if (self == NULL) { + return NULL; + } + + self->farend = farend; + self->near_history_size = max_lookahead + 1; + self->history_size = 0; + self->robust_validation_enabled = 0; // Disabled by default. + self->allowed_offset = 0; + + self->lookahead = max_lookahead; + + // Allocate memory for spectrum and history buffers. + self->mean_bit_counts = NULL; + self->bit_counts = NULL; + self->histogram = NULL; + self->binary_near_history = static_cast<uint32_t*>( + malloc((max_lookahead + 1) * sizeof(*self->binary_near_history))); + if (self->binary_near_history == NULL || + WebRtc_AllocateHistoryBufferMemory(self, farend->history_size) == 0) { + WebRtc_FreeBinaryDelayEstimator(self); + self = NULL; + } + + return self; +} + +int WebRtc_AllocateHistoryBufferMemory(BinaryDelayEstimator* self, + int history_size) { + BinaryDelayEstimatorFarend* far = self->farend; + // (Re-)Allocate memory for spectrum and history buffers. + if (history_size != far->history_size) { + // Only update far-end buffers if we need. + history_size = WebRtc_AllocateFarendBufferMemory(far, history_size); + } + // The extra array element in |mean_bit_counts| and |histogram| is a dummy + // element only used while |last_delay| == -2, i.e., before we have a valid + // estimate. + self->mean_bit_counts = static_cast<int32_t*>( + realloc(self->mean_bit_counts, + (history_size + 1) * sizeof(*self->mean_bit_counts))); + self->bit_counts = static_cast<int32_t*>( + realloc(self->bit_counts, history_size * sizeof(*self->bit_counts))); + self->histogram = static_cast<float*>( + realloc(self->histogram, (history_size + 1) * sizeof(*self->histogram))); + + if ((self->mean_bit_counts == NULL) || + (self->bit_counts == NULL) || + (self->histogram == NULL)) { + history_size = 0; + } + // Fill with zeros if we have expanded the buffers. 
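+  // realloc() leaves any newly grown tail uninitialized, so only the added
+  // elements are cleared; the existing history is preserved.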
+ if (history_size > self->history_size) { + int size_diff = history_size - self->history_size; + memset(&self->mean_bit_counts[self->history_size], + 0, + sizeof(*self->mean_bit_counts) * size_diff); + memset(&self->bit_counts[self->history_size], + 0, + sizeof(*self->bit_counts) * size_diff); + memset(&self->histogram[self->history_size], + 0, + sizeof(*self->histogram) * size_diff); + } + self->history_size = history_size; + + return self->history_size; +} + +void WebRtc_InitBinaryDelayEstimator(BinaryDelayEstimator* self) { + int i = 0; + RTC_DCHECK(self); + + memset(self->bit_counts, 0, sizeof(int32_t) * self->history_size); + memset(self->binary_near_history, + 0, + sizeof(uint32_t) * self->near_history_size); + for (i = 0; i <= self->history_size; ++i) { + self->mean_bit_counts[i] = (20 << 9); // 20 in Q9. + self->histogram[i] = 0.f; + } + self->minimum_probability = kMaxBitCountsQ9; // 32 in Q9. + self->last_delay_probability = (int) kMaxBitCountsQ9; // 32 in Q9. + + // Default return value if we're unable to estimate. -1 is used for errors. + self->last_delay = -2; + + self->last_candidate_delay = -2; + self->compare_delay = self->history_size; + self->candidate_hits = 0; + self->last_delay_histogram = 0.f; +} + +int WebRtc_SoftResetBinaryDelayEstimator(BinaryDelayEstimator* self, + int delay_shift) { + int lookahead = 0; + RTC_DCHECK(self); + lookahead = self->lookahead; + self->lookahead -= delay_shift; + if (self->lookahead < 0) { + self->lookahead = 0; + } + if (self->lookahead > self->near_history_size - 1) { + self->lookahead = self->near_history_size - 1; + } + return lookahead - self->lookahead; +} + +int WebRtc_ProcessBinarySpectrum(BinaryDelayEstimator* self, + uint32_t binary_near_spectrum) { + int i = 0; + int candidate_delay = -1; + int valid_candidate = 0; + + int32_t value_best_candidate = kMaxBitCountsQ9; + int32_t value_worst_candidate = 0; + int32_t valley_depth = 0; + + RTC_DCHECK(self); + if (self->farend->history_size != self->history_size) { + // Non matching history sizes. + return -1; + } + if (self->near_history_size > 1) { + // If we apply lookahead, shift near-end binary spectrum history. Insert + // current |binary_near_spectrum| and pull out the delayed one. + memmove(&(self->binary_near_history[1]), &(self->binary_near_history[0]), + (self->near_history_size - 1) * sizeof(uint32_t)); + self->binary_near_history[0] = binary_near_spectrum; + binary_near_spectrum = self->binary_near_history[self->lookahead]; + } + + // Compare with delayed spectra and store the |bit_counts| for each delay. + BitCountComparison(binary_near_spectrum, self->farend->binary_far_history, + self->history_size, self->bit_counts); + + // Update |mean_bit_counts|, which is the smoothed version of |bit_counts|. + for (i = 0; i < self->history_size; i++) { + // |bit_counts| is constrained to [0, 32], meaning we can smooth with a + // factor up to 2^26. We use Q9. + int32_t bit_count = (self->bit_counts[i] << 9); // Q9. + + // Update |mean_bit_counts| only when far-end signal has something to + // contribute. If |far_bit_counts| is zero the far-end signal is weak and + // we likely have a poor echo condition, hence don't update. + if (self->farend->far_bit_counts[i] > 0) { + // Make number of right shifts piecewise linear w.r.t. |far_bit_counts|. 
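+      // Fewer shifts give a larger update step in the mean estimator (which
+      // adds diff >> shifts), i.e. faster adaptation the stronger the far-end
+      // signal is.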
+ int shifts = kShiftsAtZero; + shifts -= (kShiftsLinearSlope * self->farend->far_bit_counts[i]) >> 4; + WebRtc_MeanEstimatorFix(bit_count, shifts, &(self->mean_bit_counts[i])); + } + } + + // Find |candidate_delay|, |value_best_candidate| and |value_worst_candidate| + // of |mean_bit_counts|. + for (i = 0; i < self->history_size; i++) { + if (self->mean_bit_counts[i] < value_best_candidate) { + value_best_candidate = self->mean_bit_counts[i]; + candidate_delay = i; + } + if (self->mean_bit_counts[i] > value_worst_candidate) { + value_worst_candidate = self->mean_bit_counts[i]; + } + } + valley_depth = value_worst_candidate - value_best_candidate; + + // The |value_best_candidate| is a good indicator on the probability of + // |candidate_delay| being an accurate delay (a small |value_best_candidate| + // means a good binary match). In the following sections we make a decision + // whether to update |last_delay| or not. + // 1) If the difference bit counts between the best and the worst delay + // candidates is too small we consider the situation to be unreliable and + // don't update |last_delay|. + // 2) If the situation is reliable we update |last_delay| if the value of the + // best candidate delay has a value less than + // i) an adaptive threshold |minimum_probability|, or + // ii) this corresponding value |last_delay_probability|, but updated at + // this time instant. + + // Update |minimum_probability|. + if ((self->minimum_probability > kProbabilityLowerLimit) && + (valley_depth > kProbabilityMinSpread)) { + // The "hard" threshold can't be lower than 17 (in Q9). + // The valley in the curve also has to be distinct, i.e., the + // difference between |value_worst_candidate| and |value_best_candidate| has + // to be large enough. + int32_t threshold = value_best_candidate + kProbabilityOffset; + if (threshold < kProbabilityLowerLimit) { + threshold = kProbabilityLowerLimit; + } + if (self->minimum_probability > threshold) { + self->minimum_probability = threshold; + } + } + // Update |last_delay_probability|. + // We use a Markov type model, i.e., a slowly increasing level over time. + self->last_delay_probability++; + // Validate |candidate_delay|. We have a reliable instantaneous delay + // estimate if + // 1) The valley is distinct enough (|valley_depth| > |kProbabilityOffset|) + // and + // 2) The depth of the valley is deep enough + // (|value_best_candidate| < |minimum_probability|) + // and deeper than the best estimate so far + // (|value_best_candidate| < |last_delay_probability|) + valid_candidate = ((valley_depth > kProbabilityOffset) && + ((value_best_candidate < self->minimum_probability) || + (value_best_candidate < self->last_delay_probability))); + + // Check for nonstationary farend signal. + const bool non_stationary_farend = + std::any_of(self->farend->far_bit_counts, + self->farend->far_bit_counts + self->history_size, + [](int a) { return a > 0; }); + + if (non_stationary_farend) { + // Only update the validation statistics when the farend is nonstationary + // as the underlying estimates are otherwise frozen. + UpdateRobustValidationStatistics(self, candidate_delay, valley_depth, + value_best_candidate); + } + + if (self->robust_validation_enabled) { + int is_histogram_valid = HistogramBasedValidation(self, candidate_delay); + valid_candidate = RobustValidation(self, candidate_delay, valid_candidate, + is_histogram_valid); + + } + + // Only update the delay estimate when the farend is nonstationary and when + // a valid delay candidate is available. 
+ if (non_stationary_farend && valid_candidate) { + if (candidate_delay != self->last_delay) { + self->last_delay_histogram = + (self->histogram[candidate_delay] > kLastHistogramMax ? + kLastHistogramMax : self->histogram[candidate_delay]); + // Adjust the histogram if we made a change to |last_delay|, though it was + // not the most likely one according to the histogram. + if (self->histogram[candidate_delay] < + self->histogram[self->compare_delay]) { + self->histogram[self->compare_delay] = self->histogram[candidate_delay]; + } + } + self->last_delay = candidate_delay; + if (value_best_candidate < self->last_delay_probability) { + self->last_delay_probability = value_best_candidate; + } + self->compare_delay = self->last_delay; + } + + return self->last_delay; +} + +int WebRtc_binary_last_delay(BinaryDelayEstimator* self) { + RTC_DCHECK(self); + return self->last_delay; +} + +float WebRtc_binary_last_delay_quality(BinaryDelayEstimator* self) { + float quality = 0; + RTC_DCHECK(self); + + if (self->robust_validation_enabled) { + // Simply a linear function of the histogram height at delay estimate. + quality = self->histogram[self->compare_delay] / kHistogramMax; + } else { + // Note that |last_delay_probability| states how deep the minimum of the + // cost function is, so it is rather an error probability. + quality = (float) (kMaxBitCountsQ9 - self->last_delay_probability) / + kMaxBitCountsQ9; + if (quality < 0) { + quality = 0; + } + } + return quality; +} + +void WebRtc_MeanEstimatorFix(int32_t new_value, + int factor, + int32_t* mean_value) { + int32_t diff = new_value - *mean_value; + + // mean_new = mean_value + ((new_value - mean_value) >> factor); + if (diff < 0) { + diff = -((-diff) >> factor); + } else { + diff = (diff >> factor); + } + *mean_value += diff; +} diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/utility/delay_estimator.h b/third_party/libwebrtc/webrtc/modules/audio_processing/utility/delay_estimator.h new file mode 100644 index 0000000000..cce6113a53 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/utility/delay_estimator.h @@ -0,0 +1,251 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Performs delay estimation on binary converted spectra. +// The return value is 0 - OK and -1 - Error, unless otherwise stated. + +#ifndef MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_H_ +#define MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_H_ + +#include "typedefs.h" // NOLINT(build/include) + +static const int32_t kMaxBitCountsQ9 = (32 << 9); // 32 matching bits in Q9. + +typedef struct { + // Pointer to bit counts. + int* far_bit_counts; + // Binary history variables. + uint32_t* binary_far_history; + int history_size; +} BinaryDelayEstimatorFarend; + +typedef struct { + // Pointer to bit counts. + int32_t* mean_bit_counts; + // Array only used locally in ProcessBinarySpectrum() but whose size is + // determined at run-time. + int32_t* bit_counts; + + // Binary history variables. + uint32_t* binary_near_history; + int near_history_size; + int history_size; + + // Delay estimation variables. 
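The 32 in kMaxBitCountsQ9 above is the number of compared band bits, so it bounds the per-delay scores. BitCountComparison(), defined earlier in delay_estimator.cc, fills |bit_counts| with the number of bits that differ for each candidate delay, which is why the comments above treat a small value as a good binary match. A simplified stand-in using only standard C++ (the remaining struct fields resume below):

#include <bitset>
#include <cstdint>

// Simplified stand-in for BitCountComparison(): for each candidate delay,
// count the band bits that differ between the near-end spectrum and the
// delayed far-end spectrum. 0 is a perfect match; 32 is the worst case,
// i.e. kMaxBitCountsQ9 once scaled to Q9.
static void CountDifferingBits(uint32_t binary_near,
                               const uint32_t* binary_far_history,
                               int history_size, int32_t* bit_counts) {
  for (int d = 0; d < history_size; ++d) {
    bit_counts[d] = static_cast<int32_t>(
        std::bitset<32>(binary_near ^ binary_far_history[d]).count());
  }
}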
+ int32_t minimum_probability; + int last_delay_probability; + + // Delay memory. + int last_delay; + + // Robust validation + int robust_validation_enabled; + int allowed_offset; + int last_candidate_delay; + int compare_delay; + int candidate_hits; + float* histogram; + float last_delay_histogram; + + // For dynamically changing the lookahead when using SoftReset...(). + int lookahead; + + // Far-end binary spectrum history buffer etc. + BinaryDelayEstimatorFarend* farend; +} BinaryDelayEstimator; + +// Releases the memory allocated by +// WebRtc_CreateBinaryDelayEstimatorFarend(...). +// Input: +// - self : Pointer to the binary delay estimation far-end +// instance which is the return value of +// WebRtc_CreateBinaryDelayEstimatorFarend(). +// +void WebRtc_FreeBinaryDelayEstimatorFarend(BinaryDelayEstimatorFarend* self); + +// Allocates the memory needed by the far-end part of the binary delay +// estimation. The memory needs to be initialized separately through +// WebRtc_InitBinaryDelayEstimatorFarend(...). +// +// Inputs: +// - history_size : Size of the far-end binary spectrum history. +// +// Return value: +// - BinaryDelayEstimatorFarend* +// : Created |handle|. If the memory can't be allocated +// or if any of the input parameters are invalid NULL +// is returned. +// +BinaryDelayEstimatorFarend* WebRtc_CreateBinaryDelayEstimatorFarend( + int history_size); + +// Re-allocates the buffers. +// +// Inputs: +// - self : Pointer to the binary estimation far-end instance +// which is the return value of +// WebRtc_CreateBinaryDelayEstimatorFarend(). +// - history_size : Size of the far-end binary spectrum history. +// +// Return value: +// - history_size : The history size allocated. +int WebRtc_AllocateFarendBufferMemory(BinaryDelayEstimatorFarend* self, + int history_size); + +// Initializes the delay estimation far-end instance created with +// WebRtc_CreateBinaryDelayEstimatorFarend(...). +// +// Input: +// - self : Pointer to the delay estimation far-end instance. +// +// Output: +// - self : Initialized far-end instance. +// +void WebRtc_InitBinaryDelayEstimatorFarend(BinaryDelayEstimatorFarend* self); + +// Soft resets the delay estimation far-end instance created with +// WebRtc_CreateBinaryDelayEstimatorFarend(...). +// +// Input: +// - delay_shift : The amount of blocks to shift history buffers. +// +void WebRtc_SoftResetBinaryDelayEstimatorFarend( + BinaryDelayEstimatorFarend* self, int delay_shift); + +// Adds the binary far-end spectrum to the internal far-end history buffer. This +// spectrum is used as reference when calculating the delay using +// WebRtc_ProcessBinarySpectrum(). +// +// Inputs: +// - self : Pointer to the delay estimation far-end +// instance. +// - binary_far_spectrum : Far-end binary spectrum. +// +// Output: +// - self : Updated far-end instance. +// +void WebRtc_AddBinaryFarSpectrum(BinaryDelayEstimatorFarend* self, + uint32_t binary_far_spectrum); + +// Releases the memory allocated by WebRtc_CreateBinaryDelayEstimator(...). +// +// Note that BinaryDelayEstimator utilizes BinaryDelayEstimatorFarend, but does +// not take ownership of it, hence the BinaryDelayEstimator has to be torn down +// before the far-end. +// +// Input: +// - self : Pointer to the binary delay estimation instance +// which is the return value of +// WebRtc_CreateBinaryDelayEstimator(). +// +void WebRtc_FreeBinaryDelayEstimator(BinaryDelayEstimator* self); + +// Allocates the memory needed by the binary delay estimation. 
The memory needs +// to be initialized separately through WebRtc_InitBinaryDelayEstimator(...). +// +// See WebRtc_CreateDelayEstimator(..) in delay_estimator_wrapper.c for detailed +// description. +BinaryDelayEstimator* WebRtc_CreateBinaryDelayEstimator( + BinaryDelayEstimatorFarend* farend, int max_lookahead); + +// Re-allocates |history_size| dependent buffers. The far-end buffers will be +// updated at the same time if needed. +// +// Input: +// - self : Pointer to the binary estimation instance which is +// the return value of +// WebRtc_CreateBinaryDelayEstimator(). +// - history_size : Size of the history buffers. +// +// Return value: +// - history_size : The history size allocated. +int WebRtc_AllocateHistoryBufferMemory(BinaryDelayEstimator* self, + int history_size); + +// Initializes the delay estimation instance created with +// WebRtc_CreateBinaryDelayEstimator(...). +// +// Input: +// - self : Pointer to the delay estimation instance. +// +// Output: +// - self : Initialized instance. +// +void WebRtc_InitBinaryDelayEstimator(BinaryDelayEstimator* self); + +// Soft resets the delay estimation instance created with +// WebRtc_CreateBinaryDelayEstimator(...). +// +// Input: +// - delay_shift : The amount of blocks to shift history buffers. +// +// Return value: +// - actual_shifts : The actual number of shifts performed. +// +int WebRtc_SoftResetBinaryDelayEstimator(BinaryDelayEstimator* self, + int delay_shift); + +// Estimates and returns the delay between the binary far-end and binary near- +// end spectra. It is assumed the binary far-end spectrum has been added using +// WebRtc_AddBinaryFarSpectrum() prior to this call. The value will be offset by +// the lookahead (i.e. the lookahead should be subtracted from the returned +// value). +// +// Inputs: +// - self : Pointer to the delay estimation instance. +// - binary_near_spectrum : Near-end binary spectrum of the current block. +// +// Output: +// - self : Updated instance. +// +// Return value: +// - delay : >= 0 - Calculated delay value. +// -2 - Insufficient data for estimation. +// +int WebRtc_ProcessBinarySpectrum(BinaryDelayEstimator* self, + uint32_t binary_near_spectrum); + +// Returns the last calculated delay updated by the function +// WebRtc_ProcessBinarySpectrum(...). +// +// Input: +// - self : Pointer to the delay estimation instance. +// +// Return value: +// - delay : >= 0 - Last calculated delay value +// -2 - Insufficient data for estimation. +// +int WebRtc_binary_last_delay(BinaryDelayEstimator* self); + +// Returns the estimation quality of the last calculated delay updated by the +// function WebRtc_ProcessBinarySpectrum(...). The estimation quality is a value +// in the interval [0, 1]. The higher the value, the better the quality. +// +// Return value: +// - delay_quality : >= 0 - Estimation quality of last calculated +// delay value. +float WebRtc_binary_last_delay_quality(BinaryDelayEstimator* self); + +// Updates the |mean_value| recursively with a step size of 2^-|factor|. This +// function is used internally in the Binary Delay Estimator as well as the +// Fixed point wrapper. +// +// Inputs: +// - new_value : The new value the mean should be updated with. +// - factor : The step size, in number of right shifts. +// +// Input/Output: +// - mean_value : Pointer to the mean value. 
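Taken together, the declarations above support a short usage loop at the binary level. A hypothetical sketch follows; the history size, lookahead, and helper name are illustrative, the teardown order is the one required by the ownership note above, and the prototype that this last doc comment describes continues below:

#include "modules/audio_processing/utility/delay_estimator.h"

// Minimal sketch of the binary-level API. A real client derives the 32-bit
// spectra from audio via the wrapper in delay_estimator_wrapper.cc.
int EstimateDelayOnce(const uint32_t* far_bits, const uint32_t* near_bits,
                      int num_blocks) {
  BinaryDelayEstimatorFarend* farend =
      WebRtc_CreateBinaryDelayEstimatorFarend(/*history_size=*/100);
  BinaryDelayEstimator* estimator =
      WebRtc_CreateBinaryDelayEstimator(farend, /*max_lookahead=*/0);
  WebRtc_InitBinaryDelayEstimatorFarend(farend);
  WebRtc_InitBinaryDelayEstimator(estimator);

  int delay = -2;  // -2 until there is enough data to estimate.
  for (int i = 0; i < num_blocks; ++i) {
    WebRtc_AddBinaryFarSpectrum(farend, far_bits[i]);
    delay = WebRtc_ProcessBinarySpectrum(estimator, near_bits[i]);
  }

  // The estimator borrows the far-end, so it must be freed first.
  WebRtc_FreeBinaryDelayEstimator(estimator);
  WebRtc_FreeBinaryDelayEstimatorFarend(farend);
  return delay;
}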
+// +void WebRtc_MeanEstimatorFix(int32_t new_value, + int factor, + int32_t* mean_value); + +#endif // MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/utility/delay_estimator_internal.h b/third_party/libwebrtc/webrtc/modules/audio_processing/utility/delay_estimator_internal.h new file mode 100644 index 0000000000..46eea3ec18 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/utility/delay_estimator_internal.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Header file including the delay estimator handle used for testing. + +#ifndef MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_INTERNAL_H_ +#define MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_INTERNAL_H_ + +#include "modules/audio_processing/utility/delay_estimator.h" +#include "typedefs.h" // NOLINT(build/include) + +typedef union { + float float_; + int32_t int32_; +} SpectrumType; + +typedef struct { + // Pointers to mean values of spectrum. + SpectrumType* mean_far_spectrum; + // |mean_far_spectrum| initialization indicator. + int far_spectrum_initialized; + + int spectrum_size; + + // Far-end part of binary spectrum based delay estimation. + BinaryDelayEstimatorFarend* binary_farend; +} DelayEstimatorFarend; + +typedef struct { + // Pointers to mean values of spectrum. + SpectrumType* mean_near_spectrum; + // |mean_near_spectrum| initialization indicator. + int near_spectrum_initialized; + + int spectrum_size; + + // Binary spectrum based delay estimator + BinaryDelayEstimator* binary_handle; +} DelayEstimator; + +#endif // MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_INTERNAL_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/utility/delay_estimator_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/utility/delay_estimator_unittest.cc new file mode 100644 index 0000000000..36700e5706 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/utility/delay_estimator_unittest.cc @@ -0,0 +1,618 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/utility/delay_estimator.h" +#include "modules/audio_processing/utility/delay_estimator_internal.h" +#include "modules/audio_processing/utility/delay_estimator_wrapper.h" +#include "test/gtest.h" +#include "typedefs.h" // NOLINT(build/include) + +namespace { + +enum { kSpectrumSize = 65 }; +// Delay history sizes. +enum { kMaxDelay = 100 }; +enum { kLookahead = 10 }; +enum { kHistorySize = kMaxDelay + kLookahead }; +// Length of binary spectrum sequence. 
+enum { kSequenceLength = 400 }; + +const int kDifferentHistorySize = 3; +const int kDifferentLookahead = 1; + +const int kEnable[] = { 0, 1 }; +const size_t kSizeEnable = sizeof(kEnable) / sizeof(*kEnable); + +class DelayEstimatorTest : public ::testing::Test { + protected: + DelayEstimatorTest(); + virtual void SetUp(); + virtual void TearDown(); + + void Init(); + void InitBinary(); + void VerifyDelay(BinaryDelayEstimator* binary_handle, int offset, int delay); + void RunBinarySpectra(BinaryDelayEstimator* binary1, + BinaryDelayEstimator* binary2, + int near_offset, int lookahead_offset, int far_offset); + void RunBinarySpectraTest(int near_offset, int lookahead_offset, + int ref_robust_validation, int robust_validation); + + void* handle_; + DelayEstimator* self_; + void* farend_handle_; + DelayEstimatorFarend* farend_self_; + BinaryDelayEstimator* binary_; + BinaryDelayEstimatorFarend* binary_farend_; + int spectrum_size_; + // Dummy input spectra. + float far_f_[kSpectrumSize]; + float near_f_[kSpectrumSize]; + uint16_t far_u16_[kSpectrumSize]; + uint16_t near_u16_[kSpectrumSize]; + uint32_t binary_spectrum_[kSequenceLength + kHistorySize]; +}; + +DelayEstimatorTest::DelayEstimatorTest() + : handle_(NULL), + self_(NULL), + farend_handle_(NULL), + farend_self_(NULL), + binary_(NULL), + binary_farend_(NULL), + spectrum_size_(kSpectrumSize) { + // Dummy input data are set with more or less arbitrary non-zero values. + memset(far_f_, 1, sizeof(far_f_)); + memset(near_f_, 2, sizeof(near_f_)); + memset(far_u16_, 1, sizeof(far_u16_)); + memset(near_u16_, 2, sizeof(near_u16_)); + // Construct a sequence of binary spectra used to verify delay estimate. The + // |kSequenceLength| has to be long enough for the delay estimation to leave + // the initialized state. + binary_spectrum_[0] = 1; + for (int i = 1; i < (kSequenceLength + kHistorySize); i++) { + binary_spectrum_[i] = 3 * binary_spectrum_[i - 1]; + } +} + +void DelayEstimatorTest::SetUp() { + farend_handle_ = WebRtc_CreateDelayEstimatorFarend(kSpectrumSize, + kHistorySize); + ASSERT_TRUE(farend_handle_ != NULL); + farend_self_ = reinterpret_cast<DelayEstimatorFarend*>(farend_handle_); + handle_ = WebRtc_CreateDelayEstimator(farend_handle_, kLookahead); + ASSERT_TRUE(handle_ != NULL); + self_ = reinterpret_cast<DelayEstimator*>(handle_); + binary_farend_ = WebRtc_CreateBinaryDelayEstimatorFarend(kHistorySize); + ASSERT_TRUE(binary_farend_ != NULL); + binary_ = WebRtc_CreateBinaryDelayEstimator(binary_farend_, kLookahead); + ASSERT_TRUE(binary_ != NULL); +} + +void DelayEstimatorTest::TearDown() { + WebRtc_FreeDelayEstimator(handle_); + handle_ = NULL; + self_ = NULL; + WebRtc_FreeDelayEstimatorFarend(farend_handle_); + farend_handle_ = NULL; + farend_self_ = NULL; + WebRtc_FreeBinaryDelayEstimator(binary_); + binary_ = NULL; + WebRtc_FreeBinaryDelayEstimatorFarend(binary_farend_); + binary_farend_ = NULL; +} + +void DelayEstimatorTest::Init() { + // Initialize Delay Estimator + EXPECT_EQ(0, WebRtc_InitDelayEstimatorFarend(farend_handle_)); + EXPECT_EQ(0, WebRtc_InitDelayEstimator(handle_)); + // Verify initialization. + EXPECT_EQ(0, farend_self_->far_spectrum_initialized); + EXPECT_EQ(0, self_->near_spectrum_initialized); + EXPECT_EQ(-2, WebRtc_last_delay(handle_)); // Delay in initial state. + EXPECT_FLOAT_EQ(0, WebRtc_last_delay_quality(handle_)); // Zero quality. +} + +void DelayEstimatorTest::InitBinary() { + // Initialize Binary Delay Estimator (far-end part). 
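One detail of the fixture constructor above, noted before InitBinary() continues: the test signal is the recurrence binary_spectrum_[i] = 3 * binary_spectrum_[i - 1] starting from 1, and because uint32_t arithmetic wraps modulo 2^32 this acts as a tiny multiplicative generator whose bit patterns soon look irregular, which keeps consecutive binary spectra distinguishable. A standalone sketch:

#include <cinttypes>
#include <cstdint>
#include <cstdio>

// Print the first few terms of x[i] = 3 * x[i - 1] (mod 2^32), the sequence
// the test fixture uses as its synthetic binary spectra.
int main() {
  uint32_t x = 1;
  for (int i = 0; i < 8; ++i) {
    printf("%d: 0x%08" PRIx32 "\n", i, x);
    x *= 3;
  }
}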
+  WebRtc_InitBinaryDelayEstimatorFarend(binary_farend_);
+  // Initialize Binary Delay Estimator
+  WebRtc_InitBinaryDelayEstimator(binary_);
+  // Verify initialization. This does not guarantee a complete check, since
+  // |last_delay| may be equal to -2 before initialization if done on the fly.
+  EXPECT_EQ(-2, binary_->last_delay);
+}
+
+void DelayEstimatorTest::VerifyDelay(BinaryDelayEstimator* binary_handle,
+                                     int offset, int delay) {
+  // Verify that WebRtc_binary_last_delay() returns the correct delay.
+  EXPECT_EQ(delay, WebRtc_binary_last_delay(binary_handle));
+
+  if (delay != -2) {
+    // Verify correct delay estimate. In the non-causal case the true delay
+    // is equivalent to the |offset|.
+    EXPECT_EQ(offset, delay);
+  }
+}
+
+void DelayEstimatorTest::RunBinarySpectra(BinaryDelayEstimator* binary1,
+                                          BinaryDelayEstimator* binary2,
+                                          int near_offset,
+                                          int lookahead_offset,
+                                          int far_offset) {
+  int different_validations = binary1->robust_validation_enabled ^
+      binary2->robust_validation_enabled;
+  WebRtc_InitBinaryDelayEstimatorFarend(binary_farend_);
+  WebRtc_InitBinaryDelayEstimator(binary1);
+  WebRtc_InitBinaryDelayEstimator(binary2);
+  // Verify initialization. This does not guarantee a complete check, since
+  // |last_delay| may be equal to -2 before initialization if done on the fly.
+  EXPECT_EQ(-2, binary1->last_delay);
+  EXPECT_EQ(-2, binary2->last_delay);
+  for (int i = kLookahead; i < (kSequenceLength + kLookahead); i++) {
+    WebRtc_AddBinaryFarSpectrum(binary_farend_,
+                                binary_spectrum_[i + far_offset]);
+    int delay_1 = WebRtc_ProcessBinarySpectrum(binary1, binary_spectrum_[i]);
+    int delay_2 =
+        WebRtc_ProcessBinarySpectrum(binary2,
+                                     binary_spectrum_[i - near_offset]);
+
+    VerifyDelay(binary1, far_offset + kLookahead, delay_1);
+    VerifyDelay(binary2,
+                far_offset + kLookahead + lookahead_offset + near_offset,
+                delay_2);
+    // Expect the two delay estimates to be offset by |lookahead_offset| +
+    // |near_offset| when we have left the initial state.
+    if ((delay_1 != -2) && (delay_2 != -2)) {
+      EXPECT_EQ(delay_1, delay_2 - lookahead_offset - near_offset);
+    }
+    // For the case of identical signals |delay_1| and |delay_2| should match
+    // all the time, unless one of them has robust validation turned on. In
+    // that case the robust validation leaves the initial state faster.
+    if ((near_offset == 0) && (lookahead_offset == 0)) {
+      if (!different_validations) {
+        EXPECT_EQ(delay_1, delay_2);
+      } else {
+        if (binary1->robust_validation_enabled) {
+          EXPECT_GE(delay_1, delay_2);
+        } else {
+          EXPECT_GE(delay_2, delay_1);
+        }
+      }
+    }
+  }
+  // Verify that we have left the initialized state.
+  EXPECT_NE(-2, WebRtc_binary_last_delay(binary1));
+  EXPECT_LT(0, WebRtc_binary_last_delay_quality(binary1));
+  EXPECT_NE(-2, WebRtc_binary_last_delay(binary2));
+  EXPECT_LT(0, WebRtc_binary_last_delay_quality(binary2));
+}
+
+void DelayEstimatorTest::RunBinarySpectraTest(int near_offset,
+                                              int lookahead_offset,
+                                              int ref_robust_validation,
+                                              int robust_validation) {
+  BinaryDelayEstimator* binary2 =
+      WebRtc_CreateBinaryDelayEstimator(binary_farend_,
+                                        kLookahead + lookahead_offset);
+  // Verify the delay for both causal and non-causal systems. For causal
+  // systems the delay is equivalent to a positive |offset| of the far-end
+  // sequence. For non-causal systems the delay is equivalent to a negative
+  // |offset| of the far-end sequence.
+  binary_->robust_validation_enabled = ref_robust_validation;
+  binary2->robust_validation_enabled = robust_validation;
+  for (int offset = -kLookahead;
+      offset < kMaxDelay - lookahead_offset - near_offset;
+      offset++) {
+    RunBinarySpectra(binary_, binary2, near_offset, lookahead_offset, offset);
+  }
+  WebRtc_FreeBinaryDelayEstimator(binary2);
+  binary2 = NULL;
+  binary_->robust_validation_enabled = 0;  // Reset reference.
+}
+
+TEST_F(DelayEstimatorTest, CorrectErrorReturnsOfWrapper) {
+  // In this test we verify correct error returns on invalid API calls.
+
+  // WebRtc_CreateDelayEstimatorFarend() and WebRtc_CreateDelayEstimator()
+  // should return a NULL pointer on invalid input values.
+  // Make sure we have a non-NULL value at start, so we can detect NULL after
+  // create failure.
+  void* handle = farend_handle_;
+  handle = WebRtc_CreateDelayEstimatorFarend(33, kHistorySize);
+  EXPECT_TRUE(handle == NULL);
+  handle = WebRtc_CreateDelayEstimatorFarend(kSpectrumSize, 1);
+  EXPECT_TRUE(handle == NULL);
+
+  handle = handle_;
+  handle = WebRtc_CreateDelayEstimator(NULL, kLookahead);
+  EXPECT_TRUE(handle == NULL);
+  handle = WebRtc_CreateDelayEstimator(farend_handle_, -1);
+  EXPECT_TRUE(handle == NULL);
+
+  // WebRtc_InitDelayEstimatorFarend() and WebRtc_InitDelayEstimator() should
+  // return -1 if we have a NULL pointer as |handle|.
+  EXPECT_EQ(-1, WebRtc_InitDelayEstimatorFarend(NULL));
+  EXPECT_EQ(-1, WebRtc_InitDelayEstimator(NULL));
+
+  // WebRtc_AddFarSpectrumFloat() should return -1 if we have:
+  // 1) NULL pointer as |handle|.
+  // 2) NULL pointer as far-end spectrum.
+  // 3) Incorrect spectrum size.
+  EXPECT_EQ(-1, WebRtc_AddFarSpectrumFloat(NULL, far_f_, spectrum_size_));
+  // Use |farend_handle_| which is properly created at SetUp().
+  EXPECT_EQ(-1, WebRtc_AddFarSpectrumFloat(farend_handle_, NULL,
+                                           spectrum_size_));
+  EXPECT_EQ(-1, WebRtc_AddFarSpectrumFloat(farend_handle_, far_f_,
+                                           spectrum_size_ + 1));
+
+  // WebRtc_AddFarSpectrumFix() should return -1 if we have:
+  // 1) NULL pointer as |handle|.
+  // 2) NULL pointer as far-end spectrum.
+  // 3) Incorrect spectrum size.
+  // 4) Too high precision in far-end spectrum (Q-domain > 15).
+  EXPECT_EQ(-1, WebRtc_AddFarSpectrumFix(NULL, far_u16_, spectrum_size_, 0));
+  EXPECT_EQ(-1, WebRtc_AddFarSpectrumFix(farend_handle_, NULL, spectrum_size_,
+                                         0));
+  EXPECT_EQ(-1, WebRtc_AddFarSpectrumFix(farend_handle_, far_u16_,
+                                         spectrum_size_ + 1, 0));
+  EXPECT_EQ(-1, WebRtc_AddFarSpectrumFix(farend_handle_, far_u16_,
+                                         spectrum_size_, 16));
+
+  // WebRtc_set_history_size() should return -1 if:
+  // 1) |handle| is NULL.
+  // 2) |history_size| <= 1.
+  EXPECT_EQ(-1, WebRtc_set_history_size(NULL, 1));
+  EXPECT_EQ(-1, WebRtc_set_history_size(handle_, 1));
+  // WebRtc_history_size() should return -1 if:
+  // 1) NULL pointer input.
+  EXPECT_EQ(-1, WebRtc_history_size(NULL));
+  // 2) there is a mismatch between history sizes.
+  void* tmp_handle = WebRtc_CreateDelayEstimator(farend_handle_, kHistorySize);
+  EXPECT_EQ(0, WebRtc_InitDelayEstimator(tmp_handle));
+  EXPECT_EQ(kDifferentHistorySize,
+            WebRtc_set_history_size(tmp_handle, kDifferentHistorySize));
+  EXPECT_EQ(kDifferentHistorySize, WebRtc_history_size(tmp_handle));
+  EXPECT_EQ(kHistorySize, WebRtc_set_history_size(handle_, kHistorySize));
+  EXPECT_EQ(-1, WebRtc_history_size(tmp_handle));
+
+  // WebRtc_set_lookahead() should return -1 if we try a value outside the
+  // buffer.
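One wrapper behavior exercised above with |tmp_handle| deserves spelling out before the remaining checks: every estimator sharing a far-end must agree on |history_size|, because resizing through one handle also resizes the shared far-end buffers. A sketch of the rule using this API (the size 200 is arbitrary):

#include "modules/audio_processing/utility/delay_estimator_wrapper.h"

// Two estimators share one far-end. Setting a new history size through one
// handle also resizes the far-end buffers, leaving the other handle in a
// mismatched state until it is given the same size.
void KeepHistorySizesInSync(void* estimator_a, void* estimator_b) {
  WebRtc_set_history_size(estimator_a, 200);  // Far-end now sized for 200.
  // From here, WebRtc_history_size(estimator_b) and Process calls on
  // |estimator_b| return -1, until the handles are brought back in sync:
  WebRtc_set_history_size(estimator_b, 200);
}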
+ EXPECT_EQ(-1, WebRtc_set_lookahead(handle_, kLookahead + 1)); + EXPECT_EQ(-1, WebRtc_set_lookahead(handle_, -1)); + + // WebRtc_set_allowed_offset() should return -1 if we have: + // 1) NULL pointer as |handle|. + // 2) |allowed_offset| < 0. + EXPECT_EQ(-1, WebRtc_set_allowed_offset(NULL, 0)); + EXPECT_EQ(-1, WebRtc_set_allowed_offset(handle_, -1)); + + EXPECT_EQ(-1, WebRtc_get_allowed_offset(NULL)); + + // WebRtc_enable_robust_validation() should return -1 if we have: + // 1) NULL pointer as |handle|. + // 2) Incorrect |enable| value (not 0 or 1). + EXPECT_EQ(-1, WebRtc_enable_robust_validation(NULL, kEnable[0])); + EXPECT_EQ(-1, WebRtc_enable_robust_validation(handle_, -1)); + EXPECT_EQ(-1, WebRtc_enable_robust_validation(handle_, 2)); + + // WebRtc_is_robust_validation_enabled() should return -1 if we have NULL + // pointer as |handle|. + EXPECT_EQ(-1, WebRtc_is_robust_validation_enabled(NULL)); + + // WebRtc_DelayEstimatorProcessFloat() should return -1 if we have: + // 1) NULL pointer as |handle|. + // 2) NULL pointer as near-end spectrum. + // 3) Incorrect spectrum size. + // 4) Non matching history sizes if multiple delay estimators using the same + // far-end reference. + EXPECT_EQ(-1, WebRtc_DelayEstimatorProcessFloat(NULL, near_f_, + spectrum_size_)); + // Use |handle_| which is properly created at SetUp(). + EXPECT_EQ(-1, WebRtc_DelayEstimatorProcessFloat(handle_, NULL, + spectrum_size_)); + EXPECT_EQ(-1, WebRtc_DelayEstimatorProcessFloat(handle_, near_f_, + spectrum_size_ + 1)); + // |tmp_handle| is already in a non-matching state. + EXPECT_EQ(-1, WebRtc_DelayEstimatorProcessFloat(tmp_handle, + near_f_, + spectrum_size_)); + + // WebRtc_DelayEstimatorProcessFix() should return -1 if we have: + // 1) NULL pointer as |handle|. + // 2) NULL pointer as near-end spectrum. + // 3) Incorrect spectrum size. + // 4) Too high precision in near-end spectrum (Q-domain > 15). + // 5) Non matching history sizes if multiple delay estimators using the same + // far-end reference. + EXPECT_EQ(-1, WebRtc_DelayEstimatorProcessFix(NULL, near_u16_, spectrum_size_, + 0)); + EXPECT_EQ(-1, WebRtc_DelayEstimatorProcessFix(handle_, NULL, spectrum_size_, + 0)); + EXPECT_EQ(-1, WebRtc_DelayEstimatorProcessFix(handle_, near_u16_, + spectrum_size_ + 1, 0)); + EXPECT_EQ(-1, WebRtc_DelayEstimatorProcessFix(handle_, near_u16_, + spectrum_size_, 16)); + // |tmp_handle| is already in a non-matching state. + EXPECT_EQ(-1, WebRtc_DelayEstimatorProcessFix(tmp_handle, + near_u16_, + spectrum_size_, + 0)); + WebRtc_FreeDelayEstimator(tmp_handle); + + // WebRtc_last_delay() should return -1 if we have a NULL pointer as |handle|. + EXPECT_EQ(-1, WebRtc_last_delay(NULL)); + + // Free any local memory if needed. + WebRtc_FreeDelayEstimator(handle); +} + +TEST_F(DelayEstimatorTest, VerifyAllowedOffset) { + // Is set to zero by default. + EXPECT_EQ(0, WebRtc_get_allowed_offset(handle_)); + for (int i = 1; i >= 0; i--) { + EXPECT_EQ(0, WebRtc_set_allowed_offset(handle_, i)); + EXPECT_EQ(i, WebRtc_get_allowed_offset(handle_)); + Init(); + // Unaffected over a reset. + EXPECT_EQ(i, WebRtc_get_allowed_offset(handle_)); + } +} + +TEST_F(DelayEstimatorTest, VerifyEnableRobustValidation) { + // Disabled by default. + EXPECT_EQ(0, WebRtc_is_robust_validation_enabled(handle_)); + for (size_t i = 0; i < kSizeEnable; ++i) { + EXPECT_EQ(0, WebRtc_enable_robust_validation(handle_, kEnable[i])); + EXPECT_EQ(kEnable[i], WebRtc_is_robust_validation_enabled(handle_)); + Init(); + // Unaffected over a reset. 
+ EXPECT_EQ(kEnable[i], WebRtc_is_robust_validation_enabled(handle_)); + } +} + +TEST_F(DelayEstimatorTest, InitializedSpectrumAfterProcess) { + // In this test we verify that the mean spectra are initialized after first + // time we call WebRtc_AddFarSpectrum() and Process() respectively. The test + // also verifies the state is not left for zero spectra. + const float kZerosFloat[kSpectrumSize] = { 0.0 }; + const uint16_t kZerosU16[kSpectrumSize] = { 0 }; + + // For floating point operations, process one frame and verify initialization + // flag. + Init(); + EXPECT_EQ(0, WebRtc_AddFarSpectrumFloat(farend_handle_, kZerosFloat, + spectrum_size_)); + EXPECT_EQ(0, farend_self_->far_spectrum_initialized); + EXPECT_EQ(0, WebRtc_AddFarSpectrumFloat(farend_handle_, far_f_, + spectrum_size_)); + EXPECT_EQ(1, farend_self_->far_spectrum_initialized); + EXPECT_EQ(-2, WebRtc_DelayEstimatorProcessFloat(handle_, kZerosFloat, + spectrum_size_)); + EXPECT_EQ(0, self_->near_spectrum_initialized); + EXPECT_EQ(-2, WebRtc_DelayEstimatorProcessFloat(handle_, near_f_, + spectrum_size_)); + EXPECT_EQ(1, self_->near_spectrum_initialized); + + // For fixed point operations, process one frame and verify initialization + // flag. + Init(); + EXPECT_EQ(0, WebRtc_AddFarSpectrumFix(farend_handle_, kZerosU16, + spectrum_size_, 0)); + EXPECT_EQ(0, farend_self_->far_spectrum_initialized); + EXPECT_EQ(0, WebRtc_AddFarSpectrumFix(farend_handle_, far_u16_, + spectrum_size_, 0)); + EXPECT_EQ(1, farend_self_->far_spectrum_initialized); + EXPECT_EQ(-2, WebRtc_DelayEstimatorProcessFix(handle_, kZerosU16, + spectrum_size_, 0)); + EXPECT_EQ(0, self_->near_spectrum_initialized); + EXPECT_EQ(-2, WebRtc_DelayEstimatorProcessFix(handle_, near_u16_, + spectrum_size_, 0)); + EXPECT_EQ(1, self_->near_spectrum_initialized); +} + +TEST_F(DelayEstimatorTest, CorrectLastDelay) { + // In this test we verify that we get the correct last delay upon valid call. + // We simply process the same data until we leave the initialized state + // (|last_delay| = -2). Then we compare the Process() output with the + // last_delay() call. + + // TODO(bjornv): Update quality values for robust validation. + int last_delay = 0; + // Floating point operations. + Init(); + for (int i = 0; i < 200; i++) { + EXPECT_EQ(0, WebRtc_AddFarSpectrumFloat(farend_handle_, far_f_, + spectrum_size_)); + last_delay = WebRtc_DelayEstimatorProcessFloat(handle_, near_f_, + spectrum_size_); + if (last_delay != -2) { + EXPECT_EQ(last_delay, WebRtc_last_delay(handle_)); + if (!WebRtc_is_robust_validation_enabled(handle_)) { + EXPECT_FLOAT_EQ(7203.f / kMaxBitCountsQ9, + WebRtc_last_delay_quality(handle_)); + } + break; + } + } + // Verify that we have left the initialized state. + EXPECT_NE(-2, WebRtc_last_delay(handle_)); + EXPECT_LT(0, WebRtc_last_delay_quality(handle_)); + + // Fixed point operations. + Init(); + for (int i = 0; i < 200; i++) { + EXPECT_EQ(0, WebRtc_AddFarSpectrumFix(farend_handle_, far_u16_, + spectrum_size_, 0)); + last_delay = WebRtc_DelayEstimatorProcessFix(handle_, near_u16_, + spectrum_size_, 0); + if (last_delay != -2) { + EXPECT_EQ(last_delay, WebRtc_last_delay(handle_)); + if (!WebRtc_is_robust_validation_enabled(handle_)) { + EXPECT_FLOAT_EQ(7203.f / kMaxBitCountsQ9, + WebRtc_last_delay_quality(handle_)); + } + break; + } + } + // Verify that we have left the initialized state. 
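Before the final assertions of CorrectLastDelay below, the magic constant asserted twice above can be unpacked, assuming the quality formula in WebRtc_binary_last_delay_quality() from delay_estimator.cc:

static_assert(kMaxBitCountsQ9 == (32 << 9), "32 in Q9, i.e. 16384");
// Without robust validation:
//   quality = (kMaxBitCountsQ9 - last_delay_probability) / kMaxBitCountsQ9,
// so the expected 7203.f / kMaxBitCountsQ9 (about 0.44) implies the identical
// input spectra drive |last_delay_probability| down to 16384 - 7203 = 9181.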
+  EXPECT_NE(-2, WebRtc_last_delay(handle_));
+  EXPECT_LT(0, WebRtc_last_delay_quality(handle_));
+}
+
+TEST_F(DelayEstimatorTest, CorrectErrorReturnsOfBinaryEstimatorFarend) {
+  // In this test we verify correct output on invalid API calls to the Binary
+  // Delay Estimator (far-end part).
+
+  BinaryDelayEstimatorFarend* binary = binary_farend_;
+  // WebRtc_CreateBinaryDelayEstimatorFarend() should return NULL if the input
+  // history size is less than 2. This is to make sure the buffer shifting
+  // applies properly.
+  // Make sure we have a non-NULL value at start, so we can detect NULL after
+  // create failure.
+  binary = WebRtc_CreateBinaryDelayEstimatorFarend(1);
+  EXPECT_TRUE(binary == NULL);
+}
+
+TEST_F(DelayEstimatorTest, CorrectErrorReturnsOfBinaryEstimator) {
+  // In this test we verify correct output on invalid API calls to the Binary
+  // Delay Estimator.
+
+  BinaryDelayEstimator* binary_handle = binary_;
+  // WebRtc_CreateBinaryDelayEstimator() should return NULL if we have a NULL
+  // pointer as |binary_farend| or invalid input values. Upon failure, the
+  // |binary_handle| should be NULL.
+  // Make sure we have a non-NULL value at start, so we can detect NULL after
+  // create failure.
+  binary_handle = WebRtc_CreateBinaryDelayEstimator(NULL, kLookahead);
+  EXPECT_TRUE(binary_handle == NULL);
+  binary_handle = WebRtc_CreateBinaryDelayEstimator(binary_farend_, -1);
+  EXPECT_TRUE(binary_handle == NULL);
+}
+
+TEST_F(DelayEstimatorTest, MeanEstimatorFix) {
+  // In this test we verify that we update the mean value in the correct
+  // direction only. With "direction" we mean increase or decrease.
+
+  int32_t mean_value = 4000;
+  int32_t mean_value_before = mean_value;
+  int32_t new_mean_value = mean_value * 2;
+
+  // Increasing |mean_value|.
+  WebRtc_MeanEstimatorFix(new_mean_value, 10, &mean_value);
+  EXPECT_LT(mean_value_before, mean_value);
+  EXPECT_GT(new_mean_value, mean_value);
+
+  // Decreasing |mean_value|.
+  new_mean_value = mean_value / 2;
+  mean_value_before = mean_value;
+  WebRtc_MeanEstimatorFix(new_mean_value, 10, &mean_value);
+  EXPECT_GT(mean_value_before, mean_value);
+  EXPECT_LT(new_mean_value, mean_value);
+}
+
+TEST_F(DelayEstimatorTest, ExactDelayEstimateMultipleNearSameSpectrum) {
+  // In this test we verify that we get the correct delay estimates if we shift
+  // the signal accordingly. We create two Binary Delay Estimators and feed them
+  // with the same signals, so they should output the same results.
+  // We verify both causal and non-causal delays.
+  // For these noise free signals, the robust validation should not have an
+  // impact, hence we turn robust validation on/off for both reference and
+  // delayed near end.
+
+  for (size_t i = 0; i < kSizeEnable; ++i) {
+    for (size_t j = 0; j < kSizeEnable; ++j) {
+      RunBinarySpectraTest(0, 0, kEnable[i], kEnable[j]);
+    }
+  }
+}
+
+TEST_F(DelayEstimatorTest, ExactDelayEstimateMultipleNearDifferentSpectrum) {
+  // In this test we use the same setup as above, but we now feed the two Binary
+  // Delay Estimators with different signals, so they should output different
+  // results.
+  // For these noise free signals, the robust validation should not have an
+  // impact, hence we turn robust validation on/off for both reference and
+  // delayed near end.
+ + const int kNearOffset = 1; + for (size_t i = 0; i < kSizeEnable; ++i) { + for (size_t j = 0; j < kSizeEnable; ++j) { + RunBinarySpectraTest(kNearOffset, 0, kEnable[i], kEnable[j]); + } + } +} + +TEST_F(DelayEstimatorTest, ExactDelayEstimateMultipleNearDifferentLookahead) { + // In this test we use the same setup as above, feeding the two Binary + // Delay Estimators with the same signals. The difference is that we create + // them with different lookahead. + // For these noise free signals, the robust validation should not have an + // impact, hence we turn robust validation on/off for both reference and + // delayed near end. + + const int kLookaheadOffset = 1; + for (size_t i = 0; i < kSizeEnable; ++i) { + for (size_t j = 0; j < kSizeEnable; ++j) { + RunBinarySpectraTest(0, kLookaheadOffset, kEnable[i], kEnable[j]); + } + } +} + +TEST_F(DelayEstimatorTest, AllowedOffsetNoImpactWhenRobustValidationDisabled) { + // The same setup as in ExactDelayEstimateMultipleNearSameSpectrum with the + // difference that |allowed_offset| is set for the reference binary delay + // estimator. + + binary_->allowed_offset = 10; + RunBinarySpectraTest(0, 0, 0, 0); + binary_->allowed_offset = 0; // Reset reference. +} + +TEST_F(DelayEstimatorTest, VerifyLookaheadAtCreate) { + void* farend_handle = WebRtc_CreateDelayEstimatorFarend(kSpectrumSize, + kMaxDelay); + ASSERT_TRUE(farend_handle != NULL); + void* handle = WebRtc_CreateDelayEstimator(farend_handle, kLookahead); + ASSERT_TRUE(handle != NULL); + EXPECT_EQ(kLookahead, WebRtc_lookahead(handle)); + WebRtc_FreeDelayEstimator(handle); + WebRtc_FreeDelayEstimatorFarend(farend_handle); +} + +TEST_F(DelayEstimatorTest, VerifyLookaheadIsSetAndKeptAfterInit) { + EXPECT_EQ(kLookahead, WebRtc_lookahead(handle_)); + EXPECT_EQ(kDifferentLookahead, + WebRtc_set_lookahead(handle_, kDifferentLookahead)); + EXPECT_EQ(kDifferentLookahead, WebRtc_lookahead(handle_)); + EXPECT_EQ(0, WebRtc_InitDelayEstimatorFarend(farend_handle_)); + EXPECT_EQ(kDifferentLookahead, WebRtc_lookahead(handle_)); + EXPECT_EQ(0, WebRtc_InitDelayEstimator(handle_)); + EXPECT_EQ(kDifferentLookahead, WebRtc_lookahead(handle_)); +} + +TEST_F(DelayEstimatorTest, VerifyHistorySizeAtCreate) { + EXPECT_EQ(kHistorySize, WebRtc_history_size(handle_)); +} + +TEST_F(DelayEstimatorTest, VerifyHistorySizeIsSetAndKeptAfterInit) { + EXPECT_EQ(kHistorySize, WebRtc_history_size(handle_)); + EXPECT_EQ(kDifferentHistorySize, + WebRtc_set_history_size(handle_, kDifferentHistorySize)); + EXPECT_EQ(kDifferentHistorySize, WebRtc_history_size(handle_)); + EXPECT_EQ(0, WebRtc_InitDelayEstimator(handle_)); + EXPECT_EQ(kDifferentHistorySize, WebRtc_history_size(handle_)); + EXPECT_EQ(0, WebRtc_InitDelayEstimatorFarend(farend_handle_)); + EXPECT_EQ(kDifferentHistorySize, WebRtc_history_size(handle_)); +} + +// TODO(bjornv): Add tests for SoftReset...(...). + +} // namespace diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/utility/delay_estimator_wrapper.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/utility/delay_estimator_wrapper.cc new file mode 100644 index 0000000000..f907c80a35 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/utility/delay_estimator_wrapper.cc @@ -0,0 +1,486 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/utility/delay_estimator_wrapper.h" + +#include <stdlib.h> +#include <string.h> + +#include "modules/audio_processing/utility/delay_estimator.h" +#include "modules/audio_processing/utility/delay_estimator_internal.h" +#include "rtc_base/checks.h" + +// Only bit |kBandFirst| through bit |kBandLast| are processed and +// |kBandFirst| - |kBandLast| must be < 32. +enum { kBandFirst = 12 }; +enum { kBandLast = 43 }; + +static __inline uint32_t SetBit(uint32_t in, int pos) { + uint32_t mask = (1 << pos); + uint32_t out = (in | mask); + + return out; +} + +// Calculates the mean recursively. Same version as WebRtc_MeanEstimatorFix(), +// but for float. +// +// Inputs: +// - new_value : New additional value. +// - scale : Scale for smoothing (should be less than 1.0). +// +// Input/Output: +// - mean_value : Pointer to the mean value for updating. +// +static void MeanEstimatorFloat(float new_value, + float scale, + float* mean_value) { + RTC_DCHECK_LT(scale, 1.0f); + *mean_value += (new_value - *mean_value) * scale; +} + +// Computes the binary spectrum by comparing the input |spectrum| with a +// |threshold_spectrum|. Float and fixed point versions. +// +// Inputs: +// - spectrum : Spectrum of which the binary spectrum should be +// calculated. +// - threshold_spectrum : Threshold spectrum with which the input +// spectrum is compared. +// Return: +// - out : Binary spectrum. +// +static uint32_t BinarySpectrumFix(const uint16_t* spectrum, + SpectrumType* threshold_spectrum, + int q_domain, + int* threshold_initialized) { + int i = kBandFirst; + uint32_t out = 0; + + RTC_DCHECK_LT(q_domain, 16); + + if (!(*threshold_initialized)) { + // Set the |threshold_spectrum| to half the input |spectrum| as starting + // value. This speeds up the convergence. + for (i = kBandFirst; i <= kBandLast; i++) { + if (spectrum[i] > 0) { + // Convert input spectrum from Q(|q_domain|) to Q15. + int32_t spectrum_q15 = ((int32_t) spectrum[i]) << (15 - q_domain); + threshold_spectrum[i].int32_ = (spectrum_q15 >> 1); + *threshold_initialized = 1; + } + } + } + for (i = kBandFirst; i <= kBandLast; i++) { + // Convert input spectrum from Q(|q_domain|) to Q15. + int32_t spectrum_q15 = ((int32_t) spectrum[i]) << (15 - q_domain); + // Update the |threshold_spectrum|. + WebRtc_MeanEstimatorFix(spectrum_q15, 6, &(threshold_spectrum[i].int32_)); + // Convert |spectrum| at current frequency bin to a binary value. + if (spectrum_q15 > threshold_spectrum[i].int32_) { + out = SetBit(out, i - kBandFirst); + } + } + + return out; +} + +static uint32_t BinarySpectrumFloat(const float* spectrum, + SpectrumType* threshold_spectrum, + int* threshold_initialized) { + int i = kBandFirst; + uint32_t out = 0; + const float kScale = 1 / 64.0; + + if (!(*threshold_initialized)) { + // Set the |threshold_spectrum| to half the input |spectrum| as starting + // value. This speeds up the convergence. + for (i = kBandFirst; i <= kBandLast; i++) { + if (spectrum[i] > 0.0f) { + threshold_spectrum[i].float_ = (spectrum[i] / 2); + *threshold_initialized = 1; + } + } + } + + for (i = kBandFirst; i <= kBandLast; i++) { + // Update the |threshold_spectrum|. + MeanEstimatorFloat(spectrum[i], kScale, &(threshold_spectrum[i].float_)); + // Convert |spectrum| at current frequency bin to a binary value. 
+ if (spectrum[i] > threshold_spectrum[i].float_) { + out = SetBit(out, i - kBandFirst); + } + } + + return out; +} + +void WebRtc_FreeDelayEstimatorFarend(void* handle) { + DelayEstimatorFarend* self = (DelayEstimatorFarend*) handle; + + if (handle == NULL) { + return; + } + + free(self->mean_far_spectrum); + self->mean_far_spectrum = NULL; + + WebRtc_FreeBinaryDelayEstimatorFarend(self->binary_farend); + self->binary_farend = NULL; + + free(self); +} + +void* WebRtc_CreateDelayEstimatorFarend(int spectrum_size, int history_size) { + DelayEstimatorFarend* self = NULL; + + // Check if the sub band used in the delay estimation is small enough to fit + // the binary spectra in a uint32_t. + static_assert(kBandLast - kBandFirst < 32, ""); + + if (spectrum_size >= kBandLast) { + self = static_cast<DelayEstimatorFarend*>( + malloc(sizeof(DelayEstimatorFarend))); + } + + if (self != NULL) { + int memory_fail = 0; + + // Allocate memory for the binary far-end spectrum handling. + self->binary_farend = WebRtc_CreateBinaryDelayEstimatorFarend(history_size); + memory_fail |= (self->binary_farend == NULL); + + // Allocate memory for spectrum buffers. + self->mean_far_spectrum = + static_cast<SpectrumType*>(malloc(spectrum_size * sizeof(SpectrumType))); + memory_fail |= (self->mean_far_spectrum == NULL); + + self->spectrum_size = spectrum_size; + + if (memory_fail) { + WebRtc_FreeDelayEstimatorFarend(self); + self = NULL; + } + } + + return self; +} + +int WebRtc_InitDelayEstimatorFarend(void* handle) { + DelayEstimatorFarend* self = (DelayEstimatorFarend*) handle; + + if (self == NULL) { + return -1; + } + + // Initialize far-end part of binary delay estimator. + WebRtc_InitBinaryDelayEstimatorFarend(self->binary_farend); + + // Set averaged far and near end spectra to zero. + memset(self->mean_far_spectrum, 0, + sizeof(SpectrumType) * self->spectrum_size); + // Reset initialization indicators. + self->far_spectrum_initialized = 0; + + return 0; +} + +void WebRtc_SoftResetDelayEstimatorFarend(void* handle, int delay_shift) { + DelayEstimatorFarend* self = (DelayEstimatorFarend*) handle; + RTC_DCHECK(self); + WebRtc_SoftResetBinaryDelayEstimatorFarend(self->binary_farend, delay_shift); +} + +int WebRtc_AddFarSpectrumFix(void* handle, + const uint16_t* far_spectrum, + int spectrum_size, + int far_q) { + DelayEstimatorFarend* self = (DelayEstimatorFarend*) handle; + uint32_t binary_spectrum = 0; + + if (self == NULL) { + return -1; + } + if (far_spectrum == NULL) { + // Empty far end spectrum. + return -1; + } + if (spectrum_size != self->spectrum_size) { + // Data sizes don't match. + return -1; + } + if (far_q > 15) { + // If |far_q| is larger than 15 we cannot guarantee no wrap around. + return -1; + } + + // Get binary spectrum. + binary_spectrum = BinarySpectrumFix(far_spectrum, self->mean_far_spectrum, + far_q, &(self->far_spectrum_initialized)); + WebRtc_AddBinaryFarSpectrum(self->binary_farend, binary_spectrum); + + return 0; +} + +int WebRtc_AddFarSpectrumFloat(void* handle, + const float* far_spectrum, + int spectrum_size) { + DelayEstimatorFarend* self = (DelayEstimatorFarend*) handle; + uint32_t binary_spectrum = 0; + + if (self == NULL) { + return -1; + } + if (far_spectrum == NULL) { + // Empty far end spectrum. + return -1; + } + if (spectrum_size != self->spectrum_size) { + // Data sizes don't match. + return -1; + } + + // Get binary spectrum. 
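Two details of the thresholding code above are easy to miss. First, the fixed-point path rescales each Q(q_domain) magnitude to Q15 before comparing and smoothing, which is why both wrappers reject q_domain > 15. Second, the float and fixed threshold updates are the same one-pole smoother: kScale = 1/64 and WebRtc_MeanEstimatorFix(..., 6, ...) both step 2^-6 of the way per block. A small sketch of the rescaling (helper name illustrative):

#include <cstdint>

// Rescale a Q(q_domain) magnitude to Q15, as BinarySpectrumFix() does before
// thresholding; q_domain <= 15 guarantees the left shift cannot push the
// 16-bit input into the sign bit.
static int32_t ToQ15(uint16_t value, int q_domain) {
  return static_cast<int32_t>(value) << (15 - q_domain);
}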
+ binary_spectrum = BinarySpectrumFloat(far_spectrum, self->mean_far_spectrum, + &(self->far_spectrum_initialized)); + WebRtc_AddBinaryFarSpectrum(self->binary_farend, binary_spectrum); + + return 0; +} + +void WebRtc_FreeDelayEstimator(void* handle) { + DelayEstimator* self = (DelayEstimator*) handle; + + if (handle == NULL) { + return; + } + + free(self->mean_near_spectrum); + self->mean_near_spectrum = NULL; + + WebRtc_FreeBinaryDelayEstimator(self->binary_handle); + self->binary_handle = NULL; + + free(self); +} + +void* WebRtc_CreateDelayEstimator(void* farend_handle, int max_lookahead) { + DelayEstimator* self = NULL; + DelayEstimatorFarend* farend = (DelayEstimatorFarend*) farend_handle; + + if (farend_handle != NULL) { + self = static_cast<DelayEstimator*>(malloc(sizeof(DelayEstimator))); + } + + if (self != NULL) { + int memory_fail = 0; + + // Allocate memory for the farend spectrum handling. + self->binary_handle = + WebRtc_CreateBinaryDelayEstimator(farend->binary_farend, max_lookahead); + memory_fail |= (self->binary_handle == NULL); + + // Allocate memory for spectrum buffers. + self->mean_near_spectrum = static_cast<SpectrumType*>( + malloc(farend->spectrum_size * sizeof(SpectrumType))); + memory_fail |= (self->mean_near_spectrum == NULL); + + self->spectrum_size = farend->spectrum_size; + + if (memory_fail) { + WebRtc_FreeDelayEstimator(self); + self = NULL; + } + } + + return self; +} + +int WebRtc_InitDelayEstimator(void* handle) { + DelayEstimator* self = (DelayEstimator*) handle; + + if (self == NULL) { + return -1; + } + + // Initialize binary delay estimator. + WebRtc_InitBinaryDelayEstimator(self->binary_handle); + + // Set averaged far and near end spectra to zero. + memset(self->mean_near_spectrum, 0, + sizeof(SpectrumType) * self->spectrum_size); + // Reset initialization indicators. + self->near_spectrum_initialized = 0; + + return 0; +} + +int WebRtc_SoftResetDelayEstimator(void* handle, int delay_shift) { + DelayEstimator* self = (DelayEstimator*) handle; + RTC_DCHECK(self); + return WebRtc_SoftResetBinaryDelayEstimator(self->binary_handle, delay_shift); +} + +int WebRtc_set_history_size(void* handle, int history_size) { + DelayEstimator* self = static_cast<DelayEstimator*>(handle); + + if ((self == NULL) || (history_size <= 1)) { + return -1; + } + return WebRtc_AllocateHistoryBufferMemory(self->binary_handle, history_size); +} + +int WebRtc_history_size(const void* handle) { + const DelayEstimator* self = static_cast<const DelayEstimator*>(handle); + + if (self == NULL) { + return -1; + } + if (self->binary_handle->farend->history_size != + self->binary_handle->history_size) { + // Non matching history sizes. 
+ return -1; + } + return self->binary_handle->history_size; +} + +int WebRtc_set_lookahead(void* handle, int lookahead) { + DelayEstimator* self = (DelayEstimator*) handle; + RTC_DCHECK(self); + RTC_DCHECK(self->binary_handle); + if ((lookahead > self->binary_handle->near_history_size - 1) || + (lookahead < 0)) { + return -1; + } + self->binary_handle->lookahead = lookahead; + return self->binary_handle->lookahead; +} + +int WebRtc_lookahead(void* handle) { + DelayEstimator* self = (DelayEstimator*) handle; + RTC_DCHECK(self); + RTC_DCHECK(self->binary_handle); + return self->binary_handle->lookahead; +} + +int WebRtc_set_allowed_offset(void* handle, int allowed_offset) { + DelayEstimator* self = (DelayEstimator*) handle; + + if ((self == NULL) || (allowed_offset < 0)) { + return -1; + } + self->binary_handle->allowed_offset = allowed_offset; + return 0; +} + +int WebRtc_get_allowed_offset(const void* handle) { + const DelayEstimator* self = (const DelayEstimator*) handle; + + if (self == NULL) { + return -1; + } + return self->binary_handle->allowed_offset; +} + +int WebRtc_enable_robust_validation(void* handle, int enable) { + DelayEstimator* self = (DelayEstimator*) handle; + + if (self == NULL) { + return -1; + } + if ((enable < 0) || (enable > 1)) { + return -1; + } + RTC_DCHECK(self->binary_handle); + self->binary_handle->robust_validation_enabled = enable; + return 0; +} + +int WebRtc_is_robust_validation_enabled(const void* handle) { + const DelayEstimator* self = (const DelayEstimator*) handle; + + if (self == NULL) { + return -1; + } + return self->binary_handle->robust_validation_enabled; +} + +int WebRtc_DelayEstimatorProcessFix(void* handle, + const uint16_t* near_spectrum, + int spectrum_size, + int near_q) { + DelayEstimator* self = (DelayEstimator*) handle; + uint32_t binary_spectrum = 0; + + if (self == NULL) { + return -1; + } + if (near_spectrum == NULL) { + // Empty near end spectrum. + return -1; + } + if (spectrum_size != self->spectrum_size) { + // Data sizes don't match. + return -1; + } + if (near_q > 15) { + // If |near_q| is larger than 15 we cannot guarantee no wrap around. + return -1; + } + + // Get binary spectra. + binary_spectrum = BinarySpectrumFix(near_spectrum, + self->mean_near_spectrum, + near_q, + &(self->near_spectrum_initialized)); + + return WebRtc_ProcessBinarySpectrum(self->binary_handle, binary_spectrum); +} + +int WebRtc_DelayEstimatorProcessFloat(void* handle, + const float* near_spectrum, + int spectrum_size) { + DelayEstimator* self = (DelayEstimator*) handle; + uint32_t binary_spectrum = 0; + + if (self == NULL) { + return -1; + } + if (near_spectrum == NULL) { + // Empty near end spectrum. + return -1; + } + if (spectrum_size != self->spectrum_size) { + // Data sizes don't match. + return -1; + } + + // Get binary spectrum. 
+ binary_spectrum = BinarySpectrumFloat(near_spectrum, self->mean_near_spectrum, + &(self->near_spectrum_initialized)); + + return WebRtc_ProcessBinarySpectrum(self->binary_handle, binary_spectrum); +} + +int WebRtc_last_delay(void* handle) { + DelayEstimator* self = (DelayEstimator*) handle; + + if (self == NULL) { + return -1; + } + + return WebRtc_binary_last_delay(self->binary_handle); +} + +float WebRtc_last_delay_quality(void* handle) { + DelayEstimator* self = (DelayEstimator*) handle; + RTC_DCHECK(self); + return WebRtc_binary_last_delay_quality(self->binary_handle); +} diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h b/third_party/libwebrtc/webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h new file mode 100644 index 0000000000..6b6e51f82c --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h @@ -0,0 +1,244 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Performs delay estimation on block by block basis. +// The return value is 0 - OK and -1 - Error, unless otherwise stated. + +#ifndef MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_WRAPPER_H_ +#define MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_WRAPPER_H_ + +#include "typedefs.h" // NOLINT(build/include) + +// Releases the memory allocated by WebRtc_CreateDelayEstimatorFarend(...) +void WebRtc_FreeDelayEstimatorFarend(void* handle); + +// Allocates the memory needed by the far-end part of the delay estimation. The +// memory needs to be initialized separately through +// WebRtc_InitDelayEstimatorFarend(...). +// +// Inputs: +// - spectrum_size : Size of the spectrum used both in far-end and +// near-end. Used to allocate memory for spectrum +// specific buffers. +// - history_size : The far-end history buffer size. A change in buffer +// size can be forced with WebRtc_set_history_size(). +// Note that the maximum delay which can be estimated is +// determined together with WebRtc_set_lookahead(). +// +// Return value: +// - void* : Created |handle|. If the memory can't be allocated or +// if any of the input parameters are invalid NULL is +// returned. +void* WebRtc_CreateDelayEstimatorFarend(int spectrum_size, int history_size); + +// Initializes the far-end part of the delay estimation instance returned by +// WebRtc_CreateDelayEstimatorFarend(...) +int WebRtc_InitDelayEstimatorFarend(void* handle); + +// Soft resets the far-end part of the delay estimation instance returned by +// WebRtc_CreateDelayEstimatorFarend(...). +// Input: +// - delay_shift : The amount of blocks to shift history buffers. +void WebRtc_SoftResetDelayEstimatorFarend(void* handle, int delay_shift); + +// Adds the far-end spectrum to the far-end history buffer. This spectrum is +// used as reference when calculating the delay using +// WebRtc_ProcessSpectrum(). +// +// Inputs: +// - far_spectrum : Far-end spectrum. +// - spectrum_size : The size of the data arrays (same for both far- and +// near-end). +// - far_q : The Q-domain of the far-end data. +// +// Output: +// - handle : Updated far-end instance. 
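The wrapper implemented above, and documented in the header that follows, supports a straightforward per-block loop. A minimal end-to-end sketch with illustrative sizes; the caller is assumed to already have one magnitude spectrum per block:

#include "modules/audio_processing/utility/delay_estimator_wrapper.h"

// Feed far-end and near-end spectra block by block and read out the delay.
int RunDelayEstimation(const float* const* far_blocks,
                       const float* const* near_blocks,
                       int num_blocks,
                       int spectrum_size) {
  void* farend = WebRtc_CreateDelayEstimatorFarend(spectrum_size,
                                                   /*history_size=*/100);
  void* estimator = WebRtc_CreateDelayEstimator(farend, /*max_lookahead=*/10);
  WebRtc_InitDelayEstimatorFarend(farend);
  WebRtc_InitDelayEstimator(estimator);

  int delay = -2;
  for (int i = 0; i < num_blocks; ++i) {
    // Far end first: it is the reference the near end is matched against.
    WebRtc_AddFarSpectrumFloat(farend, far_blocks[i], spectrum_size);
    delay = WebRtc_DelayEstimatorProcessFloat(estimator, near_blocks[i],
                                              spectrum_size);
    // delay: >= 0 is an estimate (offset by the lookahead),
    // -2 means not enough data yet, -1 an error.
  }

  WebRtc_FreeDelayEstimator(estimator);     // Estimator first...
  WebRtc_FreeDelayEstimatorFarend(farend);  // ...then the far-end it used.
  return delay;
}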
+// +int WebRtc_AddFarSpectrumFix(void* handle, + const uint16_t* far_spectrum, + int spectrum_size, + int far_q); + +// See WebRtc_AddFarSpectrumFix() for description. +int WebRtc_AddFarSpectrumFloat(void* handle, + const float* far_spectrum, + int spectrum_size); + +// Releases the memory allocated by WebRtc_CreateDelayEstimator(...) +void WebRtc_FreeDelayEstimator(void* handle); + +// Allocates the memory needed by the delay estimation. The memory needs to be +// initialized separately through WebRtc_InitDelayEstimator(...). +// +// Inputs: +// - farend_handle : Pointer to the far-end part of the delay estimation +// instance created prior to this call using +// WebRtc_CreateDelayEstimatorFarend(). +// +// Note that WebRtc_CreateDelayEstimator does not take +// ownership of |farend_handle|, which has to be torn +// down properly after this instance. +// +// - max_lookahead : Maximum amount of non-causal lookahead allowed. The +// actual amount of lookahead used can be controlled by +// WebRtc_set_lookahead(...). The default |lookahead| is +// set to |max_lookahead| at create time. Use +// WebRtc_set_lookahead(...) before start if a different +// value is desired. +// +// Using lookahead can detect cases in which a near-end +// signal occurs before the corresponding far-end signal. +// It will delay the estimate for the current block by an +// equal amount, and the returned values will be offset +// by it. +// +// A value of zero is the typical no-lookahead case. +// This also represents the minimum delay which can be +// estimated. +// +// Note that the effective range of delay estimates is +// [-|lookahead|,... ,|history_size|-|lookahead|) +// where |history_size| is set through +// WebRtc_set_history_size(). +// +// Return value: +// - void* : Created |handle|. If the memory can't be allocated or +// if any of the input parameters are invalid NULL is +// returned. +void* WebRtc_CreateDelayEstimator(void* farend_handle, int max_lookahead); + +// Initializes the delay estimation instance returned by +// WebRtc_CreateDelayEstimator(...) +int WebRtc_InitDelayEstimator(void* handle); + +// Soft resets the delay estimation instance returned by +// WebRtc_CreateDelayEstimator(...) +// Input: +// - delay_shift : The amount of blocks to shift history buffers. +// +// Return value: +// - actual_shifts : The actual number of shifts performed. +int WebRtc_SoftResetDelayEstimator(void* handle, int delay_shift); + +// Sets the effective |history_size| used. Valid values from 2. We simply need +// at least two delays to compare to perform an estimate. If |history_size| is +// changed, buffers are reallocated filling in with zeros if necessary. +// Note that changing the |history_size| affects both buffers in far-end and +// near-end. Hence it is important to change all DelayEstimators that use the +// same reference far-end, to the same |history_size| value. +// Inputs: +// - handle : Pointer to the delay estimation instance. +// - history_size : Effective history size to be used. +// Return value: +// - new_history_size : The new history size used. If the memory was not able +// to be allocated 0 is returned. +int WebRtc_set_history_size(void* handle, int history_size); + +// Returns the history_size currently used. +// Input: +// - handle : Pointer to the delay estimation instance. +int WebRtc_history_size(const void* handle); + +// Sets the amount of |lookahead| to use. 
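The lookahead arithmetic introduced above becomes concrete with the unit-test sizes: history_size = 110 and lookahead = 10 give an effective estimate range of [-10, 100), where negative true delays (the near end leading the far end) are only observable because the lookahead is nonzero. The caller undoes the offset; a one-line helper with a hypothetical name:

// Returned estimates are offset by the lookahead, so the true delay is:
int TrueDelay(int returned_delay, int lookahead) {
  return returned_delay - lookahead;
}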
Valid values are [0, max_lookahead] +// where |max_lookahead| was set at create time through +// WebRtc_CreateDelayEstimator(...). +// +// Input: +// - handle : Pointer to the delay estimation instance. +// - lookahead : The amount of lookahead to be used. +// +// Return value: +// - new_lookahead : The actual amount of lookahead set, unless |handle| is +// a NULL pointer or |lookahead| is invalid, for which an +// error is returned. +int WebRtc_set_lookahead(void* handle, int lookahead); + +// Returns the amount of lookahead we currently use. +// Input: +// - handle : Pointer to the delay estimation instance. +int WebRtc_lookahead(void* handle); + +// Sets the |allowed_offset| used in the robust validation scheme. If the +// delay estimator is used in an echo control component, this parameter is +// related to the filter length. In principle |allowed_offset| should be set to +// the echo control filter length minus the expected echo duration, i.e., the +// delay offset the echo control can handle without quality regression. The +// default value, used if not set manually, is zero. Note that |allowed_offset| +// has to be non-negative. +// Inputs: +// - handle : Pointer to the delay estimation instance. +// - allowed_offset : The amount of delay offset, measured in partitions, +// the echo control filter can handle. +int WebRtc_set_allowed_offset(void* handle, int allowed_offset); + +// Returns the |allowed_offset| in number of partitions. +int WebRtc_get_allowed_offset(const void* handle); + +// Enables/Disables a robust validation functionality in the delay estimation. +// This is by default set to disabled at create time. The state is preserved +// over a reset. +// Inputs: +// - handle : Pointer to the delay estimation instance. +// - enable : Enable (1) or disable (0) this feature. +int WebRtc_enable_robust_validation(void* handle, int enable); + +// Returns 1 if robust validation is enabled and 0 if disabled. +int WebRtc_is_robust_validation_enabled(const void* handle); + +// Estimates and returns the delay between the far-end and near-end blocks. The +// value will be offset by the lookahead (i.e. the lookahead should be +// subtracted from the returned value). +// Inputs: +// - handle : Pointer to the delay estimation instance. +// - near_spectrum : Pointer to the near-end spectrum data of the current +// block. +// - spectrum_size : The size of the data arrays (same for both far- and +// near-end). +// - near_q : The Q-domain of the near-end data. +// +// Output: +// - handle : Updated instance. +// +// Return value: +// - delay : >= 0 - Calculated delay value. +// -1 - Error. +// -2 - Insufficient data for estimation. +int WebRtc_DelayEstimatorProcessFix(void* handle, + const uint16_t* near_spectrum, + int spectrum_size, + int near_q); + +// See WebRtc_DelayEstimatorProcessFix() for description. +int WebRtc_DelayEstimatorProcessFloat(void* handle, + const float* near_spectrum, + int spectrum_size); + +// Returns the last calculated delay updated by the function +// WebRtc_DelayEstimatorProcess(...). +// +// Input: +// - handle : Pointer to the delay estimation instance. +// +// Return value: +// - delay : >= 0 - Last calculated delay value. +// -1 - Error. +// -2 - Insufficient data for estimation. +int WebRtc_last_delay(void* handle); + +// Returns the estimation quality/probability of the last calculated delay +// updated by the function WebRtc_DelayEstimatorProcess(...). The estimation +// quality is a value in the interval [0, 1]. 
The higher the value, the better
+// the quality.
+//
+// Return value:
+//  - delay_quality : >= 0 - Estimation quality of last calculated delay.
+float WebRtc_last_delay_quality(void* handle);
+
+#endif  // MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_WRAPPER_H_
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/utility/ooura_fft.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/utility/ooura_fft.cc
new file mode 100644
index 0000000000..59631117a2
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/utility/ooura_fft.cc
@@ -0,0 +1,543 @@
+/*
+ * http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html
+ * Copyright Takuya OOURA, 1996-2001
+ *
+ * You may use, copy, modify and distribute this code for any purpose
+ * (including commercial use) and without fee. Please refer to this package
+ * when you modify this code.
+ *
+ * Changes by the WebRTC authors:
+ *    - Trivial type modifications.
+ *    - Minimal code subset to do rdft of length 128.
+ *    - Optimizations because of known length.
+ *    - Removed the global variables by moving the code into a class in order
+ *      to make it thread safe.
+ *
+ *  All changes are covered by the WebRTC license and IP grant:
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/utility/ooura_fft.h"
+
+#include <math.h>
+
+#include "modules/audio_processing/utility/ooura_fft_tables_common.h"
+#include "system_wrappers/include/cpu_features_wrapper.h"
+#include "typedefs.h"  // NOLINT(build/include)
+
+namespace webrtc {
+
+namespace {
+
+#if !(defined(MIPS_FPU_LE) || defined(WEBRTC_HAS_NEON))
+static void cft1st_128_C(float* a) {
+  const int n = 128;
+  int j, k1, k2;
+  float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
+  float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+
+  // The processing of the first set of elements was simplified in C to avoid
+  // some operations (multiplication by zero or one, addition of two elements
+  // multiplied by the same weight, ...).
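+  // a[] holds 64 complex values as interleaved (real, imaginary) floats;
+  // each 8-float group is one radix-4 butterfly over four complex values,
+  // with the twiddle factors taken from the rdft_w tables.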
+ x0r = a[0] + a[2]; + x0i = a[1] + a[3]; + x1r = a[0] - a[2]; + x1i = a[1] - a[3]; + x2r = a[4] + a[6]; + x2i = a[5] + a[7]; + x3r = a[4] - a[6]; + x3i = a[5] - a[7]; + a[0] = x0r + x2r; + a[1] = x0i + x2i; + a[4] = x0r - x2r; + a[5] = x0i - x2i; + a[2] = x1r - x3i; + a[3] = x1i + x3r; + a[6] = x1r + x3i; + a[7] = x1i - x3r; + wk1r = rdft_w[2]; + x0r = a[8] + a[10]; + x0i = a[9] + a[11]; + x1r = a[8] - a[10]; + x1i = a[9] - a[11]; + x2r = a[12] + a[14]; + x2i = a[13] + a[15]; + x3r = a[12] - a[14]; + x3i = a[13] - a[15]; + a[8] = x0r + x2r; + a[9] = x0i + x2i; + a[12] = x2i - x0i; + a[13] = x0r - x2r; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[10] = wk1r * (x0r - x0i); + a[11] = wk1r * (x0r + x0i); + x0r = x3i + x1r; + x0i = x3r - x1i; + a[14] = wk1r * (x0i - x0r); + a[15] = wk1r * (x0i + x0r); + k1 = 0; + for (j = 16; j < n; j += 16) { + k1 += 2; + k2 = 2 * k1; + wk2r = rdft_w[k1 + 0]; + wk2i = rdft_w[k1 + 1]; + wk1r = rdft_w[k2 + 0]; + wk1i = rdft_w[k2 + 1]; + wk3r = rdft_wk3ri_first[k1 + 0]; + wk3i = rdft_wk3ri_first[k1 + 1]; + x0r = a[j + 0] + a[j + 2]; + x0i = a[j + 1] + a[j + 3]; + x1r = a[j + 0] - a[j + 2]; + x1i = a[j + 1] - a[j + 3]; + x2r = a[j + 4] + a[j + 6]; + x2i = a[j + 5] + a[j + 7]; + x3r = a[j + 4] - a[j + 6]; + x3i = a[j + 5] - a[j + 7]; + a[j + 0] = x0r + x2r; + a[j + 1] = x0i + x2i; + x0r -= x2r; + x0i -= x2i; + a[j + 4] = wk2r * x0r - wk2i * x0i; + a[j + 5] = wk2r * x0i + wk2i * x0r; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[j + 2] = wk1r * x0r - wk1i * x0i; + a[j + 3] = wk1r * x0i + wk1i * x0r; + x0r = x1r + x3i; + x0i = x1i - x3r; + a[j + 6] = wk3r * x0r - wk3i * x0i; + a[j + 7] = wk3r * x0i + wk3i * x0r; + wk1r = rdft_w[k2 + 2]; + wk1i = rdft_w[k2 + 3]; + wk3r = rdft_wk3ri_second[k1 + 0]; + wk3i = rdft_wk3ri_second[k1 + 1]; + x0r = a[j + 8] + a[j + 10]; + x0i = a[j + 9] + a[j + 11]; + x1r = a[j + 8] - a[j + 10]; + x1i = a[j + 9] - a[j + 11]; + x2r = a[j + 12] + a[j + 14]; + x2i = a[j + 13] + a[j + 15]; + x3r = a[j + 12] - a[j + 14]; + x3i = a[j + 13] - a[j + 15]; + a[j + 8] = x0r + x2r; + a[j + 9] = x0i + x2i; + x0r -= x2r; + x0i -= x2i; + a[j + 12] = -wk2i * x0r - wk2r * x0i; + a[j + 13] = -wk2i * x0i + wk2r * x0r; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[j + 10] = wk1r * x0r - wk1i * x0i; + a[j + 11] = wk1r * x0i + wk1i * x0r; + x0r = x1r + x3i; + x0i = x1i - x3r; + a[j + 14] = wk3r * x0r - wk3i * x0i; + a[j + 15] = wk3r * x0i + wk3i * x0r; + } +} + +static void cftmdl_128_C(float* a) { + const int l = 8; + const int n = 128; + const int m = 32; + int j0, j1, j2, j3, k, k1, k2, m2; + float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i; + float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; + + for (j0 = 0; j0 < l; j0 += 2) { + j1 = j0 + 8; + j2 = j0 + 16; + j3 = j0 + 24; + x0r = a[j0 + 0] + a[j1 + 0]; + x0i = a[j0 + 1] + a[j1 + 1]; + x1r = a[j0 + 0] - a[j1 + 0]; + x1i = a[j0 + 1] - a[j1 + 1]; + x2r = a[j2 + 0] + a[j3 + 0]; + x2i = a[j2 + 1] + a[j3 + 1]; + x3r = a[j2 + 0] - a[j3 + 0]; + x3i = a[j2 + 1] - a[j3 + 1]; + a[j0 + 0] = x0r + x2r; + a[j0 + 1] = x0i + x2i; + a[j2 + 0] = x0r - x2r; + a[j2 + 1] = x0i - x2i; + a[j1 + 0] = x1r - x3i; + a[j1 + 1] = x1i + x3r; + a[j3 + 0] = x1r + x3i; + a[j3 + 1] = x1i - x3r; + } + wk1r = rdft_w[2]; + for (j0 = m; j0 < l + m; j0 += 2) { + j1 = j0 + 8; + j2 = j0 + 16; + j3 = j0 + 24; + x0r = a[j0 + 0] + a[j1 + 0]; + x0i = a[j0 + 1] + a[j1 + 1]; + x1r = a[j0 + 0] - a[j1 + 0]; + x1i = a[j0 + 1] - a[j1 + 1]; + x2r = a[j2 + 0] + a[j3 + 0]; + x2i = a[j2 + 1] + a[j3 + 1]; + x3r = a[j2 + 0] - a[j3 + 0]; + x3i = a[j2 + 1] - a[j3 + 1]; + a[j0 + 0] = x0r + 
x2r; + a[j0 + 1] = x0i + x2i; + a[j2 + 0] = x2i - x0i; + a[j2 + 1] = x0r - x2r; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[j1 + 0] = wk1r * (x0r - x0i); + a[j1 + 1] = wk1r * (x0r + x0i); + x0r = x3i + x1r; + x0i = x3r - x1i; + a[j3 + 0] = wk1r * (x0i - x0r); + a[j3 + 1] = wk1r * (x0i + x0r); + } + k1 = 0; + m2 = 2 * m; + for (k = m2; k < n; k += m2) { + k1 += 2; + k2 = 2 * k1; + wk2r = rdft_w[k1 + 0]; + wk2i = rdft_w[k1 + 1]; + wk1r = rdft_w[k2 + 0]; + wk1i = rdft_w[k2 + 1]; + wk3r = rdft_wk3ri_first[k1 + 0]; + wk3i = rdft_wk3ri_first[k1 + 1]; + for (j0 = k; j0 < l + k; j0 += 2) { + j1 = j0 + 8; + j2 = j0 + 16; + j3 = j0 + 24; + x0r = a[j0 + 0] + a[j1 + 0]; + x0i = a[j0 + 1] + a[j1 + 1]; + x1r = a[j0 + 0] - a[j1 + 0]; + x1i = a[j0 + 1] - a[j1 + 1]; + x2r = a[j2 + 0] + a[j3 + 0]; + x2i = a[j2 + 1] + a[j3 + 1]; + x3r = a[j2 + 0] - a[j3 + 0]; + x3i = a[j2 + 1] - a[j3 + 1]; + a[j0 + 0] = x0r + x2r; + a[j0 + 1] = x0i + x2i; + x0r -= x2r; + x0i -= x2i; + a[j2 + 0] = wk2r * x0r - wk2i * x0i; + a[j2 + 1] = wk2r * x0i + wk2i * x0r; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[j1 + 0] = wk1r * x0r - wk1i * x0i; + a[j1 + 1] = wk1r * x0i + wk1i * x0r; + x0r = x1r + x3i; + x0i = x1i - x3r; + a[j3 + 0] = wk3r * x0r - wk3i * x0i; + a[j3 + 1] = wk3r * x0i + wk3i * x0r; + } + wk1r = rdft_w[k2 + 2]; + wk1i = rdft_w[k2 + 3]; + wk3r = rdft_wk3ri_second[k1 + 0]; + wk3i = rdft_wk3ri_second[k1 + 1]; + for (j0 = k + m; j0 < l + (k + m); j0 += 2) { + j1 = j0 + 8; + j2 = j0 + 16; + j3 = j0 + 24; + x0r = a[j0 + 0] + a[j1 + 0]; + x0i = a[j0 + 1] + a[j1 + 1]; + x1r = a[j0 + 0] - a[j1 + 0]; + x1i = a[j0 + 1] - a[j1 + 1]; + x2r = a[j2 + 0] + a[j3 + 0]; + x2i = a[j2 + 1] + a[j3 + 1]; + x3r = a[j2 + 0] - a[j3 + 0]; + x3i = a[j2 + 1] - a[j3 + 1]; + a[j0 + 0] = x0r + x2r; + a[j0 + 1] = x0i + x2i; + x0r -= x2r; + x0i -= x2i; + a[j2 + 0] = -wk2i * x0r - wk2r * x0i; + a[j2 + 1] = -wk2i * x0i + wk2r * x0r; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[j1 + 0] = wk1r * x0r - wk1i * x0i; + a[j1 + 1] = wk1r * x0i + wk1i * x0r; + x0r = x1r + x3i; + x0i = x1i - x3r; + a[j3 + 0] = wk3r * x0r - wk3i * x0i; + a[j3 + 1] = wk3r * x0i + wk3i * x0r; + } + } +} + +static void rftfsub_128_C(float* a) { + const float* c = rdft_w + 32; + int j1, j2, k1, k2; + float wkr, wki, xr, xi, yr, yi; + + for (j1 = 1, j2 = 2; j2 < 64; j1 += 1, j2 += 2) { + k2 = 128 - j2; + k1 = 32 - j1; + wkr = 0.5f - c[k1]; + wki = c[j1]; + xr = a[j2 + 0] - a[k2 + 0]; + xi = a[j2 + 1] + a[k2 + 1]; + yr = wkr * xr - wki * xi; + yi = wkr * xi + wki * xr; + a[j2 + 0] -= yr; + a[j2 + 1] -= yi; + a[k2 + 0] += yr; + a[k2 + 1] -= yi; + } +} + +static void rftbsub_128_C(float* a) { + const float* c = rdft_w + 32; + int j1, j2, k1, k2; + float wkr, wki, xr, xi, yr, yi; + + a[1] = -a[1]; + for (j1 = 1, j2 = 2; j2 < 64; j1 += 1, j2 += 2) { + k2 = 128 - j2; + k1 = 32 - j1; + wkr = 0.5f - c[k1]; + wki = c[j1]; + xr = a[j2 + 0] - a[k2 + 0]; + xi = a[j2 + 1] + a[k2 + 1]; + yr = wkr * xr + wki * xi; + yi = wkr * xi - wki * xr; + a[j2 + 0] = a[j2 + 0] - yr; + a[j2 + 1] = yi - a[j2 + 1]; + a[k2 + 0] = yr + a[k2 + 0]; + a[k2 + 1] = yi - a[k2 + 1]; + } + a[65] = -a[65]; +} +#endif + + +} // namespace + +OouraFft::OouraFft() { +#if defined(WEBRTC_ARCH_X86_FAMILY) + use_sse2_ = (WebRtc_GetCPUInfo(kSSE2) != 0); +#else + use_sse2_ = false; +#endif +} + +OouraFft::~OouraFft() = default; + +void OouraFft::Fft(float* a) const { + float xi; + bitrv2_128(a); + cftfsub_128(a); + rftfsub_128(a); + xi = a[0] - a[1]; + a[0] += a[1]; + a[1] = xi; +} +void OouraFft::InverseFft(float* a) const { + a[1] = 0.5f * 
(a[0] - a[1]); + a[0] -= a[1]; + rftbsub_128(a); + bitrv2_128(a); + cftbsub_128(a); +} + +void OouraFft::cft1st_128(float* a) const { +#if defined(MIPS_FPU_LE) + cft1st_128_mips(a); +#elif defined(WEBRTC_HAS_NEON) + cft1st_128_neon(a); +#elif defined(WEBRTC_ARCH_X86_FAMILY) + if (use_sse2_) { + cft1st_128_SSE2(a); + } else { + cft1st_128_C(a); + } +#else + cft1st_128_C(a); +#endif +} +void OouraFft::cftmdl_128(float* a) const { +#if defined(MIPS_FPU_LE) + cftmdl_128_mips(a); +#elif defined(WEBRTC_HAS_NEON) + cftmdl_128_neon(a); +#elif defined(WEBRTC_ARCH_X86_FAMILY) + if (use_sse2_) { + cftmdl_128_SSE2(a); + } else { + cftmdl_128_C(a); + } +#else + cftmdl_128_C(a); +#endif +} +void OouraFft::rftfsub_128(float* a) const { +#if defined(MIPS_FPU_LE) + rftfsub_128_mips(a); +#elif defined(WEBRTC_HAS_NEON) + rftfsub_128_neon(a); +#elif defined(WEBRTC_ARCH_X86_FAMILY) + if (use_sse2_) { + rftfsub_128_SSE2(a); + } else { + rftfsub_128_C(a); + } +#else + rftfsub_128_C(a); +#endif +} + +void OouraFft::rftbsub_128(float* a) const { +#if defined(MIPS_FPU_LE) + rftbsub_128_mips(a); +#elif defined(WEBRTC_HAS_NEON) + rftbsub_128_neon(a); +#elif defined(WEBRTC_ARCH_X86_FAMILY) + if (use_sse2_) { + rftbsub_128_SSE2(a); + } else { + rftbsub_128_C(a); + } +#else + rftbsub_128_C(a); +#endif +} + +void OouraFft::cftbsub_128(float* a) const { + int j, j1, j2, j3, l; + float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; + + cft1st_128(a); + cftmdl_128(a); + l = 32; + + for (j = 0; j < l; j += 2) { + j1 = j + l; + j2 = j1 + l; + j3 = j2 + l; + x0r = a[j] + a[j1]; + x0i = -a[j + 1] - a[j1 + 1]; + x1r = a[j] - a[j1]; + x1i = -a[j + 1] + a[j1 + 1]; + x2r = a[j2] + a[j3]; + x2i = a[j2 + 1] + a[j3 + 1]; + x3r = a[j2] - a[j3]; + x3i = a[j2 + 1] - a[j3 + 1]; + a[j] = x0r + x2r; + a[j + 1] = x0i - x2i; + a[j2] = x0r - x2r; + a[j2 + 1] = x0i + x2i; + a[j1] = x1r - x3i; + a[j1 + 1] = x1i - x3r; + a[j3] = x1r + x3i; + a[j3 + 1] = x1i + x3r; + } +} + +void OouraFft::cftfsub_128(float* a) const { + int j, j1, j2, j3, l; + float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; + + cft1st_128(a); + cftmdl_128(a); + l = 32; + for (j = 0; j < l; j += 2) { + j1 = j + l; + j2 = j1 + l; + j3 = j2 + l; + x0r = a[j] + a[j1]; + x0i = a[j + 1] + a[j1 + 1]; + x1r = a[j] - a[j1]; + x1i = a[j + 1] - a[j1 + 1]; + x2r = a[j2] + a[j3]; + x2i = a[j2 + 1] + a[j3 + 1]; + x3r = a[j2] - a[j3]; + x3i = a[j2 + 1] - a[j3 + 1]; + a[j] = x0r + x2r; + a[j + 1] = x0i + x2i; + a[j2] = x0r - x2r; + a[j2 + 1] = x0i - x2i; + a[j1] = x1r - x3i; + a[j1 + 1] = x1i + x3r; + a[j3] = x1r + x3i; + a[j3 + 1] = x1i - x3r; + } +} + +void OouraFft::bitrv2_128(float* a) const { + /* + Following things have been attempted but are no faster: + (a) Storing the swap indexes in a LUT (index calculations are done + for 'free' while waiting on memory/L1). + (b) Consolidate the load/store of two consecutive floats by a 64 bit + integer (execution is memory/L1 bound). + (c) Do a mix of floats and 64 bit integer to maximize register + utilization (execution is memory/L1 bound). + (d) Replacing ip[i] by ((k<<31)>>25) + ((k >> 1)<<5). + (e) Hard-coding of the offsets to completely eliminates index + calculations. 
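+
+  Note: the four-entry ip[] table together with the loop below forms an
+  unrolled bit-reversal permutation specialized for the fixed length of
+  128 floats (64 complex values).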
+ */ + + unsigned int j, j1, k, k1; + float xr, xi, yr, yi; + + const int ip[4] = {0, 64, 32, 96}; + for (k = 0; k < 4; k++) { + for (j = 0; j < k; j++) { + j1 = 2 * j + ip[k]; + k1 = 2 * k + ip[j]; + xr = a[j1 + 0]; + xi = a[j1 + 1]; + yr = a[k1 + 0]; + yi = a[k1 + 1]; + a[j1 + 0] = yr; + a[j1 + 1] = yi; + a[k1 + 0] = xr; + a[k1 + 1] = xi; + j1 += 8; + k1 += 16; + xr = a[j1 + 0]; + xi = a[j1 + 1]; + yr = a[k1 + 0]; + yi = a[k1 + 1]; + a[j1 + 0] = yr; + a[j1 + 1] = yi; + a[k1 + 0] = xr; + a[k1 + 1] = xi; + j1 += 8; + k1 -= 8; + xr = a[j1 + 0]; + xi = a[j1 + 1]; + yr = a[k1 + 0]; + yi = a[k1 + 1]; + a[j1 + 0] = yr; + a[j1 + 1] = yi; + a[k1 + 0] = xr; + a[k1 + 1] = xi; + j1 += 8; + k1 += 16; + xr = a[j1 + 0]; + xi = a[j1 + 1]; + yr = a[k1 + 0]; + yi = a[k1 + 1]; + a[j1 + 0] = yr; + a[j1 + 1] = yi; + a[k1 + 0] = xr; + a[k1 + 1] = xi; + } + j1 = 2 * k + 8 + ip[k]; + k1 = j1 + 8; + xr = a[j1 + 0]; + xi = a[j1 + 1]; + yr = a[k1 + 0]; + yi = a[k1 + 1]; + a[j1 + 0] = yr; + a[j1 + 1] = yi; + a[k1 + 0] = xr; + a[k1 + 1] = xi; + } +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/utility/ooura_fft.h b/third_party/libwebrtc/webrtc/modules/audio_processing/utility/ooura_fft.h new file mode 100644 index 0000000000..96d57dc908 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/utility/ooura_fft.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_UTILITY_OOURA_FFT_H_ +#define MODULES_AUDIO_PROCESSING_UTILITY_OOURA_FFT_H_ + +#include "typedefs.h" // NOLINT(build/include) + +namespace webrtc { + +#if defined(WEBRTC_ARCH_X86_FAMILY) +void cft1st_128_SSE2(float* a); +void cftmdl_128_SSE2(float* a); +void rftfsub_128_SSE2(float* a); +void rftbsub_128_SSE2(float* a); +#endif + +#if defined(MIPS_FPU_LE) +void cft1st_128_mips(float* a); +void cftmdl_128_mips(float* a); +void rftfsub_128_mips(float* a); +void rftbsub_128_mips(float* a); +#endif + +#if defined(WEBRTC_HAS_NEON) +void cft1st_128_neon(float* a); +void cftmdl_128_neon(float* a); +void rftfsub_128_neon(float* a); +void rftbsub_128_neon(float* a); +#endif + +class OouraFft { + public: + OouraFft(); + ~OouraFft(); + void Fft(float* a) const; + void InverseFft(float* a) const; + + private: + void cft1st_128(float* a) const; + void cftmdl_128(float* a) const; + void rftfsub_128(float* a) const; + void rftbsub_128(float* a) const; + + void cftfsub_128(float* a) const; + void cftbsub_128(float* a) const; + void bitrv2_128(float* a) const; + bool use_sse2_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_UTILITY_OOURA_FFT_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/utility/ooura_fft_mips.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/utility/ooura_fft_mips.cc new file mode 100644 index 0000000000..569e1d7e82 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/utility/ooura_fft_mips.cc @@ -0,0 +1,1185 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/utility/ooura_fft.h" + +#include "modules/audio_processing/utility/ooura_fft_tables_common.h" +#include "typedefs.h" // NOLINT(build/include) + +namespace webrtc { + +#if defined(MIPS_FPU_LE) +void bitrv2_128_mips(float* a) { + // n is 128 + float xr, xi, yr, yi; + + xr = a[8]; + xi = a[9]; + yr = a[16]; + yi = a[17]; + a[8] = yr; + a[9] = yi; + a[16] = xr; + a[17] = xi; + + xr = a[64]; + xi = a[65]; + yr = a[2]; + yi = a[3]; + a[64] = yr; + a[65] = yi; + a[2] = xr; + a[3] = xi; + + xr = a[72]; + xi = a[73]; + yr = a[18]; + yi = a[19]; + a[72] = yr; + a[73] = yi; + a[18] = xr; + a[19] = xi; + + xr = a[80]; + xi = a[81]; + yr = a[10]; + yi = a[11]; + a[80] = yr; + a[81] = yi; + a[10] = xr; + a[11] = xi; + + xr = a[88]; + xi = a[89]; + yr = a[26]; + yi = a[27]; + a[88] = yr; + a[89] = yi; + a[26] = xr; + a[27] = xi; + + xr = a[74]; + xi = a[75]; + yr = a[82]; + yi = a[83]; + a[74] = yr; + a[75] = yi; + a[82] = xr; + a[83] = xi; + + xr = a[32]; + xi = a[33]; + yr = a[4]; + yi = a[5]; + a[32] = yr; + a[33] = yi; + a[4] = xr; + a[5] = xi; + + xr = a[40]; + xi = a[41]; + yr = a[20]; + yi = a[21]; + a[40] = yr; + a[41] = yi; + a[20] = xr; + a[21] = xi; + + xr = a[48]; + xi = a[49]; + yr = a[12]; + yi = a[13]; + a[48] = yr; + a[49] = yi; + a[12] = xr; + a[13] = xi; + + xr = a[56]; + xi = a[57]; + yr = a[28]; + yi = a[29]; + a[56] = yr; + a[57] = yi; + a[28] = xr; + a[29] = xi; + + xr = a[34]; + xi = a[35]; + yr = a[68]; + yi = a[69]; + a[34] = yr; + a[35] = yi; + a[68] = xr; + a[69] = xi; + + xr = a[42]; + xi = a[43]; + yr = a[84]; + yi = a[85]; + a[42] = yr; + a[43] = yi; + a[84] = xr; + a[85] = xi; + + xr = a[50]; + xi = a[51]; + yr = a[76]; + yi = a[77]; + a[50] = yr; + a[51] = yi; + a[76] = xr; + a[77] = xi; + + xr = a[58]; + xi = a[59]; + yr = a[92]; + yi = a[93]; + a[58] = yr; + a[59] = yi; + a[92] = xr; + a[93] = xi; + + xr = a[44]; + xi = a[45]; + yr = a[52]; + yi = a[53]; + a[44] = yr; + a[45] = yi; + a[52] = xr; + a[53] = xi; + + xr = a[96]; + xi = a[97]; + yr = a[6]; + yi = a[7]; + a[96] = yr; + a[97] = yi; + a[6] = xr; + a[7] = xi; + + xr = a[104]; + xi = a[105]; + yr = a[22]; + yi = a[23]; + a[104] = yr; + a[105] = yi; + a[22] = xr; + a[23] = xi; + + xr = a[112]; + xi = a[113]; + yr = a[14]; + yi = a[15]; + a[112] = yr; + a[113] = yi; + a[14] = xr; + a[15] = xi; + + xr = a[120]; + xi = a[121]; + yr = a[30]; + yi = a[31]; + a[120] = yr; + a[121] = yi; + a[30] = xr; + a[31] = xi; + + xr = a[98]; + xi = a[99]; + yr = a[70]; + yi = a[71]; + a[98] = yr; + a[99] = yi; + a[70] = xr; + a[71] = xi; + + xr = a[106]; + xi = a[107]; + yr = a[86]; + yi = a[87]; + a[106] = yr; + a[107] = yi; + a[86] = xr; + a[87] = xi; + + xr = a[114]; + xi = a[115]; + yr = a[78]; + yi = a[79]; + a[114] = yr; + a[115] = yi; + a[78] = xr; + a[79] = xi; + + xr = a[122]; + xi = a[123]; + yr = a[94]; + yi = a[95]; + a[122] = yr; + a[123] = yi; + a[94] = xr; + a[95] = xi; + + xr = a[100]; + xi = a[101]; + yr = a[38]; + yi = a[39]; + a[100] = yr; + a[101] = yi; + a[38] = xr; + a[39] = xi; + + xr = a[108]; + xi = a[109]; + yr = a[54]; + yi = a[55]; + a[108] = yr; + a[109] = yi; + a[54] = xr; + a[55] = xi; + + xr = a[116]; + xi = a[117]; + yr = a[46]; + 
yi = a[47]; + a[116] = yr; + a[117] = yi; + a[46] = xr; + a[47] = xi; + + xr = a[124]; + xi = a[125]; + yr = a[62]; + yi = a[63]; + a[124] = yr; + a[125] = yi; + a[62] = xr; + a[63] = xi; + + xr = a[110]; + xi = a[111]; + yr = a[118]; + yi = a[119]; + a[110] = yr; + a[111] = yi; + a[118] = xr; + a[119] = xi; +} + +void cft1st_128_mips(float* a) { + float f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14; + int a_ptr, p1_rdft, p2_rdft, count; + const float* first = rdft_wk3ri_first; + const float* second = rdft_wk3ri_second; + + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + // first 8 + "lwc1 %[f0], 0(%[a]) \n\t" + "lwc1 %[f1], 4(%[a]) \n\t" + "lwc1 %[f2], 8(%[a]) \n\t" + "lwc1 %[f3], 12(%[a]) \n\t" + "lwc1 %[f4], 16(%[a]) \n\t" + "lwc1 %[f5], 20(%[a]) \n\t" + "lwc1 %[f6], 24(%[a]) \n\t" + "lwc1 %[f7], 28(%[a]) \n\t" + "add.s %[f8], %[f0], %[f2] \n\t" + "sub.s %[f0], %[f0], %[f2] \n\t" + "add.s %[f2], %[f4], %[f6] \n\t" + "sub.s %[f4], %[f4], %[f6] \n\t" + "add.s %[f6], %[f1], %[f3] \n\t" + "sub.s %[f1], %[f1], %[f3] \n\t" + "add.s %[f3], %[f5], %[f7] \n\t" + "sub.s %[f5], %[f5], %[f7] \n\t" + "add.s %[f7], %[f8], %[f2] \n\t" + "sub.s %[f8], %[f8], %[f2] \n\t" + "sub.s %[f2], %[f1], %[f4] \n\t" + "add.s %[f1], %[f1], %[f4] \n\t" + "add.s %[f4], %[f6], %[f3] \n\t" + "sub.s %[f6], %[f6], %[f3] \n\t" + "sub.s %[f3], %[f0], %[f5] \n\t" + "add.s %[f0], %[f0], %[f5] \n\t" + "swc1 %[f7], 0(%[a]) \n\t" + "swc1 %[f8], 16(%[a]) \n\t" + "swc1 %[f2], 28(%[a]) \n\t" + "swc1 %[f1], 12(%[a]) \n\t" + "swc1 %[f4], 4(%[a]) \n\t" + "swc1 %[f6], 20(%[a]) \n\t" + "swc1 %[f3], 8(%[a]) \n\t" + "swc1 %[f0], 24(%[a]) \n\t" + // second 8 + "lwc1 %[f0], 32(%[a]) \n\t" + "lwc1 %[f1], 36(%[a]) \n\t" + "lwc1 %[f2], 40(%[a]) \n\t" + "lwc1 %[f3], 44(%[a]) \n\t" + "lwc1 %[f4], 48(%[a]) \n\t" + "lwc1 %[f5], 52(%[a]) \n\t" + "lwc1 %[f6], 56(%[a]) \n\t" + "lwc1 %[f7], 60(%[a]) \n\t" + "add.s %[f8], %[f4], %[f6] \n\t" + "sub.s %[f4], %[f4], %[f6] \n\t" + "add.s %[f6], %[f1], %[f3] \n\t" + "sub.s %[f1], %[f1], %[f3] \n\t" + "add.s %[f3], %[f0], %[f2] \n\t" + "sub.s %[f0], %[f0], %[f2] \n\t" + "add.s %[f2], %[f5], %[f7] \n\t" + "sub.s %[f5], %[f5], %[f7] \n\t" + "add.s %[f7], %[f4], %[f1] \n\t" + "sub.s %[f4], %[f4], %[f1] \n\t" + "add.s %[f1], %[f3], %[f8] \n\t" + "sub.s %[f3], %[f3], %[f8] \n\t" + "sub.s %[f8], %[f0], %[f5] \n\t" + "add.s %[f0], %[f0], %[f5] \n\t" + "add.s %[f5], %[f6], %[f2] \n\t" + "sub.s %[f6], %[f2], %[f6] \n\t" + "lwc1 %[f9], 8(%[rdft_w]) \n\t" + "sub.s %[f2], %[f8], %[f7] \n\t" + "add.s %[f8], %[f8], %[f7] \n\t" + "sub.s %[f7], %[f4], %[f0] \n\t" + "add.s %[f4], %[f4], %[f0] \n\t" + // prepare for loop + "addiu %[a_ptr], %[a], 64 \n\t" + "addiu %[p1_rdft], %[rdft_w], 8 \n\t" + "addiu %[p2_rdft], %[rdft_w], 16 \n\t" + "addiu %[count], $zero, 7 \n\t" + // finish second 8 + "mul.s %[f2], %[f9], %[f2] \n\t" + "mul.s %[f8], %[f9], %[f8] \n\t" + "mul.s %[f7], %[f9], %[f7] \n\t" + "mul.s %[f4], %[f9], %[f4] \n\t" + "swc1 %[f1], 32(%[a]) \n\t" + "swc1 %[f3], 52(%[a]) \n\t" + "swc1 %[f5], 36(%[a]) \n\t" + "swc1 %[f6], 48(%[a]) \n\t" + "swc1 %[f2], 40(%[a]) \n\t" + "swc1 %[f8], 44(%[a]) \n\t" + "swc1 %[f7], 56(%[a]) \n\t" + "swc1 %[f4], 60(%[a]) \n\t" + // loop + "1: \n\t" + "lwc1 %[f0], 0(%[a_ptr]) \n\t" + "lwc1 %[f1], 4(%[a_ptr]) \n\t" + "lwc1 %[f2], 8(%[a_ptr]) \n\t" + "lwc1 %[f3], 12(%[a_ptr]) \n\t" + "lwc1 %[f4], 16(%[a_ptr]) \n\t" + "lwc1 %[f5], 20(%[a_ptr]) \n\t" + "lwc1 %[f6], 24(%[a_ptr]) \n\t" + "lwc1 %[f7], 28(%[a_ptr]) \n\t" + "add.s %[f8], %[f0], %[f2] \n\t" + "sub.s 
%[f0], %[f0], %[f2] \n\t" + "add.s %[f2], %[f4], %[f6] \n\t" + "sub.s %[f4], %[f4], %[f6] \n\t" + "add.s %[f6], %[f1], %[f3] \n\t" + "sub.s %[f1], %[f1], %[f3] \n\t" + "add.s %[f3], %[f5], %[f7] \n\t" + "sub.s %[f5], %[f5], %[f7] \n\t" + "lwc1 %[f10], 4(%[p1_rdft]) \n\t" + "lwc1 %[f11], 0(%[p2_rdft]) \n\t" + "lwc1 %[f12], 4(%[p2_rdft]) \n\t" + "lwc1 %[f13], 8(%[first]) \n\t" + "lwc1 %[f14], 12(%[first]) \n\t" + "add.s %[f7], %[f8], %[f2] \n\t" + "sub.s %[f8], %[f8], %[f2] \n\t" + "add.s %[f2], %[f6], %[f3] \n\t" + "sub.s %[f6], %[f6], %[f3] \n\t" + "add.s %[f3], %[f0], %[f5] \n\t" + "sub.s %[f0], %[f0], %[f5] \n\t" + "add.s %[f5], %[f1], %[f4] \n\t" + "sub.s %[f1], %[f1], %[f4] \n\t" + "swc1 %[f7], 0(%[a_ptr]) \n\t" + "swc1 %[f2], 4(%[a_ptr]) \n\t" + "mul.s %[f4], %[f9], %[f8] \n\t" +#if defined(MIPS32_R2_LE) + "mul.s %[f8], %[f10], %[f8] \n\t" + "mul.s %[f7], %[f11], %[f0] \n\t" + "mul.s %[f0], %[f12], %[f0] \n\t" + "mul.s %[f2], %[f13], %[f3] \n\t" + "mul.s %[f3], %[f14], %[f3] \n\t" + "nmsub.s %[f4], %[f4], %[f10], %[f6] \n\t" + "madd.s %[f8], %[f8], %[f9], %[f6] \n\t" + "nmsub.s %[f7], %[f7], %[f12], %[f5] \n\t" + "madd.s %[f0], %[f0], %[f11], %[f5] \n\t" + "nmsub.s %[f2], %[f2], %[f14], %[f1] \n\t" + "madd.s %[f3], %[f3], %[f13], %[f1] \n\t" +#else + "mul.s %[f7], %[f10], %[f6] \n\t" + "mul.s %[f6], %[f9], %[f6] \n\t" + "mul.s %[f8], %[f10], %[f8] \n\t" + "mul.s %[f2], %[f11], %[f0] \n\t" + "mul.s %[f11], %[f11], %[f5] \n\t" + "mul.s %[f5], %[f12], %[f5] \n\t" + "mul.s %[f0], %[f12], %[f0] \n\t" + "mul.s %[f12], %[f13], %[f3] \n\t" + "mul.s %[f13], %[f13], %[f1] \n\t" + "mul.s %[f1], %[f14], %[f1] \n\t" + "mul.s %[f3], %[f14], %[f3] \n\t" + "sub.s %[f4], %[f4], %[f7] \n\t" + "add.s %[f8], %[f6], %[f8] \n\t" + "sub.s %[f7], %[f2], %[f5] \n\t" + "add.s %[f0], %[f11], %[f0] \n\t" + "sub.s %[f2], %[f12], %[f1] \n\t" + "add.s %[f3], %[f13], %[f3] \n\t" +#endif + "swc1 %[f4], 16(%[a_ptr]) \n\t" + "swc1 %[f8], 20(%[a_ptr]) \n\t" + "swc1 %[f7], 8(%[a_ptr]) \n\t" + "swc1 %[f0], 12(%[a_ptr]) \n\t" + "swc1 %[f2], 24(%[a_ptr]) \n\t" + "swc1 %[f3], 28(%[a_ptr]) \n\t" + "lwc1 %[f0], 32(%[a_ptr]) \n\t" + "lwc1 %[f1], 36(%[a_ptr]) \n\t" + "lwc1 %[f2], 40(%[a_ptr]) \n\t" + "lwc1 %[f3], 44(%[a_ptr]) \n\t" + "lwc1 %[f4], 48(%[a_ptr]) \n\t" + "lwc1 %[f5], 52(%[a_ptr]) \n\t" + "lwc1 %[f6], 56(%[a_ptr]) \n\t" + "lwc1 %[f7], 60(%[a_ptr]) \n\t" + "add.s %[f8], %[f0], %[f2] \n\t" + "sub.s %[f0], %[f0], %[f2] \n\t" + "add.s %[f2], %[f4], %[f6] \n\t" + "sub.s %[f4], %[f4], %[f6] \n\t" + "add.s %[f6], %[f1], %[f3] \n\t" + "sub.s %[f1], %[f1], %[f3] \n\t" + "add.s %[f3], %[f5], %[f7] \n\t" + "sub.s %[f5], %[f5], %[f7] \n\t" + "lwc1 %[f11], 8(%[p2_rdft]) \n\t" + "lwc1 %[f12], 12(%[p2_rdft]) \n\t" + "lwc1 %[f13], 8(%[second]) \n\t" + "lwc1 %[f14], 12(%[second]) \n\t" + "add.s %[f7], %[f8], %[f2] \n\t" + "sub.s %[f8], %[f2], %[f8] \n\t" + "add.s %[f2], %[f6], %[f3] \n\t" + "sub.s %[f6], %[f3], %[f6] \n\t" + "add.s %[f3], %[f0], %[f5] \n\t" + "sub.s %[f0], %[f0], %[f5] \n\t" + "add.s %[f5], %[f1], %[f4] \n\t" + "sub.s %[f1], %[f1], %[f4] \n\t" + "swc1 %[f7], 32(%[a_ptr]) \n\t" + "swc1 %[f2], 36(%[a_ptr]) \n\t" + "mul.s %[f4], %[f10], %[f8] \n\t" +#if defined(MIPS32_R2_LE) + "mul.s %[f10], %[f10], %[f6] \n\t" + "mul.s %[f7], %[f11], %[f0] \n\t" + "mul.s %[f11], %[f11], %[f5] \n\t" + "mul.s %[f2], %[f13], %[f3] \n\t" + "mul.s %[f13], %[f13], %[f1] \n\t" + "madd.s %[f4], %[f4], %[f9], %[f6] \n\t" + "nmsub.s %[f10], %[f10], %[f9], %[f8] \n\t" + "nmsub.s %[f7], %[f7], %[f12], %[f5] \n\t" + "madd.s %[f11], %[f11], %[f12], 
%[f0] \n\t" + "nmsub.s %[f2], %[f2], %[f14], %[f1] \n\t" + "madd.s %[f13], %[f13], %[f14], %[f3] \n\t" +#else + "mul.s %[f2], %[f9], %[f6] \n\t" + "mul.s %[f10], %[f10], %[f6] \n\t" + "mul.s %[f9], %[f9], %[f8] \n\t" + "mul.s %[f7], %[f11], %[f0] \n\t" + "mul.s %[f8], %[f12], %[f5] \n\t" + "mul.s %[f11], %[f11], %[f5] \n\t" + "mul.s %[f12], %[f12], %[f0] \n\t" + "mul.s %[f5], %[f13], %[f3] \n\t" + "mul.s %[f0], %[f14], %[f1] \n\t" + "mul.s %[f13], %[f13], %[f1] \n\t" + "mul.s %[f14], %[f14], %[f3] \n\t" + "add.s %[f4], %[f4], %[f2] \n\t" + "sub.s %[f10], %[f10], %[f9] \n\t" + "sub.s %[f7], %[f7], %[f8] \n\t" + "add.s %[f11], %[f11], %[f12] \n\t" + "sub.s %[f2], %[f5], %[f0] \n\t" + "add.s %[f13], %[f13], %[f14] \n\t" +#endif + "swc1 %[f4], 48(%[a_ptr]) \n\t" + "swc1 %[f10], 52(%[a_ptr]) \n\t" + "swc1 %[f7], 40(%[a_ptr]) \n\t" + "swc1 %[f11], 44(%[a_ptr]) \n\t" + "swc1 %[f2], 56(%[a_ptr]) \n\t" + "swc1 %[f13], 60(%[a_ptr]) \n\t" + "addiu %[count], %[count], -1 \n\t" + "lwc1 %[f9], 8(%[p1_rdft]) \n\t" + "addiu %[a_ptr], %[a_ptr], 64 \n\t" + "addiu %[p1_rdft], %[p1_rdft], 8 \n\t" + "addiu %[p2_rdft], %[p2_rdft], 16 \n\t" + "addiu %[first], %[first], 8 \n\t" + "bgtz %[count], 1b \n\t" + " addiu %[second], %[second], 8 \n\t" + ".set pop \n\t" + : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3), + [f4] "=&f" (f4), [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7), + [f8] "=&f" (f8), [f9] "=&f" (f9), [f10] "=&f" (f10), [f11] "=&f" (f11), + [f12] "=&f" (f12), [f13] "=&f" (f13), [f14] "=&f" (f14), + [a_ptr] "=&r" (a_ptr), [p1_rdft] "=&r" (p1_rdft), [first] "+r" (first), + [p2_rdft] "=&r" (p2_rdft), [count] "=&r" (count), [second] "+r" (second) + : [a] "r" (a), [rdft_w] "r" (rdft_w) + : "memory" + ); +} + +void cftmdl_128_mips(float* a) { + float f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14; + int tmp_a, count; + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "addiu %[tmp_a], %[a], 0 \n\t" + "addiu %[count], $zero, 4 \n\t" + "1: \n\t" + "addiu %[count], %[count], -1 \n\t" + "lwc1 %[f0], 0(%[tmp_a]) \n\t" + "lwc1 %[f2], 32(%[tmp_a]) \n\t" + "lwc1 %[f4], 64(%[tmp_a]) \n\t" + "lwc1 %[f6], 96(%[tmp_a]) \n\t" + "lwc1 %[f1], 4(%[tmp_a]) \n\t" + "lwc1 %[f3], 36(%[tmp_a]) \n\t" + "lwc1 %[f5], 68(%[tmp_a]) \n\t" + "lwc1 %[f7], 100(%[tmp_a]) \n\t" + "add.s %[f8], %[f0], %[f2] \n\t" + "sub.s %[f0], %[f0], %[f2] \n\t" + "add.s %[f2], %[f4], %[f6] \n\t" + "sub.s %[f4], %[f4], %[f6] \n\t" + "add.s %[f6], %[f1], %[f3] \n\t" + "sub.s %[f1], %[f1], %[f3] \n\t" + "add.s %[f3], %[f5], %[f7] \n\t" + "sub.s %[f5], %[f5], %[f7] \n\t" + "add.s %[f7], %[f8], %[f2] \n\t" + "sub.s %[f8], %[f8], %[f2] \n\t" + "add.s %[f2], %[f1], %[f4] \n\t" + "sub.s %[f1], %[f1], %[f4] \n\t" + "add.s %[f4], %[f6], %[f3] \n\t" + "sub.s %[f6], %[f6], %[f3] \n\t" + "sub.s %[f3], %[f0], %[f5] \n\t" + "add.s %[f0], %[f0], %[f5] \n\t" + "swc1 %[f7], 0(%[tmp_a]) \n\t" + "swc1 %[f8], 64(%[tmp_a]) \n\t" + "swc1 %[f2], 36(%[tmp_a]) \n\t" + "swc1 %[f1], 100(%[tmp_a]) \n\t" + "swc1 %[f4], 4(%[tmp_a]) \n\t" + "swc1 %[f6], 68(%[tmp_a]) \n\t" + "swc1 %[f3], 32(%[tmp_a]) \n\t" + "swc1 %[f0], 96(%[tmp_a]) \n\t" + "bgtz %[count], 1b \n\t" + " addiu %[tmp_a], %[tmp_a], 8 \n\t" + ".set pop \n\t" + : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3), + [f4] "=&f" (f4), [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7), + [f8] "=&f" (f8), [tmp_a] "=&r" (tmp_a), [count] "=&r" (count) + : [a] "r" (a) + : "memory" + ); + f9 = rdft_w[2]; + __asm __volatile ( + ".set push \n\t" + ".set noreorder 
\n\t" + "addiu %[tmp_a], %[a], 128 \n\t" + "addiu %[count], $zero, 4 \n\t" + "1: \n\t" + "addiu %[count], %[count], -1 \n\t" + "lwc1 %[f0], 0(%[tmp_a]) \n\t" + "lwc1 %[f2], 32(%[tmp_a]) \n\t" + "lwc1 %[f5], 68(%[tmp_a]) \n\t" + "lwc1 %[f7], 100(%[tmp_a]) \n\t" + "lwc1 %[f1], 4(%[tmp_a]) \n\t" + "lwc1 %[f3], 36(%[tmp_a]) \n\t" + "lwc1 %[f4], 64(%[tmp_a]) \n\t" + "lwc1 %[f6], 96(%[tmp_a]) \n\t" + "sub.s %[f8], %[f0], %[f2] \n\t" + "add.s %[f0], %[f0], %[f2] \n\t" + "sub.s %[f2], %[f5], %[f7] \n\t" + "add.s %[f5], %[f5], %[f7] \n\t" + "sub.s %[f7], %[f1], %[f3] \n\t" + "add.s %[f1], %[f1], %[f3] \n\t" + "sub.s %[f3], %[f4], %[f6] \n\t" + "add.s %[f4], %[f4], %[f6] \n\t" + "sub.s %[f6], %[f8], %[f2] \n\t" + "add.s %[f8], %[f8], %[f2] \n\t" + "add.s %[f2], %[f5], %[f1] \n\t" + "sub.s %[f5], %[f5], %[f1] \n\t" + "add.s %[f1], %[f3], %[f7] \n\t" + "sub.s %[f3], %[f3], %[f7] \n\t" + "add.s %[f7], %[f0], %[f4] \n\t" + "sub.s %[f0], %[f0], %[f4] \n\t" + "sub.s %[f4], %[f6], %[f1] \n\t" + "add.s %[f6], %[f6], %[f1] \n\t" + "sub.s %[f1], %[f3], %[f8] \n\t" + "add.s %[f3], %[f3], %[f8] \n\t" + "mul.s %[f4], %[f4], %[f9] \n\t" + "mul.s %[f6], %[f6], %[f9] \n\t" + "mul.s %[f1], %[f1], %[f9] \n\t" + "mul.s %[f3], %[f3], %[f9] \n\t" + "swc1 %[f7], 0(%[tmp_a]) \n\t" + "swc1 %[f2], 4(%[tmp_a]) \n\t" + "swc1 %[f5], 64(%[tmp_a]) \n\t" + "swc1 %[f0], 68(%[tmp_a]) \n\t" + "swc1 %[f4], 32(%[tmp_a]) \n\t" + "swc1 %[f6], 36(%[tmp_a]) \n\t" + "swc1 %[f1], 96(%[tmp_a]) \n\t" + "swc1 %[f3], 100(%[tmp_a]) \n\t" + "bgtz %[count], 1b \n\t" + " addiu %[tmp_a], %[tmp_a], 8 \n\t" + ".set pop \n\t" + : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3), + [f4] "=&f" (f4), [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7), + [f8] "=&f" (f8), [tmp_a] "=&r" (tmp_a), [count] "=&r" (count) + : [a] "r" (a), [f9] "f" (f9) + : "memory" + ); + f10 = rdft_w[3]; + f11 = rdft_w[4]; + f12 = rdft_w[5]; + f13 = rdft_wk3ri_first[2]; + f14 = rdft_wk3ri_first[3]; + + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "addiu %[tmp_a], %[a], 256 \n\t" + "addiu %[count], $zero, 4 \n\t" + "1: \n\t" + "addiu %[count], %[count], -1 \n\t" + "lwc1 %[f0], 0(%[tmp_a]) \n\t" + "lwc1 %[f2], 32(%[tmp_a]) \n\t" + "lwc1 %[f4], 64(%[tmp_a]) \n\t" + "lwc1 %[f6], 96(%[tmp_a]) \n\t" + "lwc1 %[f1], 4(%[tmp_a]) \n\t" + "lwc1 %[f3], 36(%[tmp_a]) \n\t" + "lwc1 %[f5], 68(%[tmp_a]) \n\t" + "lwc1 %[f7], 100(%[tmp_a]) \n\t" + "add.s %[f8], %[f0], %[f2] \n\t" + "sub.s %[f0], %[f0], %[f2] \n\t" + "add.s %[f2], %[f4], %[f6] \n\t" + "sub.s %[f4], %[f4], %[f6] \n\t" + "add.s %[f6], %[f1], %[f3] \n\t" + "sub.s %[f1], %[f1], %[f3] \n\t" + "add.s %[f3], %[f5], %[f7] \n\t" + "sub.s %[f5], %[f5], %[f7] \n\t" + "sub.s %[f7], %[f8], %[f2] \n\t" + "add.s %[f8], %[f8], %[f2] \n\t" + "add.s %[f2], %[f1], %[f4] \n\t" + "sub.s %[f1], %[f1], %[f4] \n\t" + "sub.s %[f4], %[f6], %[f3] \n\t" + "add.s %[f6], %[f6], %[f3] \n\t" + "sub.s %[f3], %[f0], %[f5] \n\t" + "add.s %[f0], %[f0], %[f5] \n\t" + "swc1 %[f8], 0(%[tmp_a]) \n\t" + "swc1 %[f6], 4(%[tmp_a]) \n\t" + "mul.s %[f5], %[f9], %[f7] \n\t" +#if defined(MIPS32_R2_LE) + "mul.s %[f7], %[f10], %[f7] \n\t" + "mul.s %[f8], %[f11], %[f3] \n\t" + "mul.s %[f3], %[f12], %[f3] \n\t" + "mul.s %[f6], %[f13], %[f0] \n\t" + "mul.s %[f0], %[f14], %[f0] \n\t" + "nmsub.s %[f5], %[f5], %[f10], %[f4] \n\t" + "madd.s %[f7], %[f7], %[f9], %[f4] \n\t" + "nmsub.s %[f8], %[f8], %[f12], %[f2] \n\t" + "madd.s %[f3], %[f3], %[f11], %[f2] \n\t" + "nmsub.s %[f6], %[f6], %[f14], %[f1] \n\t" + "madd.s %[f0], %[f0], %[f13], %[f1] \n\t" + 
"swc1 %[f5], 64(%[tmp_a]) \n\t" + "swc1 %[f7], 68(%[tmp_a]) \n\t" +#else + "mul.s %[f8], %[f10], %[f4] \n\t" + "mul.s %[f4], %[f9], %[f4] \n\t" + "mul.s %[f7], %[f10], %[f7] \n\t" + "mul.s %[f6], %[f11], %[f3] \n\t" + "mul.s %[f3], %[f12], %[f3] \n\t" + "sub.s %[f5], %[f5], %[f8] \n\t" + "mul.s %[f8], %[f12], %[f2] \n\t" + "mul.s %[f2], %[f11], %[f2] \n\t" + "add.s %[f7], %[f4], %[f7] \n\t" + "mul.s %[f4], %[f13], %[f0] \n\t" + "mul.s %[f0], %[f14], %[f0] \n\t" + "sub.s %[f8], %[f6], %[f8] \n\t" + "mul.s %[f6], %[f14], %[f1] \n\t" + "mul.s %[f1], %[f13], %[f1] \n\t" + "add.s %[f3], %[f2], %[f3] \n\t" + "swc1 %[f5], 64(%[tmp_a]) \n\t" + "swc1 %[f7], 68(%[tmp_a]) \n\t" + "sub.s %[f6], %[f4], %[f6] \n\t" + "add.s %[f0], %[f1], %[f0] \n\t" +#endif + "swc1 %[f8], 32(%[tmp_a]) \n\t" + "swc1 %[f3], 36(%[tmp_a]) \n\t" + "swc1 %[f6], 96(%[tmp_a]) \n\t" + "swc1 %[f0], 100(%[tmp_a]) \n\t" + "bgtz %[count], 1b \n\t" + " addiu %[tmp_a], %[tmp_a], 8 \n\t" + ".set pop \n\t" + : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3), + [f4] "=&f" (f4), [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7), + [f8] "=&f" (f8), [tmp_a] "=&r" (tmp_a), [count] "=&r" (count) + : [a] "r" (a), [f9] "f" (f9), [f10] "f" (f10), [f11] "f" (f11), + [f12] "f" (f12), [f13] "f" (f13), [f14] "f" (f14) + : "memory" + ); + f11 = rdft_w[6]; + f12 = rdft_w[7]; + f13 = rdft_wk3ri_second[2]; + f14 = rdft_wk3ri_second[3]; + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "addiu %[tmp_a], %[a], 384 \n\t" + "addiu %[count], $zero, 4 \n\t" + "1: \n\t" + "addiu %[count], %[count], -1 \n\t" + "lwc1 %[f0], 0(%[tmp_a]) \n\t" + "lwc1 %[f1], 4(%[tmp_a]) \n\t" + "lwc1 %[f2], 32(%[tmp_a]) \n\t" + "lwc1 %[f3], 36(%[tmp_a]) \n\t" + "lwc1 %[f4], 64(%[tmp_a]) \n\t" + "lwc1 %[f5], 68(%[tmp_a]) \n\t" + "lwc1 %[f6], 96(%[tmp_a]) \n\t" + "lwc1 %[f7], 100(%[tmp_a]) \n\t" + "add.s %[f8], %[f0], %[f2] \n\t" + "sub.s %[f0], %[f0], %[f2] \n\t" + "add.s %[f2], %[f4], %[f6] \n\t" + "sub.s %[f4], %[f4], %[f6] \n\t" + "add.s %[f6], %[f1], %[f3] \n\t" + "sub.s %[f1], %[f1], %[f3] \n\t" + "add.s %[f3], %[f5], %[f7] \n\t" + "sub.s %[f5], %[f5], %[f7] \n\t" + "sub.s %[f7], %[f2], %[f8] \n\t" + "add.s %[f2], %[f2], %[f8] \n\t" + "add.s %[f8], %[f1], %[f4] \n\t" + "sub.s %[f1], %[f1], %[f4] \n\t" + "sub.s %[f4], %[f3], %[f6] \n\t" + "add.s %[f3], %[f3], %[f6] \n\t" + "sub.s %[f6], %[f0], %[f5] \n\t" + "add.s %[f0], %[f0], %[f5] \n\t" + "swc1 %[f2], 0(%[tmp_a]) \n\t" + "swc1 %[f3], 4(%[tmp_a]) \n\t" + "mul.s %[f5], %[f10], %[f7] \n\t" +#if defined(MIPS32_R2_LE) + "mul.s %[f7], %[f9], %[f7] \n\t" + "mul.s %[f2], %[f12], %[f8] \n\t" + "mul.s %[f8], %[f11], %[f8] \n\t" + "mul.s %[f3], %[f14], %[f1] \n\t" + "mul.s %[f1], %[f13], %[f1] \n\t" + "madd.s %[f5], %[f5], %[f9], %[f4] \n\t" + "msub.s %[f7], %[f7], %[f10], %[f4] \n\t" + "msub.s %[f2], %[f2], %[f11], %[f6] \n\t" + "madd.s %[f8], %[f8], %[f12], %[f6] \n\t" + "msub.s %[f3], %[f3], %[f13], %[f0] \n\t" + "madd.s %[f1], %[f1], %[f14], %[f0] \n\t" + "swc1 %[f5], 64(%[tmp_a]) \n\t" + "swc1 %[f7], 68(%[tmp_a]) \n\t" +#else + "mul.s %[f2], %[f9], %[f4] \n\t" + "mul.s %[f4], %[f10], %[f4] \n\t" + "mul.s %[f7], %[f9], %[f7] \n\t" + "mul.s %[f3], %[f11], %[f6] \n\t" + "mul.s %[f6], %[f12], %[f6] \n\t" + "add.s %[f5], %[f5], %[f2] \n\t" + "sub.s %[f7], %[f4], %[f7] \n\t" + "mul.s %[f2], %[f12], %[f8] \n\t" + "mul.s %[f8], %[f11], %[f8] \n\t" + "mul.s %[f4], %[f14], %[f1] \n\t" + "mul.s %[f1], %[f13], %[f1] \n\t" + "sub.s %[f2], %[f3], %[f2] \n\t" + "mul.s %[f3], %[f13], %[f0] \n\t" + "mul.s %[f0], 
%[f14], %[f0] \n\t" + "add.s %[f8], %[f8], %[f6] \n\t" + "swc1 %[f5], 64(%[tmp_a]) \n\t" + "swc1 %[f7], 68(%[tmp_a]) \n\t" + "sub.s %[f3], %[f3], %[f4] \n\t" + "add.s %[f1], %[f1], %[f0] \n\t" +#endif + "swc1 %[f2], 32(%[tmp_a]) \n\t" + "swc1 %[f8], 36(%[tmp_a]) \n\t" + "swc1 %[f3], 96(%[tmp_a]) \n\t" + "swc1 %[f1], 100(%[tmp_a]) \n\t" + "bgtz %[count], 1b \n\t" + " addiu %[tmp_a], %[tmp_a], 8 \n\t" + ".set pop \n\t" + : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3), + [f4] "=&f" (f4), [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7), + [f8] "=&f" (f8), [tmp_a] "=&r" (tmp_a), [count] "=&r" (count) + : [a] "r" (a), [f9] "f" (f9), [f10] "f" (f10), [f11] "f" (f11), + [f12] "f" (f12), [f13] "f" (f13), [f14] "f" (f14) + : "memory" + ); +} + +void cftfsub_128_mips(float* a) { + float f0, f1, f2, f3, f4, f5, f6, f7, f8; + int tmp_a, count; + + cft1st_128_mips(a); + cftmdl_128_mips(a); + + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "addiu %[tmp_a], %[a], 0 \n\t" + "addiu %[count], $zero, 16 \n\t" + "1: \n\t" + "addiu %[count], %[count], -1 \n\t" + "lwc1 %[f0], 0(%[tmp_a]) \n\t" + "lwc1 %[f2], 128(%[tmp_a]) \n\t" + "lwc1 %[f4], 256(%[tmp_a]) \n\t" + "lwc1 %[f6], 384(%[tmp_a]) \n\t" + "lwc1 %[f1], 4(%[tmp_a]) \n\t" + "lwc1 %[f3], 132(%[tmp_a]) \n\t" + "lwc1 %[f5], 260(%[tmp_a]) \n\t" + "lwc1 %[f7], 388(%[tmp_a]) \n\t" + "add.s %[f8], %[f0], %[f2] \n\t" + "sub.s %[f0], %[f0], %[f2] \n\t" + "add.s %[f2], %[f4], %[f6] \n\t" + "sub.s %[f4], %[f4], %[f6] \n\t" + "add.s %[f6], %[f1], %[f3] \n\t" + "sub.s %[f1], %[f1], %[f3] \n\t" + "add.s %[f3], %[f5], %[f7] \n\t" + "sub.s %[f5], %[f5], %[f7] \n\t" + "add.s %[f7], %[f8], %[f2] \n\t" + "sub.s %[f8], %[f8], %[f2] \n\t" + "add.s %[f2], %[f1], %[f4] \n\t" + "sub.s %[f1], %[f1], %[f4] \n\t" + "add.s %[f4], %[f6], %[f3] \n\t" + "sub.s %[f6], %[f6], %[f3] \n\t" + "sub.s %[f3], %[f0], %[f5] \n\t" + "add.s %[f0], %[f0], %[f5] \n\t" + "swc1 %[f7], 0(%[tmp_a]) \n\t" + "swc1 %[f8], 256(%[tmp_a]) \n\t" + "swc1 %[f2], 132(%[tmp_a]) \n\t" + "swc1 %[f1], 388(%[tmp_a]) \n\t" + "swc1 %[f4], 4(%[tmp_a]) \n\t" + "swc1 %[f6], 260(%[tmp_a]) \n\t" + "swc1 %[f3], 128(%[tmp_a]) \n\t" + "swc1 %[f0], 384(%[tmp_a]) \n\t" + "bgtz %[count], 1b \n\t" + " addiu %[tmp_a], %[tmp_a], 8 \n\t" + ".set pop \n\t" + : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3), + [f4] "=&f" (f4), [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7), + [f8] "=&f" (f8), [tmp_a] "=&r" (tmp_a), + [count] "=&r" (count) + : [a] "r" (a) + : "memory" + ); +} + +void cftbsub_128_mips(float* a) { + float f0, f1, f2, f3, f4, f5, f6, f7, f8; + int tmp_a, count; + + cft1st_128_mips(a); + cftmdl_128_mips(a); + + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "addiu %[tmp_a], %[a], 0 \n\t" + "addiu %[count], $zero, 16 \n\t" + "1: \n\t" + "addiu %[count], %[count], -1 \n\t" + "lwc1 %[f0], 0(%[tmp_a]) \n\t" + "lwc1 %[f2], 128(%[tmp_a]) \n\t" + "lwc1 %[f4], 256(%[tmp_a]) \n\t" + "lwc1 %[f6], 384(%[tmp_a]) \n\t" + "lwc1 %[f1], 4(%[tmp_a]) \n\t" + "lwc1 %[f3], 132(%[tmp_a]) \n\t" + "lwc1 %[f5], 260(%[tmp_a]) \n\t" + "lwc1 %[f7], 388(%[tmp_a]) \n\t" + "add.s %[f8], %[f0], %[f2] \n\t" + "sub.s %[f0], %[f0], %[f2] \n\t" + "add.s %[f2], %[f4], %[f6] \n\t" + "sub.s %[f4], %[f4], %[f6] \n\t" + "add.s %[f6], %[f1], %[f3] \n\t" + "sub.s %[f1], %[f3], %[f1] \n\t" + "add.s %[f3], %[f5], %[f7] \n\t" + "sub.s %[f5], %[f5], %[f7] \n\t" + "add.s %[f7], %[f8], %[f2] \n\t" + "sub.s %[f8], %[f8], %[f2] \n\t" + "sub.s %[f2], %[f1], %[f4] \n\t" + "add.s %[f1], %[f1], 
%[f4] \n\t" + "add.s %[f4], %[f3], %[f6] \n\t" + "sub.s %[f6], %[f3], %[f6] \n\t" + "sub.s %[f3], %[f0], %[f5] \n\t" + "add.s %[f0], %[f0], %[f5] \n\t" + "neg.s %[f4], %[f4] \n\t" + "swc1 %[f7], 0(%[tmp_a]) \n\t" + "swc1 %[f8], 256(%[tmp_a]) \n\t" + "swc1 %[f2], 132(%[tmp_a]) \n\t" + "swc1 %[f1], 388(%[tmp_a]) \n\t" + "swc1 %[f6], 260(%[tmp_a]) \n\t" + "swc1 %[f3], 128(%[tmp_a]) \n\t" + "swc1 %[f0], 384(%[tmp_a]) \n\t" + "swc1 %[f4], 4(%[tmp_a]) \n\t" + "bgtz %[count], 1b \n\t" + " addiu %[tmp_a], %[tmp_a], 8 \n\t" + ".set pop \n\t" + : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3), + [f4] "=&f" (f4), [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7), + [f8] "=&f" (f8), [tmp_a] "=&r" (tmp_a), [count] "=&r" (count) + : [a] "r" (a) + : "memory" + ); +} + +void rftfsub_128_mips(float* a) { + const float* c = rdft_w + 32; + const float f0 = 0.5f; + float* a1 = &a[2]; + float* a2 = &a[126]; + const float* c1 = &c[1]; + const float* c2 = &c[31]; + float f1, f2, f3 ,f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14, f15; + int count; + + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "lwc1 %[f6], 0(%[c2]) \n\t" + "lwc1 %[f1], 0(%[a1]) \n\t" + "lwc1 %[f2], 0(%[a2]) \n\t" + "lwc1 %[f3], 4(%[a1]) \n\t" + "lwc1 %[f4], 4(%[a2]) \n\t" + "lwc1 %[f5], 0(%[c1]) \n\t" + "sub.s %[f6], %[f0], %[f6] \n\t" + "sub.s %[f7], %[f1], %[f2] \n\t" + "add.s %[f8], %[f3], %[f4] \n\t" + "addiu %[count], $zero, 15 \n\t" + "mul.s %[f9], %[f6], %[f7] \n\t" + "mul.s %[f6], %[f6], %[f8] \n\t" +#if !defined(MIPS32_R2_LE) + "mul.s %[f8], %[f5], %[f8] \n\t" + "mul.s %[f5], %[f5], %[f7] \n\t" + "sub.s %[f9], %[f9], %[f8] \n\t" + "add.s %[f6], %[f6], %[f5] \n\t" +#else + "nmsub.s %[f9], %[f9], %[f5], %[f8] \n\t" + "madd.s %[f6], %[f6], %[f5], %[f7] \n\t" +#endif + "sub.s %[f1], %[f1], %[f9] \n\t" + "add.s %[f2], %[f2], %[f9] \n\t" + "sub.s %[f3], %[f3], %[f6] \n\t" + "sub.s %[f4], %[f4], %[f6] \n\t" + "swc1 %[f1], 0(%[a1]) \n\t" + "swc1 %[f2], 0(%[a2]) \n\t" + "swc1 %[f3], 4(%[a1]) \n\t" + "swc1 %[f4], 4(%[a2]) \n\t" + "addiu %[a1], %[a1], 8 \n\t" + "addiu %[a2], %[a2], -8 \n\t" + "addiu %[c1], %[c1], 4 \n\t" + "addiu %[c2], %[c2], -4 \n\t" + "1: \n\t" + "lwc1 %[f6], 0(%[c2]) \n\t" + "lwc1 %[f1], 0(%[a1]) \n\t" + "lwc1 %[f2], 0(%[a2]) \n\t" + "lwc1 %[f3], 4(%[a1]) \n\t" + "lwc1 %[f4], 4(%[a2]) \n\t" + "lwc1 %[f5], 0(%[c1]) \n\t" + "sub.s %[f6], %[f0], %[f6] \n\t" + "sub.s %[f7], %[f1], %[f2] \n\t" + "add.s %[f8], %[f3], %[f4] \n\t" + "lwc1 %[f10], -4(%[c2]) \n\t" + "lwc1 %[f11], 8(%[a1]) \n\t" + "lwc1 %[f12], -8(%[a2]) \n\t" + "mul.s %[f9], %[f6], %[f7] \n\t" + "mul.s %[f6], %[f6], %[f8] \n\t" +#if !defined(MIPS32_R2_LE) + "mul.s %[f8], %[f5], %[f8] \n\t" + "mul.s %[f5], %[f5], %[f7] \n\t" + "lwc1 %[f13], 12(%[a1]) \n\t" + "lwc1 %[f14], -4(%[a2]) \n\t" + "lwc1 %[f15], 4(%[c1]) \n\t" + "sub.s %[f9], %[f9], %[f8] \n\t" + "add.s %[f6], %[f6], %[f5] \n\t" +#else + "lwc1 %[f13], 12(%[a1]) \n\t" + "lwc1 %[f14], -4(%[a2]) \n\t" + "lwc1 %[f15], 4(%[c1]) \n\t" + "nmsub.s %[f9], %[f9], %[f5], %[f8] \n\t" + "madd.s %[f6], %[f6], %[f5], %[f7] \n\t" +#endif + "sub.s %[f10], %[f0], %[f10] \n\t" + "sub.s %[f5], %[f11], %[f12] \n\t" + "add.s %[f7], %[f13], %[f14] \n\t" + "sub.s %[f1], %[f1], %[f9] \n\t" + "add.s %[f2], %[f2], %[f9] \n\t" + "sub.s %[f3], %[f3], %[f6] \n\t" + "mul.s %[f8], %[f10], %[f5] \n\t" + "mul.s %[f10], %[f10], %[f7] \n\t" +#if !defined(MIPS32_R2_LE) + "mul.s %[f9], %[f15], %[f7] \n\t" + "mul.s %[f15], %[f15], %[f5] \n\t" + "sub.s %[f4], %[f4], %[f6] \n\t" + "swc1 %[f1], 0(%[a1]) \n\t" + 
"swc1 %[f2], 0(%[a2]) \n\t" + "sub.s %[f8], %[f8], %[f9] \n\t" + "add.s %[f10], %[f10], %[f15] \n\t" +#else + "swc1 %[f1], 0(%[a1]) \n\t" + "swc1 %[f2], 0(%[a2]) \n\t" + "sub.s %[f4], %[f4], %[f6] \n\t" + "nmsub.s %[f8], %[f8], %[f15], %[f7] \n\t" + "madd.s %[f10], %[f10], %[f15], %[f5] \n\t" +#endif + "swc1 %[f3], 4(%[a1]) \n\t" + "swc1 %[f4], 4(%[a2]) \n\t" + "sub.s %[f11], %[f11], %[f8] \n\t" + "add.s %[f12], %[f12], %[f8] \n\t" + "sub.s %[f13], %[f13], %[f10] \n\t" + "sub.s %[f14], %[f14], %[f10] \n\t" + "addiu %[c2], %[c2], -8 \n\t" + "addiu %[c1], %[c1], 8 \n\t" + "swc1 %[f11], 8(%[a1]) \n\t" + "swc1 %[f12], -8(%[a2]) \n\t" + "swc1 %[f13], 12(%[a1]) \n\t" + "swc1 %[f14], -4(%[a2]) \n\t" + "addiu %[a1], %[a1], 16 \n\t" + "addiu %[count], %[count], -1 \n\t" + "bgtz %[count], 1b \n\t" + " addiu %[a2], %[a2], -16 \n\t" + ".set pop \n\t" + : [a1] "+r" (a1), [a2] "+r" (a2), [c1] "+r" (c1), [c2] "+r" (c2), + [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3), [f4] "=&f" (f4), + [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7), [f8] "=&f" (f8), + [f9] "=&f" (f9), [f10] "=&f" (f10), [f11] "=&f" (f11), [f12] "=&f" (f12), + [f13] "=&f" (f13), [f14] "=&f" (f14), [f15] "=&f" (f15), + [count] "=&r" (count) + : [f0] "f" (f0) + : "memory" + ); +} + +void rftbsub_128_mips(float* a) { + const float *c = rdft_w + 32; + const float f0 = 0.5f; + float* a1 = &a[2]; + float* a2 = &a[126]; + const float* c1 = &c[1]; + const float* c2 = &c[31]; + float f1, f2, f3 ,f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14, f15; + int count; + + a[1] = -a[1]; + a[65] = -a[65]; + + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "lwc1 %[f6], 0(%[c2]) \n\t" + "lwc1 %[f1], 0(%[a1]) \n\t" + "lwc1 %[f2], 0(%[a2]) \n\t" + "lwc1 %[f3], 4(%[a1]) \n\t" + "lwc1 %[f4], 4(%[a2]) \n\t" + "lwc1 %[f5], 0(%[c1]) \n\t" + "sub.s %[f6], %[f0], %[f6] \n\t" + "sub.s %[f7], %[f1], %[f2] \n\t" + "add.s %[f8], %[f3], %[f4] \n\t" + "addiu %[count], $zero, 15 \n\t" + "mul.s %[f9], %[f6], %[f7] \n\t" + "mul.s %[f6], %[f6], %[f8] \n\t" +#if !defined(MIPS32_R2_LE) + "mul.s %[f8], %[f5], %[f8] \n\t" + "mul.s %[f5], %[f5], %[f7] \n\t" + "add.s %[f9], %[f9], %[f8] \n\t" + "sub.s %[f6], %[f6], %[f5] \n\t" +#else + "madd.s %[f9], %[f9], %[f5], %[f8] \n\t" + "nmsub.s %[f6], %[f6], %[f5], %[f7] \n\t" +#endif + "sub.s %[f1], %[f1], %[f9] \n\t" + "add.s %[f2], %[f2], %[f9] \n\t" + "sub.s %[f3], %[f6], %[f3] \n\t" + "sub.s %[f4], %[f6], %[f4] \n\t" + "swc1 %[f1], 0(%[a1]) \n\t" + "swc1 %[f2], 0(%[a2]) \n\t" + "swc1 %[f3], 4(%[a1]) \n\t" + "swc1 %[f4], 4(%[a2]) \n\t" + "addiu %[a1], %[a1], 8 \n\t" + "addiu %[a2], %[a2], -8 \n\t" + "addiu %[c1], %[c1], 4 \n\t" + "addiu %[c2], %[c2], -4 \n\t" + "1: \n\t" + "lwc1 %[f6], 0(%[c2]) \n\t" + "lwc1 %[f1], 0(%[a1]) \n\t" + "lwc1 %[f2], 0(%[a2]) \n\t" + "lwc1 %[f3], 4(%[a1]) \n\t" + "lwc1 %[f4], 4(%[a2]) \n\t" + "lwc1 %[f5], 0(%[c1]) \n\t" + "sub.s %[f6], %[f0], %[f6] \n\t" + "sub.s %[f7], %[f1], %[f2] \n\t" + "add.s %[f8], %[f3], %[f4] \n\t" + "lwc1 %[f10], -4(%[c2]) \n\t" + "lwc1 %[f11], 8(%[a1]) \n\t" + "lwc1 %[f12], -8(%[a2]) \n\t" + "mul.s %[f9], %[f6], %[f7] \n\t" + "mul.s %[f6], %[f6], %[f8] \n\t" +#if !defined(MIPS32_R2_LE) + "mul.s %[f8], %[f5], %[f8] \n\t" + "mul.s %[f5], %[f5], %[f7] \n\t" + "lwc1 %[f13], 12(%[a1]) \n\t" + "lwc1 %[f14], -4(%[a2]) \n\t" + "lwc1 %[f15], 4(%[c1]) \n\t" + "add.s %[f9], %[f9], %[f8] \n\t" + "sub.s %[f6], %[f6], %[f5] \n\t" +#else + "lwc1 %[f13], 12(%[a1]) \n\t" + "lwc1 %[f14], -4(%[a2]) \n\t" + "lwc1 %[f15], 4(%[c1]) \n\t" + "madd.s %[f9], %[f9], %[f5], %[f8] \n\t" 
+ "nmsub.s %[f6], %[f6], %[f5], %[f7] \n\t" +#endif + "sub.s %[f10], %[f0], %[f10] \n\t" + "sub.s %[f5], %[f11], %[f12] \n\t" + "add.s %[f7], %[f13], %[f14] \n\t" + "sub.s %[f1], %[f1], %[f9] \n\t" + "add.s %[f2], %[f2], %[f9] \n\t" + "sub.s %[f3], %[f6], %[f3] \n\t" + "mul.s %[f8], %[f10], %[f5] \n\t" + "mul.s %[f10], %[f10], %[f7] \n\t" +#if !defined(MIPS32_R2_LE) + "mul.s %[f9], %[f15], %[f7] \n\t" + "mul.s %[f15], %[f15], %[f5] \n\t" + "sub.s %[f4], %[f6], %[f4] \n\t" + "swc1 %[f1], 0(%[a1]) \n\t" + "swc1 %[f2], 0(%[a2]) \n\t" + "add.s %[f8], %[f8], %[f9] \n\t" + "sub.s %[f10], %[f10], %[f15] \n\t" +#else + "swc1 %[f1], 0(%[a1]) \n\t" + "swc1 %[f2], 0(%[a2]) \n\t" + "sub.s %[f4], %[f6], %[f4] \n\t" + "madd.s %[f8], %[f8], %[f15], %[f7] \n\t" + "nmsub.s %[f10], %[f10], %[f15], %[f5] \n\t" +#endif + "swc1 %[f3], 4(%[a1]) \n\t" + "swc1 %[f4], 4(%[a2]) \n\t" + "sub.s %[f11], %[f11], %[f8] \n\t" + "add.s %[f12], %[f12], %[f8] \n\t" + "sub.s %[f13], %[f10], %[f13] \n\t" + "sub.s %[f14], %[f10], %[f14] \n\t" + "addiu %[c2], %[c2], -8 \n\t" + "addiu %[c1], %[c1], 8 \n\t" + "swc1 %[f11], 8(%[a1]) \n\t" + "swc1 %[f12], -8(%[a2]) \n\t" + "swc1 %[f13], 12(%[a1]) \n\t" + "swc1 %[f14], -4(%[a2]) \n\t" + "addiu %[a1], %[a1], 16 \n\t" + "addiu %[count], %[count], -1 \n\t" + "bgtz %[count], 1b \n\t" + " addiu %[a2], %[a2], -16 \n\t" + ".set pop \n\t" + : [a1] "+r" (a1), [a2] "+r" (a2), [c1] "+r" (c1), [c2] "+r" (c2), + [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3), [f4] "=&f" (f4), + [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7), [f8] "=&f" (f8), + [f9] "=&f" (f9), [f10] "=&f" (f10), [f11] "=&f" (f11), [f12] "=&f" (f12), + [f13] "=&f" (f13), [f14] "=&f" (f14), [f15] "=&f" (f15), + [count] "=&r" (count) + : [f0] "f" (f0) + : "memory" + ); +} +#endif + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/utility/ooura_fft_neon.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/utility/ooura_fft_neon.cc new file mode 100644 index 0000000000..401387a643 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/utility/ooura_fft_neon.cc @@ -0,0 +1,352 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * The rdft AEC algorithm, neon version of speed-critical functions. + * + * Based on the sse2 version. 
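+ *
+ * As in the C implementation, only the fixed length-128 rdft is handled;
+ * the loops below operate on float32x4_t vectors, i.e. four floats at a
+ * time.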
+ */ + +#include "modules/audio_processing/utility/ooura_fft.h" + +#include <arm_neon.h> + +#include "modules/audio_processing/utility/ooura_fft_tables_common.h" +#include "modules/audio_processing/utility/ooura_fft_tables_neon_sse2.h" + +namespace webrtc { + +#if defined(WEBRTC_HAS_NEON) +void cft1st_128_neon(float* a) { + const float32x4_t vec_swap_sign = vld1q_f32((float32_t*)k_swap_sign); + int j, k2; + + for (k2 = 0, j = 0; j < 128; j += 16, k2 += 4) { + float32x4_t a00v = vld1q_f32(&a[j + 0]); + float32x4_t a04v = vld1q_f32(&a[j + 4]); + float32x4_t a08v = vld1q_f32(&a[j + 8]); + float32x4_t a12v = vld1q_f32(&a[j + 12]); + float32x4_t a01v = vcombine_f32(vget_low_f32(a00v), vget_low_f32(a08v)); + float32x4_t a23v = vcombine_f32(vget_high_f32(a00v), vget_high_f32(a08v)); + float32x4_t a45v = vcombine_f32(vget_low_f32(a04v), vget_low_f32(a12v)); + float32x4_t a67v = vcombine_f32(vget_high_f32(a04v), vget_high_f32(a12v)); + const float32x4_t wk1rv = vld1q_f32(&rdft_wk1r[k2]); + const float32x4_t wk1iv = vld1q_f32(&rdft_wk1i[k2]); + const float32x4_t wk2rv = vld1q_f32(&rdft_wk2r[k2]); + const float32x4_t wk2iv = vld1q_f32(&rdft_wk2i[k2]); + const float32x4_t wk3rv = vld1q_f32(&rdft_wk3r[k2]); + const float32x4_t wk3iv = vld1q_f32(&rdft_wk3i[k2]); + float32x4_t x0v = vaddq_f32(a01v, a23v); + const float32x4_t x1v = vsubq_f32(a01v, a23v); + const float32x4_t x2v = vaddq_f32(a45v, a67v); + const float32x4_t x3v = vsubq_f32(a45v, a67v); + const float32x4_t x3w = vrev64q_f32(x3v); + float32x4_t x0w; + a01v = vaddq_f32(x0v, x2v); + x0v = vsubq_f32(x0v, x2v); + x0w = vrev64q_f32(x0v); + a45v = vmulq_f32(wk2rv, x0v); + a45v = vmlaq_f32(a45v, wk2iv, x0w); + x0v = vmlaq_f32(x1v, x3w, vec_swap_sign); + x0w = vrev64q_f32(x0v); + a23v = vmulq_f32(wk1rv, x0v); + a23v = vmlaq_f32(a23v, wk1iv, x0w); + x0v = vmlsq_f32(x1v, x3w, vec_swap_sign); + x0w = vrev64q_f32(x0v); + a67v = vmulq_f32(wk3rv, x0v); + a67v = vmlaq_f32(a67v, wk3iv, x0w); + a00v = vcombine_f32(vget_low_f32(a01v), vget_low_f32(a23v)); + a04v = vcombine_f32(vget_low_f32(a45v), vget_low_f32(a67v)); + a08v = vcombine_f32(vget_high_f32(a01v), vget_high_f32(a23v)); + a12v = vcombine_f32(vget_high_f32(a45v), vget_high_f32(a67v)); + vst1q_f32(&a[j + 0], a00v); + vst1q_f32(&a[j + 4], a04v); + vst1q_f32(&a[j + 8], a08v); + vst1q_f32(&a[j + 12], a12v); + } +} + +void cftmdl_128_neon(float* a) { + int j; + const int l = 8; + const float32x4_t vec_swap_sign = vld1q_f32((float32_t*)k_swap_sign); + float32x4_t wk1rv = vld1q_f32(cftmdl_wk1r); + + for (j = 0; j < l; j += 2) { + const float32x2_t a_00 = vld1_f32(&a[j + 0]); + const float32x2_t a_08 = vld1_f32(&a[j + 8]); + const float32x2_t a_32 = vld1_f32(&a[j + 32]); + const float32x2_t a_40 = vld1_f32(&a[j + 40]); + const float32x4_t a_00_32 = vcombine_f32(a_00, a_32); + const float32x4_t a_08_40 = vcombine_f32(a_08, a_40); + const float32x4_t x0r0_0i0_0r1_x0i1 = vaddq_f32(a_00_32, a_08_40); + const float32x4_t x1r0_1i0_1r1_x1i1 = vsubq_f32(a_00_32, a_08_40); + const float32x2_t a_16 = vld1_f32(&a[j + 16]); + const float32x2_t a_24 = vld1_f32(&a[j + 24]); + const float32x2_t a_48 = vld1_f32(&a[j + 48]); + const float32x2_t a_56 = vld1_f32(&a[j + 56]); + const float32x4_t a_16_48 = vcombine_f32(a_16, a_48); + const float32x4_t a_24_56 = vcombine_f32(a_24, a_56); + const float32x4_t x2r0_2i0_2r1_x2i1 = vaddq_f32(a_16_48, a_24_56); + const float32x4_t x3r0_3i0_3r1_x3i1 = vsubq_f32(a_16_48, a_24_56); + const float32x4_t xx0 = vaddq_f32(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1); + const float32x4_t xx1 = 
vsubq_f32(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1); + const float32x4_t x3i0_3r0_3i1_x3r1 = vrev64q_f32(x3r0_3i0_3r1_x3i1); + const float32x4_t x1_x3_add = + vmlaq_f32(x1r0_1i0_1r1_x1i1, vec_swap_sign, x3i0_3r0_3i1_x3r1); + const float32x4_t x1_x3_sub = + vmlsq_f32(x1r0_1i0_1r1_x1i1, vec_swap_sign, x3i0_3r0_3i1_x3r1); + const float32x2_t yy0_a = vdup_lane_f32(vget_high_f32(x1_x3_add), 0); + const float32x2_t yy0_s = vdup_lane_f32(vget_high_f32(x1_x3_sub), 0); + const float32x4_t yy0_as = vcombine_f32(yy0_a, yy0_s); + const float32x2_t yy1_a = vdup_lane_f32(vget_high_f32(x1_x3_add), 1); + const float32x2_t yy1_s = vdup_lane_f32(vget_high_f32(x1_x3_sub), 1); + const float32x4_t yy1_as = vcombine_f32(yy1_a, yy1_s); + const float32x4_t yy0 = vmlaq_f32(yy0_as, vec_swap_sign, yy1_as); + const float32x4_t yy4 = vmulq_f32(wk1rv, yy0); + const float32x4_t xx1_rev = vrev64q_f32(xx1); + const float32x4_t yy4_rev = vrev64q_f32(yy4); + + vst1_f32(&a[j + 0], vget_low_f32(xx0)); + vst1_f32(&a[j + 32], vget_high_f32(xx0)); + vst1_f32(&a[j + 16], vget_low_f32(xx1)); + vst1_f32(&a[j + 48], vget_high_f32(xx1_rev)); + + a[j + 48] = -a[j + 48]; + + vst1_f32(&a[j + 8], vget_low_f32(x1_x3_add)); + vst1_f32(&a[j + 24], vget_low_f32(x1_x3_sub)); + vst1_f32(&a[j + 40], vget_low_f32(yy4)); + vst1_f32(&a[j + 56], vget_high_f32(yy4_rev)); + } + + { + const int k = 64; + const int k1 = 2; + const int k2 = 2 * k1; + const float32x4_t wk2rv = vld1q_f32(&rdft_wk2r[k2 + 0]); + const float32x4_t wk2iv = vld1q_f32(&rdft_wk2i[k2 + 0]); + const float32x4_t wk1iv = vld1q_f32(&rdft_wk1i[k2 + 0]); + const float32x4_t wk3rv = vld1q_f32(&rdft_wk3r[k2 + 0]); + const float32x4_t wk3iv = vld1q_f32(&rdft_wk3i[k2 + 0]); + wk1rv = vld1q_f32(&rdft_wk1r[k2 + 0]); + for (j = k; j < l + k; j += 2) { + const float32x2_t a_00 = vld1_f32(&a[j + 0]); + const float32x2_t a_08 = vld1_f32(&a[j + 8]); + const float32x2_t a_32 = vld1_f32(&a[j + 32]); + const float32x2_t a_40 = vld1_f32(&a[j + 40]); + const float32x4_t a_00_32 = vcombine_f32(a_00, a_32); + const float32x4_t a_08_40 = vcombine_f32(a_08, a_40); + const float32x4_t x0r0_0i0_0r1_x0i1 = vaddq_f32(a_00_32, a_08_40); + const float32x4_t x1r0_1i0_1r1_x1i1 = vsubq_f32(a_00_32, a_08_40); + const float32x2_t a_16 = vld1_f32(&a[j + 16]); + const float32x2_t a_24 = vld1_f32(&a[j + 24]); + const float32x2_t a_48 = vld1_f32(&a[j + 48]); + const float32x2_t a_56 = vld1_f32(&a[j + 56]); + const float32x4_t a_16_48 = vcombine_f32(a_16, a_48); + const float32x4_t a_24_56 = vcombine_f32(a_24, a_56); + const float32x4_t x2r0_2i0_2r1_x2i1 = vaddq_f32(a_16_48, a_24_56); + const float32x4_t x3r0_3i0_3r1_x3i1 = vsubq_f32(a_16_48, a_24_56); + const float32x4_t xx = vaddq_f32(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1); + const float32x4_t xx1 = vsubq_f32(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1); + const float32x4_t x3i0_3r0_3i1_x3r1 = vrev64q_f32(x3r0_3i0_3r1_x3i1); + const float32x4_t x1_x3_add = + vmlaq_f32(x1r0_1i0_1r1_x1i1, vec_swap_sign, x3i0_3r0_3i1_x3r1); + const float32x4_t x1_x3_sub = + vmlsq_f32(x1r0_1i0_1r1_x1i1, vec_swap_sign, x3i0_3r0_3i1_x3r1); + float32x4_t xx4 = vmulq_f32(wk2rv, xx1); + float32x4_t xx12 = vmulq_f32(wk1rv, x1_x3_add); + float32x4_t xx22 = vmulq_f32(wk3rv, x1_x3_sub); + xx4 = vmlaq_f32(xx4, wk2iv, vrev64q_f32(xx1)); + xx12 = vmlaq_f32(xx12, wk1iv, vrev64q_f32(x1_x3_add)); + xx22 = vmlaq_f32(xx22, wk3iv, vrev64q_f32(x1_x3_sub)); + + vst1_f32(&a[j + 0], vget_low_f32(xx)); + vst1_f32(&a[j + 32], vget_high_f32(xx)); + vst1_f32(&a[j + 16], vget_low_f32(xx4)); + vst1_f32(&a[j + 48], 
vget_high_f32(xx4)); + vst1_f32(&a[j + 8], vget_low_f32(xx12)); + vst1_f32(&a[j + 40], vget_high_f32(xx12)); + vst1_f32(&a[j + 24], vget_low_f32(xx22)); + vst1_f32(&a[j + 56], vget_high_f32(xx22)); + } + } +} + +__inline static float32x4_t reverse_order_f32x4(float32x4_t in) { + // A B C D -> C D A B + const float32x4_t rev = vcombine_f32(vget_high_f32(in), vget_low_f32(in)); + // C D A B -> D C B A + return vrev64q_f32(rev); +} + +void rftfsub_128_neon(float* a) { + const float* c = rdft_w + 32; + int j1, j2; + const float32x4_t mm_half = vdupq_n_f32(0.5f); + + // Vectorized code (four at once). + // Note: commented number are indexes for the first iteration of the loop. + for (j1 = 1, j2 = 2; j2 + 7 < 64; j1 += 4, j2 += 8) { + // Load 'wk'. + const float32x4_t c_j1 = vld1q_f32(&c[j1]); // 1, 2, 3, 4, + const float32x4_t c_k1 = vld1q_f32(&c[29 - j1]); // 28, 29, 30, 31, + const float32x4_t wkrt = vsubq_f32(mm_half, c_k1); // 28, 29, 30, 31, + const float32x4_t wkr_ = reverse_order_f32x4(wkrt); // 31, 30, 29, 28, + const float32x4_t wki_ = c_j1; // 1, 2, 3, 4, + // Load and shuffle 'a'. + // 2, 4, 6, 8, 3, 5, 7, 9 + float32x4x2_t a_j2_p = vld2q_f32(&a[0 + j2]); + // 120, 122, 124, 126, 121, 123, 125, 127, + const float32x4x2_t k2_0_4 = vld2q_f32(&a[122 - j2]); + // 126, 124, 122, 120 + const float32x4_t a_k2_p0 = reverse_order_f32x4(k2_0_4.val[0]); + // 127, 125, 123, 121 + const float32x4_t a_k2_p1 = reverse_order_f32x4(k2_0_4.val[1]); + // Calculate 'x'. + const float32x4_t xr_ = vsubq_f32(a_j2_p.val[0], a_k2_p0); + // 2-126, 4-124, 6-122, 8-120, + const float32x4_t xi_ = vaddq_f32(a_j2_p.val[1], a_k2_p1); + // 3-127, 5-125, 7-123, 9-121, + // Calculate product into 'y'. + // yr = wkr * xr - wki * xi; + // yi = wkr * xi + wki * xr; + const float32x4_t a_ = vmulq_f32(wkr_, xr_); + const float32x4_t b_ = vmulq_f32(wki_, xi_); + const float32x4_t c_ = vmulq_f32(wkr_, xi_); + const float32x4_t d_ = vmulq_f32(wki_, xr_); + const float32x4_t yr_ = vsubq_f32(a_, b_); // 2-126, 4-124, 6-122, 8-120, + const float32x4_t yi_ = vaddq_f32(c_, d_); // 3-127, 5-125, 7-123, 9-121, + // Update 'a'. + // a[j2 + 0] -= yr; + // a[j2 + 1] -= yi; + // a[k2 + 0] += yr; + // a[k2 + 1] -= yi; + // 126, 124, 122, 120, + const float32x4_t a_k2_p0n = vaddq_f32(a_k2_p0, yr_); + // 127, 125, 123, 121, + const float32x4_t a_k2_p1n = vsubq_f32(a_k2_p1, yi_); + // Shuffle in right order and store. + const float32x4_t a_k2_p0nr = vrev64q_f32(a_k2_p0n); + const float32x4_t a_k2_p1nr = vrev64q_f32(a_k2_p1n); + // 124, 125, 126, 127, 120, 121, 122, 123 + const float32x4x2_t a_k2_n = vzipq_f32(a_k2_p0nr, a_k2_p1nr); + // 2, 4, 6, 8, + a_j2_p.val[0] = vsubq_f32(a_j2_p.val[0], yr_); + // 3, 5, 7, 9, + a_j2_p.val[1] = vsubq_f32(a_j2_p.val[1], yi_); + // 2, 3, 4, 5, 6, 7, 8, 9, + vst2q_f32(&a[0 + j2], a_j2_p); + + vst1q_f32(&a[122 - j2], a_k2_n.val[1]); + vst1q_f32(&a[126 - j2], a_k2_n.val[0]); + } + + // Scalar code for the remaining items. + for (; j2 < 64; j1 += 1, j2 += 2) { + const int k2 = 128 - j2; + const int k1 = 32 - j1; + const float wkr = 0.5f - c[k1]; + const float wki = c[j1]; + const float xr = a[j2 + 0] - a[k2 + 0]; + const float xi = a[j2 + 1] + a[k2 + 1]; + const float yr = wkr * xr - wki * xi; + const float yi = wkr * xi + wki * xr; + a[j2 + 0] -= yr; + a[j2 + 1] -= yi; + a[k2 + 0] += yr; + a[k2 + 1] -= yi; + } +} + +void rftbsub_128_neon(float* a) { + const float* c = rdft_w + 32; + int j1, j2; + const float32x4_t mm_half = vdupq_n_f32(0.5f); + + a[1] = -a[1]; + // Vectorized code (four at once). 
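+  // The backward transform differs from rftfsub_128_neon only in the sign of
+  // the wki products (conjugated twiddle factors) and in the negated
+  // imaginary parts: a[1] above, a[65] at the end, and the yi updates below.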
+ // Note: commented number are indexes for the first iteration of the loop. + for (j1 = 1, j2 = 2; j2 + 7 < 64; j1 += 4, j2 += 8) { + // Load 'wk'. + const float32x4_t c_j1 = vld1q_f32(&c[j1]); // 1, 2, 3, 4, + const float32x4_t c_k1 = vld1q_f32(&c[29 - j1]); // 28, 29, 30, 31, + const float32x4_t wkrt = vsubq_f32(mm_half, c_k1); // 28, 29, 30, 31, + const float32x4_t wkr_ = reverse_order_f32x4(wkrt); // 31, 30, 29, 28, + const float32x4_t wki_ = c_j1; // 1, 2, 3, 4, + // Load and shuffle 'a'. + // 2, 4, 6, 8, 3, 5, 7, 9 + float32x4x2_t a_j2_p = vld2q_f32(&a[0 + j2]); + // 120, 122, 124, 126, 121, 123, 125, 127, + const float32x4x2_t k2_0_4 = vld2q_f32(&a[122 - j2]); + // 126, 124, 122, 120 + const float32x4_t a_k2_p0 = reverse_order_f32x4(k2_0_4.val[0]); + // 127, 125, 123, 121 + const float32x4_t a_k2_p1 = reverse_order_f32x4(k2_0_4.val[1]); + // Calculate 'x'. + const float32x4_t xr_ = vsubq_f32(a_j2_p.val[0], a_k2_p0); + // 2-126, 4-124, 6-122, 8-120, + const float32x4_t xi_ = vaddq_f32(a_j2_p.val[1], a_k2_p1); + // 3-127, 5-125, 7-123, 9-121, + // Calculate product into 'y'. + // yr = wkr * xr - wki * xi; + // yi = wkr * xi + wki * xr; + const float32x4_t a_ = vmulq_f32(wkr_, xr_); + const float32x4_t b_ = vmulq_f32(wki_, xi_); + const float32x4_t c_ = vmulq_f32(wkr_, xi_); + const float32x4_t d_ = vmulq_f32(wki_, xr_); + const float32x4_t yr_ = vaddq_f32(a_, b_); // 2-126, 4-124, 6-122, 8-120, + const float32x4_t yi_ = vsubq_f32(c_, d_); // 3-127, 5-125, 7-123, 9-121, + // Update 'a'. + // a[j2 + 0] -= yr; + // a[j2 + 1] -= yi; + // a[k2 + 0] += yr; + // a[k2 + 1] -= yi; + // 126, 124, 122, 120, + const float32x4_t a_k2_p0n = vaddq_f32(a_k2_p0, yr_); + // 127, 125, 123, 121, + const float32x4_t a_k2_p1n = vsubq_f32(yi_, a_k2_p1); + // Shuffle in right order and store. + // 2, 3, 4, 5, 6, 7, 8, 9, + const float32x4_t a_k2_p0nr = vrev64q_f32(a_k2_p0n); + const float32x4_t a_k2_p1nr = vrev64q_f32(a_k2_p1n); + // 124, 125, 126, 127, 120, 121, 122, 123 + const float32x4x2_t a_k2_n = vzipq_f32(a_k2_p0nr, a_k2_p1nr); + // 2, 4, 6, 8, + a_j2_p.val[0] = vsubq_f32(a_j2_p.val[0], yr_); + // 3, 5, 7, 9, + a_j2_p.val[1] = vsubq_f32(yi_, a_j2_p.val[1]); + // 2, 3, 4, 5, 6, 7, 8, 9, + vst2q_f32(&a[0 + j2], a_j2_p); + + vst1q_f32(&a[122 - j2], a_k2_n.val[1]); + vst1q_f32(&a[126 - j2], a_k2_n.val[0]); + } + + // Scalar code for the remaining items. + for (; j2 < 64; j1 += 1, j2 += 2) { + const int k2 = 128 - j2; + const int k1 = 32 - j1; + const float wkr = 0.5f - c[k1]; + const float wki = c[j1]; + const float xr = a[j2 + 0] - a[k2 + 0]; + const float xi = a[j2 + 1] + a[k2 + 1]; + const float yr = wkr * xr + wki * xi; + const float yi = wkr * xi - wki * xr; + a[j2 + 0] = a[j2 + 0] - yr; + a[j2 + 1] = yi - a[j2 + 1]; + a[k2 + 0] = yr + a[k2 + 0]; + a[k2 + 1] = yi - a[k2 + 1]; + } + a[65] = -a[65]; +} +#endif + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/utility/ooura_fft_sse2.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/utility/ooura_fft_sse2.cc new file mode 100644 index 0000000000..48a05c3bc2 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/utility/ooura_fft_sse2.cc @@ -0,0 +1,438 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing//utility/ooura_fft.h" + +#include <emmintrin.h> + +#include "modules/audio_processing/utility/ooura_fft_tables_common.h" +#include "modules/audio_processing/utility/ooura_fft_tables_neon_sse2.h" + +namespace webrtc { + +#if defined(WEBRTC_ARCH_X86_FAMILY) + +namespace { +// These intrinsics were unavailable before VS 2008. +// TODO(andrew): move to a common file. +#if defined(_MSC_VER) && _MSC_VER < 1500 +static __inline __m128 _mm_castsi128_ps(__m128i a) { + return *(__m128*)&a; +} +static __inline __m128i _mm_castps_si128(__m128 a) { + return *(__m128i*)&a; +} +#endif + +} // namespace + +void cft1st_128_SSE2(float* a) { + const __m128 mm_swap_sign = _mm_load_ps(k_swap_sign); + int j, k2; + + for (k2 = 0, j = 0; j < 128; j += 16, k2 += 4) { + __m128 a00v = _mm_loadu_ps(&a[j + 0]); + __m128 a04v = _mm_loadu_ps(&a[j + 4]); + __m128 a08v = _mm_loadu_ps(&a[j + 8]); + __m128 a12v = _mm_loadu_ps(&a[j + 12]); + __m128 a01v = _mm_shuffle_ps(a00v, a08v, _MM_SHUFFLE(1, 0, 1, 0)); + __m128 a23v = _mm_shuffle_ps(a00v, a08v, _MM_SHUFFLE(3, 2, 3, 2)); + __m128 a45v = _mm_shuffle_ps(a04v, a12v, _MM_SHUFFLE(1, 0, 1, 0)); + __m128 a67v = _mm_shuffle_ps(a04v, a12v, _MM_SHUFFLE(3, 2, 3, 2)); + + const __m128 wk1rv = _mm_load_ps(&rdft_wk1r[k2]); + const __m128 wk1iv = _mm_load_ps(&rdft_wk1i[k2]); + const __m128 wk2rv = _mm_load_ps(&rdft_wk2r[k2]); + const __m128 wk2iv = _mm_load_ps(&rdft_wk2i[k2]); + const __m128 wk3rv = _mm_load_ps(&rdft_wk3r[k2]); + const __m128 wk3iv = _mm_load_ps(&rdft_wk3i[k2]); + __m128 x0v = _mm_add_ps(a01v, a23v); + const __m128 x1v = _mm_sub_ps(a01v, a23v); + const __m128 x2v = _mm_add_ps(a45v, a67v); + const __m128 x3v = _mm_sub_ps(a45v, a67v); + __m128 x0w; + a01v = _mm_add_ps(x0v, x2v); + x0v = _mm_sub_ps(x0v, x2v); + x0w = _mm_shuffle_ps(x0v, x0v, _MM_SHUFFLE(2, 3, 0, 1)); + { + const __m128 a45_0v = _mm_mul_ps(wk2rv, x0v); + const __m128 a45_1v = _mm_mul_ps(wk2iv, x0w); + a45v = _mm_add_ps(a45_0v, a45_1v); + } + { + __m128 a23_0v, a23_1v; + const __m128 x3w = _mm_shuffle_ps(x3v, x3v, _MM_SHUFFLE(2, 3, 0, 1)); + const __m128 x3s = _mm_mul_ps(mm_swap_sign, x3w); + x0v = _mm_add_ps(x1v, x3s); + x0w = _mm_shuffle_ps(x0v, x0v, _MM_SHUFFLE(2, 3, 0, 1)); + a23_0v = _mm_mul_ps(wk1rv, x0v); + a23_1v = _mm_mul_ps(wk1iv, x0w); + a23v = _mm_add_ps(a23_0v, a23_1v); + + x0v = _mm_sub_ps(x1v, x3s); + x0w = _mm_shuffle_ps(x0v, x0v, _MM_SHUFFLE(2, 3, 0, 1)); + } + { + const __m128 a67_0v = _mm_mul_ps(wk3rv, x0v); + const __m128 a67_1v = _mm_mul_ps(wk3iv, x0w); + a67v = _mm_add_ps(a67_0v, a67_1v); + } + + a00v = _mm_shuffle_ps(a01v, a23v, _MM_SHUFFLE(1, 0, 1, 0)); + a04v = _mm_shuffle_ps(a45v, a67v, _MM_SHUFFLE(1, 0, 1, 0)); + a08v = _mm_shuffle_ps(a01v, a23v, _MM_SHUFFLE(3, 2, 3, 2)); + a12v = _mm_shuffle_ps(a45v, a67v, _MM_SHUFFLE(3, 2, 3, 2)); + _mm_storeu_ps(&a[j + 0], a00v); + _mm_storeu_ps(&a[j + 4], a04v); + _mm_storeu_ps(&a[j + 8], a08v); + _mm_storeu_ps(&a[j + 12], a12v); + } +} + +void cftmdl_128_SSE2(float* a) { + const int l = 8; + const __m128 mm_swap_sign = _mm_load_ps(k_swap_sign); + int j0; + + __m128 wk1rv = _mm_load_ps(cftmdl_wk1r); + for (j0 = 0; j0 < l; j0 += 2) { + const __m128i a_00 = _mm_loadl_epi64((__m128i*)&a[j0 + 0]); + const __m128i a_08 = _mm_loadl_epi64((__m128i*)&a[j0 + 8]); + const __m128i a_32 = _mm_loadl_epi64((__m128i*)&a[j0 + 32]); + const __m128i a_40 = _mm_loadl_epi64((__m128i*)&a[j0 + 40]); + const __m128 a_00_32 
= + _mm_shuffle_ps(_mm_castsi128_ps(a_00), _mm_castsi128_ps(a_32), + _MM_SHUFFLE(1, 0, 1, 0)); + const __m128 a_08_40 = + _mm_shuffle_ps(_mm_castsi128_ps(a_08), _mm_castsi128_ps(a_40), + _MM_SHUFFLE(1, 0, 1, 0)); + __m128 x0r0_0i0_0r1_x0i1 = _mm_add_ps(a_00_32, a_08_40); + const __m128 x1r0_1i0_1r1_x1i1 = _mm_sub_ps(a_00_32, a_08_40); + + const __m128i a_16 = _mm_loadl_epi64((__m128i*)&a[j0 + 16]); + const __m128i a_24 = _mm_loadl_epi64((__m128i*)&a[j0 + 24]); + const __m128i a_48 = _mm_loadl_epi64((__m128i*)&a[j0 + 48]); + const __m128i a_56 = _mm_loadl_epi64((__m128i*)&a[j0 + 56]); + const __m128 a_16_48 = + _mm_shuffle_ps(_mm_castsi128_ps(a_16), _mm_castsi128_ps(a_48), + _MM_SHUFFLE(1, 0, 1, 0)); + const __m128 a_24_56 = + _mm_shuffle_ps(_mm_castsi128_ps(a_24), _mm_castsi128_ps(a_56), + _MM_SHUFFLE(1, 0, 1, 0)); + const __m128 x2r0_2i0_2r1_x2i1 = _mm_add_ps(a_16_48, a_24_56); + const __m128 x3r0_3i0_3r1_x3i1 = _mm_sub_ps(a_16_48, a_24_56); + + const __m128 xx0 = _mm_add_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1); + const __m128 xx1 = _mm_sub_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1); + + const __m128 x3i0_3r0_3i1_x3r1 = _mm_castsi128_ps(_mm_shuffle_epi32( + _mm_castps_si128(x3r0_3i0_3r1_x3i1), _MM_SHUFFLE(2, 3, 0, 1))); + const __m128 x3_swapped = _mm_mul_ps(mm_swap_sign, x3i0_3r0_3i1_x3r1); + const __m128 x1_x3_add = _mm_add_ps(x1r0_1i0_1r1_x1i1, x3_swapped); + const __m128 x1_x3_sub = _mm_sub_ps(x1r0_1i0_1r1_x1i1, x3_swapped); + + const __m128 yy0 = + _mm_shuffle_ps(x1_x3_add, x1_x3_sub, _MM_SHUFFLE(2, 2, 2, 2)); + const __m128 yy1 = + _mm_shuffle_ps(x1_x3_add, x1_x3_sub, _MM_SHUFFLE(3, 3, 3, 3)); + const __m128 yy2 = _mm_mul_ps(mm_swap_sign, yy1); + const __m128 yy3 = _mm_add_ps(yy0, yy2); + const __m128 yy4 = _mm_mul_ps(wk1rv, yy3); + + _mm_storel_epi64((__m128i*)&a[j0 + 0], _mm_castps_si128(xx0)); + _mm_storel_epi64( + (__m128i*)&a[j0 + 32], + _mm_shuffle_epi32(_mm_castps_si128(xx0), _MM_SHUFFLE(3, 2, 3, 2))); + + _mm_storel_epi64((__m128i*)&a[j0 + 16], _mm_castps_si128(xx1)); + _mm_storel_epi64( + (__m128i*)&a[j0 + 48], + _mm_shuffle_epi32(_mm_castps_si128(xx1), _MM_SHUFFLE(2, 3, 2, 3))); + a[j0 + 48] = -a[j0 + 48]; + + _mm_storel_epi64((__m128i*)&a[j0 + 8], _mm_castps_si128(x1_x3_add)); + _mm_storel_epi64((__m128i*)&a[j0 + 24], _mm_castps_si128(x1_x3_sub)); + + _mm_storel_epi64((__m128i*)&a[j0 + 40], _mm_castps_si128(yy4)); + _mm_storel_epi64( + (__m128i*)&a[j0 + 56], + _mm_shuffle_epi32(_mm_castps_si128(yy4), _MM_SHUFFLE(2, 3, 2, 3))); + } + + { + int k = 64; + int k1 = 2; + int k2 = 2 * k1; + const __m128 wk2rv = _mm_load_ps(&rdft_wk2r[k2 + 0]); + const __m128 wk2iv = _mm_load_ps(&rdft_wk2i[k2 + 0]); + const __m128 wk1iv = _mm_load_ps(&rdft_wk1i[k2 + 0]); + const __m128 wk3rv = _mm_load_ps(&rdft_wk3r[k2 + 0]); + const __m128 wk3iv = _mm_load_ps(&rdft_wk3i[k2 + 0]); + wk1rv = _mm_load_ps(&rdft_wk1r[k2 + 0]); + for (j0 = k; j0 < l + k; j0 += 2) { + const __m128i a_00 = _mm_loadl_epi64((__m128i*)&a[j0 + 0]); + const __m128i a_08 = _mm_loadl_epi64((__m128i*)&a[j0 + 8]); + const __m128i a_32 = _mm_loadl_epi64((__m128i*)&a[j0 + 32]); + const __m128i a_40 = _mm_loadl_epi64((__m128i*)&a[j0 + 40]); + const __m128 a_00_32 = + _mm_shuffle_ps(_mm_castsi128_ps(a_00), _mm_castsi128_ps(a_32), + _MM_SHUFFLE(1, 0, 1, 0)); + const __m128 a_08_40 = + _mm_shuffle_ps(_mm_castsi128_ps(a_08), _mm_castsi128_ps(a_40), + _MM_SHUFFLE(1, 0, 1, 0)); + __m128 x0r0_0i0_0r1_x0i1 = _mm_add_ps(a_00_32, a_08_40); + const __m128 x1r0_1i0_1r1_x1i1 = _mm_sub_ps(a_00_32, a_08_40); + + const __m128i a_16 = 
_mm_loadl_epi64((__m128i*)&a[j0 + 16]); + const __m128i a_24 = _mm_loadl_epi64((__m128i*)&a[j0 + 24]); + const __m128i a_48 = _mm_loadl_epi64((__m128i*)&a[j0 + 48]); + const __m128i a_56 = _mm_loadl_epi64((__m128i*)&a[j0 + 56]); + const __m128 a_16_48 = + _mm_shuffle_ps(_mm_castsi128_ps(a_16), _mm_castsi128_ps(a_48), + _MM_SHUFFLE(1, 0, 1, 0)); + const __m128 a_24_56 = + _mm_shuffle_ps(_mm_castsi128_ps(a_24), _mm_castsi128_ps(a_56), + _MM_SHUFFLE(1, 0, 1, 0)); + const __m128 x2r0_2i0_2r1_x2i1 = _mm_add_ps(a_16_48, a_24_56); + const __m128 x3r0_3i0_3r1_x3i1 = _mm_sub_ps(a_16_48, a_24_56); + + const __m128 xx = _mm_add_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1); + const __m128 xx1 = _mm_sub_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1); + const __m128 xx2 = _mm_mul_ps(xx1, wk2rv); + const __m128 xx3 = _mm_mul_ps( + wk2iv, _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(xx1), + _MM_SHUFFLE(2, 3, 0, 1)))); + const __m128 xx4 = _mm_add_ps(xx2, xx3); + + const __m128 x3i0_3r0_3i1_x3r1 = _mm_castsi128_ps(_mm_shuffle_epi32( + _mm_castps_si128(x3r0_3i0_3r1_x3i1), _MM_SHUFFLE(2, 3, 0, 1))); + const __m128 x3_swapped = _mm_mul_ps(mm_swap_sign, x3i0_3r0_3i1_x3r1); + const __m128 x1_x3_add = _mm_add_ps(x1r0_1i0_1r1_x1i1, x3_swapped); + const __m128 x1_x3_sub = _mm_sub_ps(x1r0_1i0_1r1_x1i1, x3_swapped); + + const __m128 xx10 = _mm_mul_ps(x1_x3_add, wk1rv); + const __m128 xx11 = _mm_mul_ps( + wk1iv, _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(x1_x3_add), + _MM_SHUFFLE(2, 3, 0, 1)))); + const __m128 xx12 = _mm_add_ps(xx10, xx11); + + const __m128 xx20 = _mm_mul_ps(x1_x3_sub, wk3rv); + const __m128 xx21 = _mm_mul_ps( + wk3iv, _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(x1_x3_sub), + _MM_SHUFFLE(2, 3, 0, 1)))); + const __m128 xx22 = _mm_add_ps(xx20, xx21); + + _mm_storel_epi64((__m128i*)&a[j0 + 0], _mm_castps_si128(xx)); + _mm_storel_epi64( + (__m128i*)&a[j0 + 32], + _mm_shuffle_epi32(_mm_castps_si128(xx), _MM_SHUFFLE(3, 2, 3, 2))); + + _mm_storel_epi64((__m128i*)&a[j0 + 16], _mm_castps_si128(xx4)); + _mm_storel_epi64( + (__m128i*)&a[j0 + 48], + _mm_shuffle_epi32(_mm_castps_si128(xx4), _MM_SHUFFLE(3, 2, 3, 2))); + + _mm_storel_epi64((__m128i*)&a[j0 + 8], _mm_castps_si128(xx12)); + _mm_storel_epi64( + (__m128i*)&a[j0 + 40], + _mm_shuffle_epi32(_mm_castps_si128(xx12), _MM_SHUFFLE(3, 2, 3, 2))); + + _mm_storel_epi64((__m128i*)&a[j0 + 24], _mm_castps_si128(xx22)); + _mm_storel_epi64( + (__m128i*)&a[j0 + 56], + _mm_shuffle_epi32(_mm_castps_si128(xx22), _MM_SHUFFLE(3, 2, 3, 2))); + } + } +} + +void rftfsub_128_SSE2(float* a) { + const float* c = rdft_w + 32; + int j1, j2, k1, k2; + float wkr, wki, xr, xi, yr, yi; + + static const ALIGN16_BEG float ALIGN16_END k_half[4] = {0.5f, 0.5f, 0.5f, + 0.5f}; + const __m128 mm_half = _mm_load_ps(k_half); + + // Vectorized code (four at once). + // Note: commented number are indexes for the first iteration of the loop. + for (j1 = 1, j2 = 2; j2 + 7 < 64; j1 += 4, j2 += 8) { + // Load 'wk'. + const __m128 c_j1 = _mm_loadu_ps(&c[j1]); // 1, 2, 3, 4, + const __m128 c_k1 = _mm_loadu_ps(&c[29 - j1]); // 28, 29, 30, 31, + const __m128 wkrt = _mm_sub_ps(mm_half, c_k1); // 28, 29, 30, 31, + const __m128 wkr_ = + _mm_shuffle_ps(wkrt, wkrt, _MM_SHUFFLE(0, 1, 2, 3)); // 31, 30, 29, 28, + const __m128 wki_ = c_j1; // 1, 2, 3, 4, + // Load and shuffle 'a'. 
+ const __m128 a_j2_0 = _mm_loadu_ps(&a[0 + j2]); // 2, 3, 4, 5, + const __m128 a_j2_4 = _mm_loadu_ps(&a[4 + j2]); // 6, 7, 8, 9, + const __m128 a_k2_0 = _mm_loadu_ps(&a[122 - j2]); // 120, 121, 122, 123, + const __m128 a_k2_4 = _mm_loadu_ps(&a[126 - j2]); // 124, 125, 126, 127, + const __m128 a_j2_p0 = _mm_shuffle_ps( + a_j2_0, a_j2_4, _MM_SHUFFLE(2, 0, 2, 0)); // 2, 4, 6, 8, + const __m128 a_j2_p1 = _mm_shuffle_ps( + a_j2_0, a_j2_4, _MM_SHUFFLE(3, 1, 3, 1)); // 3, 5, 7, 9, + const __m128 a_k2_p0 = _mm_shuffle_ps( + a_k2_4, a_k2_0, _MM_SHUFFLE(0, 2, 0, 2)); // 126, 124, 122, 120, + const __m128 a_k2_p1 = _mm_shuffle_ps( + a_k2_4, a_k2_0, _MM_SHUFFLE(1, 3, 1, 3)); // 127, 125, 123, 121, + // Calculate 'x'. + const __m128 xr_ = _mm_sub_ps(a_j2_p0, a_k2_p0); + // 2-126, 4-124, 6-122, 8-120, + const __m128 xi_ = _mm_add_ps(a_j2_p1, a_k2_p1); + // 3-127, 5-125, 7-123, 9-121, + // Calculate product into 'y'. + // yr = wkr * xr - wki * xi; + // yi = wkr * xi + wki * xr; + const __m128 a_ = _mm_mul_ps(wkr_, xr_); + const __m128 b_ = _mm_mul_ps(wki_, xi_); + const __m128 c_ = _mm_mul_ps(wkr_, xi_); + const __m128 d_ = _mm_mul_ps(wki_, xr_); + const __m128 yr_ = _mm_sub_ps(a_, b_); // 2-126, 4-124, 6-122, 8-120, + const __m128 yi_ = _mm_add_ps(c_, d_); // 3-127, 5-125, 7-123, 9-121, + // Update 'a'. + // a[j2 + 0] -= yr; + // a[j2 + 1] -= yi; + // a[k2 + 0] += yr; + // a[k2 + 1] -= yi; + const __m128 a_j2_p0n = _mm_sub_ps(a_j2_p0, yr_); // 2, 4, 6, 8, + const __m128 a_j2_p1n = _mm_sub_ps(a_j2_p1, yi_); // 3, 5, 7, 9, + const __m128 a_k2_p0n = _mm_add_ps(a_k2_p0, yr_); // 126, 124, 122, 120, + const __m128 a_k2_p1n = _mm_sub_ps(a_k2_p1, yi_); // 127, 125, 123, 121, + // Shuffle in right order and store. + const __m128 a_j2_0n = _mm_unpacklo_ps(a_j2_p0n, a_j2_p1n); + // 2, 3, 4, 5, + const __m128 a_j2_4n = _mm_unpackhi_ps(a_j2_p0n, a_j2_p1n); + // 6, 7, 8, 9, + const __m128 a_k2_0nt = _mm_unpackhi_ps(a_k2_p0n, a_k2_p1n); + // 122, 123, 120, 121, + const __m128 a_k2_4nt = _mm_unpacklo_ps(a_k2_p0n, a_k2_p1n); + // 126, 127, 124, 125, + const __m128 a_k2_0n = _mm_shuffle_ps( + a_k2_0nt, a_k2_0nt, _MM_SHUFFLE(1, 0, 3, 2)); // 120, 121, 122, 123, + const __m128 a_k2_4n = _mm_shuffle_ps( + a_k2_4nt, a_k2_4nt, _MM_SHUFFLE(1, 0, 3, 2)); // 124, 125, 126, 127, + _mm_storeu_ps(&a[0 + j2], a_j2_0n); + _mm_storeu_ps(&a[4 + j2], a_j2_4n); + _mm_storeu_ps(&a[122 - j2], a_k2_0n); + _mm_storeu_ps(&a[126 - j2], a_k2_4n); + } + // Scalar code for the remaining items. + for (; j2 < 64; j1 += 1, j2 += 2) { + k2 = 128 - j2; + k1 = 32 - j1; + wkr = 0.5f - c[k1]; + wki = c[j1]; + xr = a[j2 + 0] - a[k2 + 0]; + xi = a[j2 + 1] + a[k2 + 1]; + yr = wkr * xr - wki * xi; + yi = wkr * xi + wki * xr; + a[j2 + 0] -= yr; + a[j2 + 1] -= yi; + a[k2 + 0] += yr; + a[k2 + 1] -= yi; + } +} + +void rftbsub_128_SSE2(float* a) { + const float* c = rdft_w + 32; + int j1, j2, k1, k2; + float wkr, wki, xr, xi, yr, yi; + + static const ALIGN16_BEG float ALIGN16_END k_half[4] = {0.5f, 0.5f, 0.5f, + 0.5f}; + const __m128 mm_half = _mm_load_ps(k_half); + + a[1] = -a[1]; + // Vectorized code (four at once). + // Note: commented number are indexes for the first iteration of the loop. + for (j1 = 1, j2 = 2; j2 + 7 < 64; j1 += 4, j2 += 8) { + // Load 'wk'. 
+ const __m128 c_j1 = _mm_loadu_ps(&c[j1]); // 1, 2, 3, 4, + const __m128 c_k1 = _mm_loadu_ps(&c[29 - j1]); // 28, 29, 30, 31, + const __m128 wkrt = _mm_sub_ps(mm_half, c_k1); // 28, 29, 30, 31, + const __m128 wkr_ = + _mm_shuffle_ps(wkrt, wkrt, _MM_SHUFFLE(0, 1, 2, 3)); // 31, 30, 29, 28, + const __m128 wki_ = c_j1; // 1, 2, 3, 4, + // Load and shuffle 'a'. + const __m128 a_j2_0 = _mm_loadu_ps(&a[0 + j2]); // 2, 3, 4, 5, + const __m128 a_j2_4 = _mm_loadu_ps(&a[4 + j2]); // 6, 7, 8, 9, + const __m128 a_k2_0 = _mm_loadu_ps(&a[122 - j2]); // 120, 121, 122, 123, + const __m128 a_k2_4 = _mm_loadu_ps(&a[126 - j2]); // 124, 125, 126, 127, + const __m128 a_j2_p0 = _mm_shuffle_ps( + a_j2_0, a_j2_4, _MM_SHUFFLE(2, 0, 2, 0)); // 2, 4, 6, 8, + const __m128 a_j2_p1 = _mm_shuffle_ps( + a_j2_0, a_j2_4, _MM_SHUFFLE(3, 1, 3, 1)); // 3, 5, 7, 9, + const __m128 a_k2_p0 = _mm_shuffle_ps( + a_k2_4, a_k2_0, _MM_SHUFFLE(0, 2, 0, 2)); // 126, 124, 122, 120, + const __m128 a_k2_p1 = _mm_shuffle_ps( + a_k2_4, a_k2_0, _MM_SHUFFLE(1, 3, 1, 3)); // 127, 125, 123, 121, + // Calculate 'x'. + const __m128 xr_ = _mm_sub_ps(a_j2_p0, a_k2_p0); + // 2-126, 4-124, 6-122, 8-120, + const __m128 xi_ = _mm_add_ps(a_j2_p1, a_k2_p1); + // 3-127, 5-125, 7-123, 9-121, + // Calculate product into 'y'. + // yr = wkr * xr + wki * xi; + // yi = wkr * xi - wki * xr; + const __m128 a_ = _mm_mul_ps(wkr_, xr_); + const __m128 b_ = _mm_mul_ps(wki_, xi_); + const __m128 c_ = _mm_mul_ps(wkr_, xi_); + const __m128 d_ = _mm_mul_ps(wki_, xr_); + const __m128 yr_ = _mm_add_ps(a_, b_); // 2-126, 4-124, 6-122, 8-120, + const __m128 yi_ = _mm_sub_ps(c_, d_); // 3-127, 5-125, 7-123, 9-121, + // Update 'a'. + // a[j2 + 0] = a[j2 + 0] - yr; + // a[j2 + 1] = yi - a[j2 + 1]; + // a[k2 + 0] = yr + a[k2 + 0]; + // a[k2 + 1] = yi - a[k2 + 1]; + const __m128 a_j2_p0n = _mm_sub_ps(a_j2_p0, yr_); // 2, 4, 6, 8, + const __m128 a_j2_p1n = _mm_sub_ps(yi_, a_j2_p1); // 3, 5, 7, 9, + const __m128 a_k2_p0n = _mm_add_ps(a_k2_p0, yr_); // 126, 124, 122, 120, + const __m128 a_k2_p1n = _mm_sub_ps(yi_, a_k2_p1); // 127, 125, 123, 121, + // Shuffle in right order and store. + const __m128 a_j2_0n = _mm_unpacklo_ps(a_j2_p0n, a_j2_p1n); + // 2, 3, 4, 5, + const __m128 a_j2_4n = _mm_unpackhi_ps(a_j2_p0n, a_j2_p1n); + // 6, 7, 8, 9, + const __m128 a_k2_0nt = _mm_unpackhi_ps(a_k2_p0n, a_k2_p1n); + // 122, 123, 120, 121, + const __m128 a_k2_4nt = _mm_unpacklo_ps(a_k2_p0n, a_k2_p1n); + // 126, 127, 124, 125, + const __m128 a_k2_0n = _mm_shuffle_ps( + a_k2_0nt, a_k2_0nt, _MM_SHUFFLE(1, 0, 3, 2)); // 120, 121, 122, 123, + const __m128 a_k2_4n = _mm_shuffle_ps( + a_k2_4nt, a_k2_4nt, _MM_SHUFFLE(1, 0, 3, 2)); // 124, 125, 126, 127, + _mm_storeu_ps(&a[0 + j2], a_j2_0n); + _mm_storeu_ps(&a[4 + j2], a_j2_4n); + _mm_storeu_ps(&a[122 - j2], a_k2_0n); + _mm_storeu_ps(&a[126 - j2], a_k2_4n); + } + // Scalar code for the remaining items. 
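+  // (The vector loop above covered j1 = 1..28, i.e. j2 = 2..56; this tail
+  // finishes j1 = 29..31, i.e. j2 = 58..62.)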
+ for (; j2 < 64; j1 += 1, j2 += 2) { + k2 = 128 - j2; + k1 = 32 - j1; + wkr = 0.5f - c[k1]; + wki = c[j1]; + xr = a[j2 + 0] - a[k2 + 0]; + xi = a[j2 + 1] + a[k2 + 1]; + yr = wkr * xr + wki * xi; + yi = wkr * xi - wki * xr; + a[j2 + 0] = a[j2 + 0] - yr; + a[j2 + 1] = yi - a[j2 + 1]; + a[k2 + 0] = yr + a[k2 + 0]; + a[k2 + 1] = yi - a[k2 + 1]; + } + a[65] = -a[65]; +} +#endif + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/utility/ooura_fft_tables_common.h b/third_party/libwebrtc/webrtc/modules/audio_processing/utility/ooura_fft_tables_common.h new file mode 100644 index 0000000000..47d076ea2a --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/utility/ooura_fft_tables_common.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_UTILITY_OOURA_FFT_TABLES_COMMON_H_ +#define MODULES_AUDIO_PROCESSING_UTILITY_OOURA_FFT_TABLES_COMMON_H_ + +#include "modules/audio_processing/utility/ooura_fft.h" + +namespace webrtc { + +// This tables used to be computed at run-time. For example, refer to: +// https://code.google.com/p/webrtc/source/browse/trunk/webrtc/modules/audio_processing/utility/apm_rdft.c?r=6564 +// to see the initialization code. +// Constants shared by all paths (C, SSE2, NEON). +const float rdft_w[64] = { + 1.0000000000f, 0.0000000000f, 0.7071067691f, 0.7071067691f, 0.9238795638f, + 0.3826834559f, 0.3826834559f, 0.9238795638f, 0.9807852507f, 0.1950903237f, + 0.5555702448f, 0.8314695954f, 0.8314695954f, 0.5555702448f, 0.1950903237f, + 0.9807852507f, 0.9951847196f, 0.0980171412f, 0.6343933344f, 0.7730104327f, + 0.8819212914f, 0.4713967443f, 0.2902846634f, 0.9569403529f, 0.9569403529f, + 0.2902846634f, 0.4713967443f, 0.8819212914f, 0.7730104327f, 0.6343933344f, + 0.0980171412f, 0.9951847196f, 0.7071067691f, 0.4993977249f, 0.4975923598f, + 0.4945882559f, 0.4903926253f, 0.4850156307f, 0.4784701765f, 0.4707720280f, + 0.4619397819f, 0.4519946277f, 0.4409606457f, 0.4288643003f, 0.4157347977f, + 0.4016037583f, 0.3865052164f, 0.3704755902f, 0.3535533845f, 0.3357794881f, + 0.3171966672f, 0.2978496552f, 0.2777851224f, 0.2570513785f, 0.2356983721f, + 0.2137775421f, 0.1913417280f, 0.1684449315f, 0.1451423317f, 0.1214900985f, + 0.0975451618f, 0.0733652338f, 0.0490085706f, 0.0245338380f, +}; + +// Constants used by the C and MIPS paths. 
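+// (Like rdft_w above, these are precomputed cosine/sine twiddle samples; the
+// apm_rdft.c revision linked above shows how they were once generated.)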
+const float rdft_wk3ri_first[16] = { + 1.000000000f, 0.000000000f, 0.382683456f, 0.923879564f, + 0.831469536f, 0.555570245f, -0.195090353f, 0.980785251f, + 0.956940353f, 0.290284693f, 0.098017156f, 0.995184720f, + 0.634393334f, 0.773010492f, -0.471396863f, 0.881921172f, +}; +const float rdft_wk3ri_second[16] = { + -0.707106769f, 0.707106769f, -0.923879564f, -0.382683456f, + -0.980785251f, 0.195090353f, -0.555570245f, -0.831469536f, + -0.881921172f, 0.471396863f, -0.773010492f, -0.634393334f, + -0.995184720f, -0.098017156f, -0.290284693f, -0.956940353f, +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_UTILITY_OOURA_FFT_TABLES_COMMON_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/utility/ooura_fft_tables_neon_sse2.h b/third_party/libwebrtc/webrtc/modules/audio_processing/utility/ooura_fft_tables_neon_sse2.h new file mode 100644 index 0000000000..1c44ae7197 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/utility/ooura_fft_tables_neon_sse2.h @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_UTILITY_OOURA_FFT_TABLES_NEON_SSE2_H_ +#define MODULES_AUDIO_PROCESSING_UTILITY_OOURA_FFT_TABLES_NEON_SSE2_H_ + +#include "modules/audio_processing/utility/ooura_fft.h" + +#ifdef _MSC_VER /* visual c++ */ +#define ALIGN16_BEG __declspec(align(16)) +#define ALIGN16_END +#else /* gcc or icc */ +#define ALIGN16_BEG +#define ALIGN16_END __attribute__((aligned(16))) +#endif + +namespace webrtc { + +// These tables used to be computed at run-time. For example, refer to: +// https://code.google.com/p/webrtc/source/browse/trunk/webrtc/modules/audio_processing/utility/apm_rdft.c?r=6564 +// to see the initialization code. +#if defined(WEBRTC_ARCH_X86_FAMILY) || defined(WEBRTC_HAS_NEON) +// Constants used by SSE2 and NEON but initialized in the C path. 
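+// k_swap_sign implements multiplication by +/-j on interleaved (re, im)
+// pairs: the SIMD code first swaps each pair in place (vrev64q_f32 on NEON,
+// _MM_SHUFFLE(2, 3, 0, 1) on SSE2) and then multiplies by this mask to
+// obtain (-im, re).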
+const ALIGN16_BEG float ALIGN16_END k_swap_sign[4] = {-1.f, 1.f, -1.f, 1.f}; + +ALIGN16_BEG const float ALIGN16_END rdft_wk1r[32] = { + 1.000000000f, 1.000000000f, 0.707106769f, 0.707106769f, 0.923879564f, + 0.923879564f, 0.382683456f, 0.382683456f, 0.980785251f, 0.980785251f, + 0.555570245f, 0.555570245f, 0.831469595f, 0.831469595f, 0.195090324f, + 0.195090324f, 0.995184720f, 0.995184720f, 0.634393334f, 0.634393334f, + 0.881921291f, 0.881921291f, 0.290284663f, 0.290284663f, 0.956940353f, + 0.956940353f, 0.471396744f, 0.471396744f, 0.773010433f, 0.773010433f, + 0.098017141f, 0.098017141f, +}; +ALIGN16_BEG const float ALIGN16_END rdft_wk2r[32] = { + 1.000000000f, 1.000000000f, -0.000000000f, -0.000000000f, 0.707106769f, + 0.707106769f, -0.707106769f, -0.707106769f, 0.923879564f, 0.923879564f, + -0.382683456f, -0.382683456f, 0.382683456f, 0.382683456f, -0.923879564f, + -0.923879564f, 0.980785251f, 0.980785251f, -0.195090324f, -0.195090324f, + 0.555570245f, 0.555570245f, -0.831469595f, -0.831469595f, 0.831469595f, + 0.831469595f, -0.555570245f, -0.555570245f, 0.195090324f, 0.195090324f, + -0.980785251f, -0.980785251f, +}; +ALIGN16_BEG const float ALIGN16_END rdft_wk3r[32] = { + 1.000000000f, 1.000000000f, -0.707106769f, -0.707106769f, 0.382683456f, + 0.382683456f, -0.923879564f, -0.923879564f, 0.831469536f, 0.831469536f, + -0.980785251f, -0.980785251f, -0.195090353f, -0.195090353f, -0.555570245f, + -0.555570245f, 0.956940353f, 0.956940353f, -0.881921172f, -0.881921172f, + 0.098017156f, 0.098017156f, -0.773010492f, -0.773010492f, 0.634393334f, + 0.634393334f, -0.995184720f, -0.995184720f, -0.471396863f, -0.471396863f, + -0.290284693f, -0.290284693f, +}; +ALIGN16_BEG const float ALIGN16_END rdft_wk1i[32] = { + -0.000000000f, 0.000000000f, -0.707106769f, 0.707106769f, -0.382683456f, + 0.382683456f, -0.923879564f, 0.923879564f, -0.195090324f, 0.195090324f, + -0.831469595f, 0.831469595f, -0.555570245f, 0.555570245f, -0.980785251f, + 0.980785251f, -0.098017141f, 0.098017141f, -0.773010433f, 0.773010433f, + -0.471396744f, 0.471396744f, -0.956940353f, 0.956940353f, -0.290284663f, + 0.290284663f, -0.881921291f, 0.881921291f, -0.634393334f, 0.634393334f, + -0.995184720f, 0.995184720f, +}; +ALIGN16_BEG const float ALIGN16_END rdft_wk2i[32] = { + -0.000000000f, 0.000000000f, -1.000000000f, 1.000000000f, -0.707106769f, + 0.707106769f, -0.707106769f, 0.707106769f, -0.382683456f, 0.382683456f, + -0.923879564f, 0.923879564f, -0.923879564f, 0.923879564f, -0.382683456f, + 0.382683456f, -0.195090324f, 0.195090324f, -0.980785251f, 0.980785251f, + -0.831469595f, 0.831469595f, -0.555570245f, 0.555570245f, -0.555570245f, + 0.555570245f, -0.831469595f, 0.831469595f, -0.980785251f, 0.980785251f, + -0.195090324f, 0.195090324f, +}; +ALIGN16_BEG const float ALIGN16_END rdft_wk3i[32] = { + -0.000000000f, 0.000000000f, -0.707106769f, 0.707106769f, -0.923879564f, + 0.923879564f, 0.382683456f, -0.382683456f, -0.555570245f, 0.555570245f, + -0.195090353f, 0.195090353f, -0.980785251f, 0.980785251f, 0.831469536f, + -0.831469536f, -0.290284693f, 0.290284693f, -0.471396863f, 0.471396863f, + -0.995184720f, 0.995184720f, 0.634393334f, -0.634393334f, -0.773010492f, + 0.773010492f, 0.098017156f, -0.098017156f, -0.881921172f, 0.881921172f, + 0.956940353f, -0.956940353f, +}; +ALIGN16_BEG const float ALIGN16_END cftmdl_wk1r[4] = { + 0.707106769f, 0.707106769f, 0.707106769f, -0.707106769f, +}; +#endif + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_UTILITY_OOURA_FFT_TABLES_NEON_SSE2_H_ diff --git 
a/third_party/libwebrtc/webrtc/modules/audio_processing/vad/common.h b/third_party/libwebrtc/webrtc/modules/audio_processing/vad/common.h
new file mode 100644
index 0000000000..b5a5fb385b
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/vad/common.h
@@ -0,0 +1,29 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_VAD_COMMON_H_
+#define MODULES_AUDIO_PROCESSING_VAD_COMMON_H_
+
+#include <stddef.h>
+
+static const int kSampleRateHz = 16000;
+static const size_t kLength10Ms = kSampleRateHz / 100;
+static const size_t kMaxNumFrames = 4;
+
+struct AudioFeatures {
+  double log_pitch_gain[kMaxNumFrames];
+  double pitch_lag_hz[kMaxNumFrames];
+  double spectral_peak[kMaxNumFrames];
+  double rms[kMaxNumFrames];
+  size_t num_frames;
+  bool silence;
+};
+
+#endif  // MODULES_AUDIO_PROCESSING_VAD_COMMON_H_
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/vad/gmm.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/vad/gmm.cc
new file mode 100644
index 0000000000..266ca44cc9
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/vad/gmm.cc
@@ -0,0 +1,64 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/vad/gmm.h"
+
+#include <math.h>
+#include <stdlib.h>
+
+#include "typedefs.h"  // NOLINT(build/include)
+
+namespace webrtc {
+
+static const int kMaxDimension = 10;
+
+static void RemoveMean(const double* in,
+                       const double* mean_vec,
+                       int dimension,
+                       double* out) {
+  for (int n = 0; n < dimension; ++n)
+    out[n] = in[n] - mean_vec[n];
+}
+
+static double ComputeExponent(const double* in,
+                              const double* covar_inv,
+                              int dimension) {
+  double q = 0;
+  for (int i = 0; i < dimension; ++i) {
+    double v = 0;
+    for (int j = 0; j < dimension; j++)
+      v += (*covar_inv++) * in[j];
+    q += v * in[i];
+  }
+  q *= -0.5;
+  return q;
+}
+
+double EvaluateGmm(const double* x, const GmmParameters& gmm_parameters) {
+  if (gmm_parameters.dimension > kMaxDimension) {
+    return -1;  // Return an invalid pdf value so the caller can detect this.
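+    // (A genuine pdf from the mixture sum below is always positive, being a
+    // sum of exponentials, so a negative return is unambiguous. As a sanity
+    // check of the math: with dimension = 1, num_mixtures = 1, mean = {0},
+    // covar_inverse = {1} and weight = {-0.5 * log(2 * pi)}, the sum reduces
+    // to exp(-0.5 * x * x) / sqrt(2 * pi), the standard normal pdf.)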
+ } + double f = 0; + double v[kMaxDimension]; + const double* mean_vec = gmm_parameters.mean; + const double* covar_inv = gmm_parameters.covar_inverse; + + for (int n = 0; n < gmm_parameters.num_mixtures; n++) { + RemoveMean(x, mean_vec, gmm_parameters.dimension, v); + double q = ComputeExponent(v, covar_inv, gmm_parameters.dimension) + + gmm_parameters.weight[n]; + f += exp(q); + mean_vec += gmm_parameters.dimension; + covar_inv += gmm_parameters.dimension * gmm_parameters.dimension; + } + return f; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/vad/gmm.h b/third_party/libwebrtc/webrtc/modules/audio_processing/vad/gmm.h new file mode 100644 index 0000000000..93eb675c46 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/vad/gmm.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_VAD_GMM_H_ +#define MODULES_AUDIO_PROCESSING_VAD_GMM_H_ + +namespace webrtc { + +// A structure that specifies a GMM. +// A GMM is formulated as +// f(x) = w[0] * mixture[0] + w[1] * mixture[1] + ... + +// w[num_mixtures - 1] * mixture[num_mixtures - 1]; +// Where a 'mixture' is a Gaussian density. + +struct GmmParameters { + // weight[n] = log(w[n]) - |dimension|/2 * log(2*pi) - 1/2 * log(det(cov[n])); + // where cov[n] is the covariance matrix of mixture n; + const double* weight; + // pointer to the first element of a |num_mixtures|x|dimension| matrix + // where kth row is the mean of the kth mixture. + const double* mean; + // pointer to the first element of a |num_mixtures|x|dimension|x|dimension| + // 3D-matrix, where the kth 2D-matrix is the inverse of the covariance + // matrix of the kth mixture. + const double* covar_inverse; + // Dimensionality of the mixtures. + int dimension; + // number of the mixtures. + int num_mixtures; +}; + +// Evaluate the given GMM, according to |gmm_parameters|, at the given point +// |x|. If the dimensionality of the given GMM is larger that the maximum +// acceptable dimension by the following function -1 is returned. +double EvaluateGmm(const double* x, const GmmParameters& gmm_parameters); + +} // namespace webrtc +#endif // MODULES_AUDIO_PROCESSING_VAD_GMM_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/vad/gmm_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/vad/gmm_unittest.cc new file mode 100644 index 0000000000..dfc8855a14 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/vad/gmm_unittest.cc @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "modules/audio_processing/vad/gmm.h"
+
+#include <math.h>
+
+#include "modules/audio_processing/vad/noise_gmm_tables.h"
+#include "modules/audio_processing/vad/voice_gmm_tables.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+
+TEST(GmmTest, EvaluateGmm) {
+  GmmParameters noise_gmm;
+  GmmParameters voice_gmm;
+
+  // Setup noise GMM.
+  noise_gmm.dimension = kNoiseGmmDim;
+  noise_gmm.num_mixtures = kNoiseGmmNumMixtures;
+  noise_gmm.weight = kNoiseGmmWeights;
+  noise_gmm.mean = &kNoiseGmmMean[0][0];
+  noise_gmm.covar_inverse = &kNoiseGmmCovarInverse[0][0][0];
+
+  // Setup voice GMM.
+  voice_gmm.dimension = kVoiceGmmDim;
+  voice_gmm.num_mixtures = kVoiceGmmNumMixtures;
+  voice_gmm.weight = kVoiceGmmWeights;
+  voice_gmm.mean = &kVoiceGmmMean[0][0];
+  voice_gmm.covar_inverse = &kVoiceGmmCovarInverse[0][0][0];
+
+  // Test vectors. These are the mean of the GMM means.
+  const double kXVoice[kVoiceGmmDim] = {
+      -1.35893162459863, 602.862491970368, 178.022069191324};
+  const double kXNoise[kNoiseGmmDim] = {
+      -2.33443722724409, 2827.97828765184, 141.114178166812};
+
+  // Expected pdf values. These values are computed in MATLAB using EvalGmm.m.
+  const double kPdfNoise = 1.88904409403101e-07;
+  const double kPdfVoice = 1.30453996982266e-06;
+
+  // Relative error should be smaller than the following value.
+  const double kAcceptedRelativeErr = 1e-10;
+
+  // Test Voice.
+  double pdf = EvaluateGmm(kXVoice, voice_gmm);
+  EXPECT_GT(pdf, 0);
+  double relative_error = fabs(pdf - kPdfVoice) / kPdfVoice;
+  EXPECT_LE(relative_error, kAcceptedRelativeErr);
+
+  // Test Noise.
+  pdf = EvaluateGmm(kXNoise, noise_gmm);
+  EXPECT_GT(pdf, 0);
+  relative_error = fabs(pdf - kPdfNoise) / kPdfNoise;
+  EXPECT_LE(relative_error, kAcceptedRelativeErr);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/vad/noise_gmm_tables.h b/third_party/libwebrtc/webrtc/modules/audio_processing/vad/noise_gmm_tables.h
new file mode 100644
index 0000000000..c07dadebd5
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/vad/noise_gmm_tables.h
@@ -0,0 +1,85 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+// GMM tables for inactive segments. Generated by MakeGmmTables.m.
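+// Each mixture contributes a log-domain weight (in the form described in
+// gmm.h), a mean vector and an inverse covariance matrix for the
+// 3-dimensional feature: log pitch gain, spectral peak and pitch lag in Hz.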
+ +#ifndef MODULES_AUDIO_PROCESSING_VAD_NOISE_GMM_TABLES_H_ +#define MODULES_AUDIO_PROCESSING_VAD_NOISE_GMM_TABLES_H_ + +static const int kNoiseGmmNumMixtures = 12; +static const int kNoiseGmmDim = 3; + +static const double + kNoiseGmmCovarInverse[kNoiseGmmNumMixtures][kNoiseGmmDim][kNoiseGmmDim] = { + {{7.36219567592941e+00, 4.83060785179861e-03, 1.23335151497610e-02}, + {4.83060785179861e-03, 1.65289507047817e-04, -2.41490588169997e-04}, + {1.23335151497610e-02, -2.41490588169997e-04, 6.59472060689382e-03}}, + {{8.70265239309140e+00, -5.30636201431086e-04, 5.44014966585347e-03}, + {-5.30636201431086e-04, 3.11095453521008e-04, -1.86287206836035e-04}, + {5.44014966585347e-03, -1.86287206836035e-04, 6.29493388790744e-04}}, + {{4.53467851955055e+00, -3.92977536695197e-03, -2.46521420693317e-03}, + {-3.92977536695197e-03, 4.94650752632750e-05, -1.08587438501826e-05}, + {-2.46521420693317e-03, -1.08587438501826e-05, 9.28793975422261e-05}}, + {{9.26817997114275e-01, -4.03976069276753e-04, -3.56441427392165e-03}, + {-4.03976069276753e-04, 2.51976251631430e-06, 1.46914206734572e-07}, + {-3.56441427392165e-03, 1.46914206734572e-07, 8.19914567685373e-05}}, + {{7.61715986787441e+00, -1.54889041216888e-04, 2.41756280071656e-02}, + {-1.54889041216888e-04, 3.50282550461672e-07, -6.27251196972490e-06}, + {2.41756280071656e-02, -6.27251196972490e-06, 1.45061847649872e-02}}, + {{8.31193642663158e+00, -3.84070508164323e-04, -3.09750630821876e-02}, + {-3.84070508164323e-04, 3.80433432277336e-07, -1.14321142836636e-06}, + {-3.09750630821876e-02, -1.14321142836636e-06, 8.35091486289997e-04}}, + {{9.67283151270894e-01, 5.82465812445039e-05, -3.18350798617053e-03}, + {5.82465812445039e-05, 2.23762672000318e-07, -7.74196587408623e-07}, + {-3.18350798617053e-03, -7.74196587408623e-07, 3.85120938338325e-04}}, + {{8.28066236985388e+00, 5.87634508319763e-05, 6.99303090891743e-03}, + {5.87634508319763e-05, 2.93746018618058e-07, 3.40843332882272e-07}, + {6.99303090891743e-03, 3.40843332882272e-07, 1.99379171190344e-04}}, + {{6.07488998675646e+00, -1.11494526618473e-02, 5.10013111123381e-03}, + {-1.11494526618473e-02, 6.99238879921751e-04, 5.36718550370870e-05}, + {5.10013111123381e-03, 5.36718550370870e-05, 5.26909853276753e-04}}, + {{6.90492021419175e+00, 4.20639355257863e-04, -2.38612752336481e-03}, + {4.20639355257863e-04, 3.31246767338153e-06, -2.42052288150859e-08}, + {-2.38612752336481e-03, -2.42052288150859e-08, 4.46608368363412e-04}}, + {{1.31069150869715e+01, -1.73718583865670e-04, -1.97591814508578e-02}, + {-1.73718583865670e-04, 2.80451716300124e-07, 9.96570755379865e-07}, + {-1.97591814508578e-02, 9.96570755379865e-07, 2.41361900868847e-03}}, + {{4.69566344239814e+00, -2.61077567563690e-04, 5.26359000761433e-03}, + {-2.61077567563690e-04, 1.82420859823767e-06, -7.83645887541601e-07}, + {5.26359000761433e-03, -7.83645887541601e-07, 1.33586288288802e-02}}}; + +static const double kNoiseGmmMean[kNoiseGmmNumMixtures][kNoiseGmmDim] = { + {-2.01386094766163e+00, 1.69702162045397e+02, 7.41715804872181e+01}, + {-1.94684591777290e+00, 1.42398396732668e+02, 1.64186321157831e+02}, + {-2.29319297562437e+00, 3.86415425589868e+02, 2.13452215267125e+02}, + {-3.25487177070268e+00, 1.08668712553616e+03, 2.33119949467419e+02}, + {-2.13159632447467e+00, 4.83821702557717e+03, 6.86786166673740e+01}, + {-2.26171410780526e+00, 4.79420193982422e+03, 1.53222513286450e+02}, + {-3.32166740703185e+00, 4.35161135834358e+03, 1.33206448431316e+02}, + {-2.19290322814343e+00, 3.98325506609408e+03, 2.13249167359934e+02}, + 
{-2.02898459255404e+00, 7.37039893155007e+03, 1.12518527491926e+02}, + {-2.26150236399500e+00, 1.54896745196145e+03, 1.49717357868579e+02}, + {-2.00417668301790e+00, 3.82434760310304e+03, 1.07438913004312e+02}, + {-2.30193040814533e+00, 1.43953696546439e+03, 7.04085275122649e+01}}; + +static const double kNoiseGmmWeights[kNoiseGmmNumMixtures] = { + -1.09422832086193e+01, + -1.10847897513425e+01, + -1.36767587732187e+01, + -1.79789356118641e+01, + -1.42830169160894e+01, + -1.56500228061379e+01, + -1.83124990950113e+01, + -1.69979436177477e+01, + -1.12329424387828e+01, + -1.41311785780639e+01, + -1.47171861448585e+01, + -1.35963362781839e+01}; +#endif // MODULES_AUDIO_PROCESSING_VAD_NOISE_GMM_TABLES_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/vad/pitch_based_vad.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/vad/pitch_based_vad.cc new file mode 100644 index 0000000000..bca2552c35 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/vad/pitch_based_vad.cc @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/vad/pitch_based_vad.h" + +#include <math.h> +#include <string.h> + +#include "modules/audio_processing/vad/vad_circular_buffer.h" +#include "modules/audio_processing/vad/common.h" +#include "modules/audio_processing/vad/noise_gmm_tables.h" +#include "modules/audio_processing/vad/voice_gmm_tables.h" +#include "modules/include/module_common_types.h" + +namespace webrtc { + +static_assert(kNoiseGmmDim == kVoiceGmmDim, + "noise and voice gmm dimension not equal"); + +// These values should match MATLAB counterparts for unit-tests to pass. +static const int kPosteriorHistorySize = 500; // 5 sec of 10 ms frames. +static const double kInitialPriorProbability = 0.3; +static const int kTransientWidthThreshold = 7; +static const double kLowProbabilityThreshold = 0.2; + +static double LimitProbability(double p) { + const double kLimHigh = 0.99; + const double kLimLow = 0.01; + + if (p > kLimHigh) + p = kLimHigh; + else if (p < kLimLow) + p = kLimLow; + return p; +} + +PitchBasedVad::PitchBasedVad() + : p_prior_(kInitialPriorProbability), + circular_buffer_(VadCircularBuffer::Create(kPosteriorHistorySize)) { + // Setup noise GMM. + noise_gmm_.dimension = kNoiseGmmDim; + noise_gmm_.num_mixtures = kNoiseGmmNumMixtures; + noise_gmm_.weight = kNoiseGmmWeights; + noise_gmm_.mean = &kNoiseGmmMean[0][0]; + noise_gmm_.covar_inverse = &kNoiseGmmCovarInverse[0][0][0]; + + // Setup voice GMM. + voice_gmm_.dimension = kVoiceGmmDim; + voice_gmm_.num_mixtures = kVoiceGmmNumMixtures; + voice_gmm_.weight = kVoiceGmmWeights; + voice_gmm_.mean = &kVoiceGmmMean[0][0]; + voice_gmm_.covar_inverse = &kVoiceGmmCovarInverse[0][0][0]; +} + +PitchBasedVad::~PitchBasedVad() { +} + +int PitchBasedVad::VoicingProbability(const AudioFeatures& features, + double* p_combined) { + double p; + double gmm_features[3]; + double pdf_features_given_voice; + double pdf_features_given_noise; + // These limits are the same in matlab implementation 'VoicingProbGMM().' 
+  const double kLimLowLogPitchGain = -2.0;
+  const double kLimHighLogPitchGain = -0.9;
+  const double kLimLowSpectralPeak = 200;
+  const double kLimHighSpectralPeak = 2000;
+  const double kEps = 1e-12;
+  for (size_t n = 0; n < features.num_frames; n++) {
+    gmm_features[0] = features.log_pitch_gain[n];
+    gmm_features[1] = features.spectral_peak[n];
+    gmm_features[2] = features.pitch_lag_hz[n];
+
+    pdf_features_given_voice = EvaluateGmm(gmm_features, voice_gmm_);
+    pdf_features_given_noise = EvaluateGmm(gmm_features, noise_gmm_);
+
+    if (features.spectral_peak[n] < kLimLowSpectralPeak ||
+        features.spectral_peak[n] > kLimHighSpectralPeak ||
+        features.log_pitch_gain[n] < kLimLowLogPitchGain) {
+      pdf_features_given_voice = kEps * pdf_features_given_noise;
+    } else if (features.log_pitch_gain[n] > kLimHighLogPitchGain) {
+      pdf_features_given_noise = kEps * pdf_features_given_voice;
+    }
+
+    p = p_prior_ * pdf_features_given_voice /
+        (pdf_features_given_voice * p_prior_ +
+         pdf_features_given_noise * (1 - p_prior_));
+
+    p = LimitProbability(p);
+
+    // Combine pitch-based probability with standalone probability, before
+    // updating prior probabilities.
+    double prod_active = p * p_combined[n];
+    double prod_inactive = (1 - p) * (1 - p_combined[n]);
+    p_combined[n] = prod_active / (prod_active + prod_inactive);
+
+    if (UpdatePrior(p_combined[n]) < 0)
+      return -1;
+    // Limit prior probability. With a zero prior probability the posterior
+    // probability is always zero.
+    p_prior_ = LimitProbability(p_prior_);
+  }
+  return 0;
+}
+
+int PitchBasedVad::UpdatePrior(double p) {
+  circular_buffer_->Insert(p);
+  if (circular_buffer_->RemoveTransient(kTransientWidthThreshold,
+                                        kLowProbabilityThreshold) < 0)
+    return -1;
+  p_prior_ = circular_buffer_->Mean();
+  return 0;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/vad/pitch_based_vad.h b/third_party/libwebrtc/webrtc/modules/audio_processing/vad/pitch_based_vad.h
new file mode 100644
index 0000000000..584dcc73ce
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/vad/pitch_based_vad.h
@@ -0,0 +1,58 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_VAD_PITCH_BASED_VAD_H_
+#define MODULES_AUDIO_PROCESSING_VAD_PITCH_BASED_VAD_H_
+
+#include <memory>
+
+#include "modules/audio_processing/vad/common.h"
+#include "modules/audio_processing/vad/gmm.h"
+#include "typedefs.h"  // NOLINT(build/include)
+
+namespace webrtc {
+
+class AudioFrame;
+class VadCircularBuffer;
+
+// Computes the probability of the input audio frame to be active given
+// the corresponding pitch-gain and lag of the frame.
+class PitchBasedVad {
+ public:
+  PitchBasedVad();
+  ~PitchBasedVad();
+
+  // Compute pitch-based voicing probability, given the features.
+  //   features: a structure containing features required for computing voicing
+  //             probabilities.
+  //
+  //   p_combined: an array which contains the combined activity probabilities
+  //               computed prior to the call of this function. The method
+  //               then computes the voicing probabilities, combines them with
+  //               the given values and returns the results in |p_combined|.
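+  //               The per-frame combination is Bayesian:
+  //                 p_combined = p * p_combined /
+  //                     (p * p_combined + (1 - p) * (1 - p_combined)),
+  //               so an input of 0.5 is neutral and returns the pitch-based
+  //               probability unchanged.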
+ int VoicingProbability(const AudioFeatures& features, double* p_combined); + + private: + int UpdatePrior(double p); + + // TODO(turajs): maybe defining this at a higher level (maybe enum) so that + // all the code recognize it as "no-error." + static const int kNoError = 0; + + GmmParameters noise_gmm_; + GmmParameters voice_gmm_; + + double p_prior_; + + std::unique_ptr<VadCircularBuffer> circular_buffer_; +}; + +} // namespace webrtc +#endif // MODULES_AUDIO_PROCESSING_VAD_PITCH_BASED_VAD_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/vad/pitch_based_vad_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/vad/pitch_based_vad_unittest.cc new file mode 100644 index 0000000000..fb6daa523e --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/vad/pitch_based_vad_unittest.cc @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/vad/pitch_based_vad.h" + +#include <math.h> +#include <stdio.h> + +#include <string> + +#include "test/gtest.h" +#include "test/testsupport/fileutils.h" + +namespace webrtc { + +TEST(PitchBasedVadTest, VoicingProbabilityTest) { + std::string spectral_peak_file_name = + test::ResourcePath("audio_processing/agc/agc_spectral_peak", "dat"); + FILE* spectral_peak_file = fopen(spectral_peak_file_name.c_str(), "rb"); + ASSERT_TRUE(spectral_peak_file != NULL); + + std::string pitch_gain_file_name = + test::ResourcePath("audio_processing/agc/agc_pitch_gain", "dat"); + FILE* pitch_gain_file = fopen(pitch_gain_file_name.c_str(), "rb"); + ASSERT_TRUE(pitch_gain_file != NULL); + + std::string pitch_lag_file_name = + test::ResourcePath("audio_processing/agc/agc_pitch_lag", "dat"); + FILE* pitch_lag_file = fopen(pitch_lag_file_name.c_str(), "rb"); + ASSERT_TRUE(pitch_lag_file != NULL); + + std::string voicing_prob_file_name = + test::ResourcePath("audio_processing/agc/agc_voicing_prob", "dat"); + FILE* voicing_prob_file = fopen(voicing_prob_file_name.c_str(), "rb"); + ASSERT_TRUE(voicing_prob_file != NULL); + + PitchBasedVad vad_; + + double reference_activity_probability; + + AudioFeatures audio_features; + memset(&audio_features, 0, sizeof(audio_features)); + audio_features.num_frames = 1; + while (fread(audio_features.spectral_peak, + sizeof(audio_features.spectral_peak[0]), 1, + spectral_peak_file) == 1u) { + double p; + ASSERT_EQ(1u, fread(audio_features.log_pitch_gain, + sizeof(audio_features.log_pitch_gain[0]), 1, + pitch_gain_file)); + ASSERT_EQ(1u, + fread(audio_features.pitch_lag_hz, + sizeof(audio_features.pitch_lag_hz[0]), 1, pitch_lag_file)); + ASSERT_EQ(1u, fread(&reference_activity_probability, + sizeof(reference_activity_probability), 1, + voicing_prob_file)); + + p = 0.5; // Initialize to the neutral value for combining probabilities. 
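+    // (0.5 is the neutral input to the Bayesian combination:
+    // 0.5 * p / (0.5 * p + 0.5 * (1 - p)) == p, so each frame is compared on
+    // the pitch-based probability alone.)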
+    EXPECT_EQ(0, vad_.VoicingProbability(audio_features, &p));
+    EXPECT_NEAR(p, reference_activity_probability, 0.01);
+  }
+
+  fclose(spectral_peak_file);
+  fclose(pitch_gain_file);
+  fclose(pitch_lag_file);
+  fclose(voicing_prob_file);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/vad/pitch_internal.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/vad/pitch_internal.cc
new file mode 100644
index 0000000000..7e6bd3e616
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/vad/pitch_internal.cc
@@ -0,0 +1,51 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/vad/pitch_internal.h"
+
+#include <cmath>
+
+// A 4-to-3 linear interpolation.
+// The interpolation constants are derived as follows:
+// Input pitch parameters are updated every 7.5 ms. Within a 30-ms interval
+// we are interested in the pitch parameters of 0-5 ms, 10-15 ms and 20-25 ms.
+// This is like interpolating 4-to-6 and keeping the odd samples.
+// The reason behind this is that LPC coefficients are computed for the first
+// half of each 10 ms interval.
+static void PitchInterpolation(double old_val, const double* in, double* out) {
+  out[0] = 1. / 6. * old_val + 5. / 6. * in[0];
+  out[1] = 5. / 6. * in[1] + 1. / 6. * in[2];
+  out[2] = 0.5 * in[2] + 0.5 * in[3];
+}
+
+void GetSubframesPitchParameters(int sampling_rate_hz,
+                                 double* gains,
+                                 double* lags,
+                                 int num_in_frames,
+                                 int num_out_frames,
+                                 double* log_old_gain,
+                                 double* old_lag,
+                                 double* log_pitch_gain,
+                                 double* pitch_lag_hz) {
+  // Gain interpolation is done in the log domain; gains are also returned in
+  // the log domain.
+  for (int n = 0; n < num_in_frames; n++)
+    gains[n] = log(gains[n] + 1e-12);
+
+  // Interpolate lags and gains.
+  PitchInterpolation(*log_old_gain, gains, log_pitch_gain);
+  *log_old_gain = gains[num_in_frames - 1];
+  PitchInterpolation(*old_lag, lags, pitch_lag_hz);
+  *old_lag = lags[num_in_frames - 1];
+
+  // Convert pitch lags to Hertz.
+  for (int n = 0; n < num_out_frames; n++) {
+    pitch_lag_hz[n] = sampling_rate_hz / pitch_lag_hz[n];
+  }
+}
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/vad/pitch_internal.h b/third_party/libwebrtc/webrtc/modules/audio_processing/vad/pitch_internal.h
new file mode 100644
index 0000000000..67e0522328
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/vad/pitch_internal.h
@@ -0,0 +1,26 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_VAD_PITCH_INTERNAL_H_
+#define MODULES_AUDIO_PROCESSING_VAD_PITCH_INTERNAL_H_
+
+// TODO(turajs): Be consistent with the usage of |sampling_rate_hz| vs
+// |kSamplingFreqHz|.
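+// Maps the four pitch estimates of a 30 ms frame (one per 7.5 ms) onto three
+// 10 ms subframes, carrying |log_old_gain| and |old_lag| over from the
+// previous frame. A sketch of the mapping implemented in pitch_internal.cc
+// (gains are first moved to the log domain):
+//   out[0] = 1/6 * old + 5/6 * in[0]
+//   out[1] = 5/6 * in[1] + 1/6 * in[2]
+//   out[2] = 1/2 * (in[2] + in[3])
+// Lags are then converted to Hertz as sampling_rate_hz / lag.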
+void GetSubframesPitchParameters(int sampling_rate_hz, + double* gains, + double* lags, + int num_in_frames, + int num_out_frames, + double* log_old_gain, + double* old_lag, + double* log_pitch_gain, + double* pitch_lag_hz); + +#endif // MODULES_AUDIO_PROCESSING_VAD_PITCH_INTERNAL_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/vad/pitch_internal_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/vad/pitch_internal_unittest.cc new file mode 100644 index 0000000000..c1fde10008 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/vad/pitch_internal_unittest.cc @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/vad/pitch_internal.h" + +#include <math.h> + +#include "test/gtest.h" + +TEST(PitchInternalTest, test) { + const int kSamplingRateHz = 8000; + const int kNumInputParameters = 4; + const int kNumOutputParameters = 3; + // Inputs + double log_old_gain = log(0.5); + double gains[] = {0.6, 0.2, 0.5, 0.4}; + + double old_lag = 70; + double lags[] = {90, 111, 122, 50}; + + // Expected outputs + double expected_log_pitch_gain[] = { + -0.541212549898316, -1.45672279045507, -0.80471895621705}; + double expected_log_old_gain = log(gains[kNumInputParameters - 1]); + + double expected_pitch_lag_hz[] = { + 92.3076923076923, 70.9010339734121, 93.0232558139535}; + double expected_old_lag = lags[kNumInputParameters - 1]; + + double log_pitch_gain[kNumOutputParameters]; + double pitch_lag_hz[kNumInputParameters]; + + GetSubframesPitchParameters(kSamplingRateHz, gains, lags, kNumInputParameters, + kNumOutputParameters, &log_old_gain, &old_lag, + log_pitch_gain, pitch_lag_hz); + + for (int n = 0; n < 3; n++) { + EXPECT_NEAR(pitch_lag_hz[n], expected_pitch_lag_hz[n], 1e-6); + EXPECT_NEAR(log_pitch_gain[n], expected_log_pitch_gain[n], 1e-8); + } + EXPECT_NEAR(old_lag, expected_old_lag, 1e-6); + EXPECT_NEAR(log_old_gain, expected_log_old_gain, 1e-8); +} diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/vad/pole_zero_filter.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/vad/pole_zero_filter.cc new file mode 100644 index 0000000000..fa56a3c15e --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/vad/pole_zero_filter.cc @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "modules/audio_processing/vad/pole_zero_filter.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <algorithm>
+
+namespace webrtc {
+
+PoleZeroFilter* PoleZeroFilter::Create(const float* numerator_coefficients,
+                                       size_t order_numerator,
+                                       const float* denominator_coefficients,
+                                       size_t order_denominator) {
+  // Check the pointers for NULL before dereferencing them.
+  if (numerator_coefficients == NULL || denominator_coefficients == NULL ||
+      order_numerator > kMaxFilterOrder ||
+      order_denominator > kMaxFilterOrder || denominator_coefficients[0] == 0)
+    return NULL;
+  return new PoleZeroFilter(numerator_coefficients, order_numerator,
+                            denominator_coefficients, order_denominator);
+}
+
+PoleZeroFilter::PoleZeroFilter(const float* numerator_coefficients,
+                               size_t order_numerator,
+                               const float* denominator_coefficients,
+                               size_t order_denominator)
+    : past_input_(),
+      past_output_(),
+      numerator_coefficients_(),
+      denominator_coefficients_(),
+      order_numerator_(order_numerator),
+      order_denominator_(order_denominator),
+      highest_order_(std::max(order_denominator, order_numerator)) {
+  memcpy(numerator_coefficients_, numerator_coefficients,
+         sizeof(numerator_coefficients_[0]) * (order_numerator_ + 1));
+  memcpy(denominator_coefficients_, denominator_coefficients,
+         sizeof(denominator_coefficients_[0]) * (order_denominator_ + 1));
+
+  // Normalize the coefficients so that denominator_coefficients_[0] is 1.
+  if (denominator_coefficients_[0] != 1) {
+    for (size_t n = 0; n <= order_numerator_; n++)
+      numerator_coefficients_[n] /= denominator_coefficients_[0];
+    for (size_t n = 0; n <= order_denominator_; n++)
+      denominator_coefficients_[n] /= denominator_coefficients_[0];
+  }
+}
+
+template <typename T>
+static float FilterArPast(const T* past, size_t order,
+                          const float* coefficients) {
+  float sum = 0.0f;
+  size_t past_index = order - 1;
+  for (size_t k = 1; k <= order; k++, past_index--)
+    sum += coefficients[k] * past[past_index];
+  return sum;
+}
+
+int PoleZeroFilter::Filter(const int16_t* in,
+                           size_t num_input_samples,
+                           float* output) {
+  if (in == NULL || output == NULL)
+    return -1;
+  // Filter the first few samples against the buffered past signal.
+  const size_t k = std::min(num_input_samples, highest_order_);
+  size_t n;
+  for (n = 0; n < k; n++) {
+    output[n] = in[n] * numerator_coefficients_[0];
+    output[n] += FilterArPast(&past_input_[n], order_numerator_,
+                              numerator_coefficients_);
+    output[n] -= FilterArPast(&past_output_[n], order_denominator_,
+                              denominator_coefficients_);
+
+    past_input_[n + order_numerator_] = in[n];
+    past_output_[n + order_denominator_] = output[n];
+  }
+  if (highest_order_ < num_input_samples) {
+    for (size_t m = 0; n < num_input_samples; n++, m++) {
+      output[n] = in[n] * numerator_coefficients_[0];
+      output[n] +=
+          FilterArPast(&in[m], order_numerator_, numerator_coefficients_);
+      output[n] -= FilterArPast(&output[m], order_denominator_,
+                                denominator_coefficients_);
+    }
+    // Record into the past signal.
+    memcpy(past_input_, &in[num_input_samples - order_numerator_],
+           sizeof(in[0]) * order_numerator_);
+    memcpy(past_output_, &output[num_input_samples - order_denominator_],
+           sizeof(output[0]) * order_denominator_);
+  } else {
+    // Unusual case where the input is shorter than the filter order.
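+    // Nothing beyond the samples just processed needs to be recorded: the
+    // loop above already wrote them into the state arrays, so discard the
+    // oldest |num_input_samples| entries and slide the most recent |order|
+    // values to the front of |past_input_| and |past_output_|.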
+ memmove(past_input_, &past_input_[num_input_samples], + order_numerator_ * sizeof(past_input_[0])); + memmove(past_output_, &past_output_[num_input_samples], + order_denominator_ * sizeof(past_output_[0])); + } + return 0; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/vad/pole_zero_filter.h b/third_party/libwebrtc/webrtc/modules/audio_processing/vad/pole_zero_filter.h new file mode 100644 index 0000000000..283deecde3 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/vad/pole_zero_filter.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_VAD_POLE_ZERO_FILTER_H_ +#define MODULES_AUDIO_PROCESSING_VAD_POLE_ZERO_FILTER_H_ + +#include <cstddef> + +#include "typedefs.h" // NOLINT(build/include) + +namespace webrtc { + +class PoleZeroFilter { + public: + ~PoleZeroFilter() {} + + static PoleZeroFilter* Create(const float* numerator_coefficients, + size_t order_numerator, + const float* denominator_coefficients, + size_t order_denominator); + + int Filter(const int16_t* in, size_t num_input_samples, float* output); + + private: + PoleZeroFilter(const float* numerator_coefficients, + size_t order_numerator, + const float* denominator_coefficients, + size_t order_denominator); + + static const int kMaxFilterOrder = 24; + + int16_t past_input_[kMaxFilterOrder * 2]; + float past_output_[kMaxFilterOrder * 2]; + + float numerator_coefficients_[kMaxFilterOrder + 1]; + float denominator_coefficients_[kMaxFilterOrder + 1]; + + size_t order_numerator_; + size_t order_denominator_; + size_t highest_order_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_VAD_POLE_ZERO_FILTER_H_ diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/vad/pole_zero_filter_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/vad/pole_zero_filter_unittest.cc new file mode 100644 index 0000000000..aef07a6e44 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/vad/pole_zero_filter_unittest.cc @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/vad/pole_zero_filter.h" + +#include <math.h> +#include <stdio.h> + +#include <memory> + +#include "modules/audio_processing/vad/vad_audio_proc_internal.h" +#include "test/gtest.h" +#include "test/testsupport/fileutils.h" + +namespace webrtc { + +static const int kInputSamples = 50; + +static const int16_t kInput[kInputSamples] = { + -2136, -7116, 10715, 2464, 3164, 8139, 11393, 24013, -32117, -5544, + -27740, 10181, 14190, -24055, -15912, 17393, 6359, -9950, -13894, 32432, + -23944, 3437, -8381, 19768, 3087, -19795, -5920, 13310, 1407, 3876, + 4059, 3524, -23130, 19121, -27900, -24840, 4089, 21422, -3625, 3015, + -11236, 28856, 13424, 6571, -19761, -6361, 15821, -9469, 29727, 32229}; + +static const float kReferenceOutput[kInputSamples] = { + -2082.230472f, -6878.572941f, 10697.090871f, 2358.373952f, + 2973.936512f, 7738.580650f, 10690.803213f, 22687.091576f, + -32676.684717f, -5879.621684f, -27359.297432f, 10368.735888f, + 13994.584604f, -23676.126249f, -15078.250390f, 17818.253338f, + 6577.743123f, -9498.369315f, -13073.651079f, 32460.026588f, + -23391.849347f, 3953.805667f, -7667.761363f, 19995.153447f, + 3185.575477f, -19207.365160f, -5143.103201f, 13756.317237f, + 1779.654794f, 4142.269755f, 4209.475034f, 3572.991789f, + -22509.089546f, 19307.878964f, -27060.439759f, -23319.042810f, + 5547.685267f, 22312.718676f, -2707.309027f, 3852.358490f, + -10135.510093f, 29241.509970f, 13394.397233f, 6340.721417f, + -19510.207905f, -5908.442086f, 15882.301634f, -9211.335255f, + 29253.056735f, 30874.443046f}; + +class PoleZeroFilterTest : public ::testing::Test { + protected: + PoleZeroFilterTest() + : my_filter_(PoleZeroFilter::Create(kCoeffNumerator, + kFilterOrder, + kCoeffDenominator, + kFilterOrder)) {} + + ~PoleZeroFilterTest() {} + + void FilterSubframes(int num_subframes); + + private: + void TestClean(); + std::unique_ptr<PoleZeroFilter> my_filter_; +}; + +void PoleZeroFilterTest::FilterSubframes(int num_subframes) { + float output[kInputSamples]; + const int num_subframe_samples = kInputSamples / num_subframes; + EXPECT_EQ(num_subframe_samples * num_subframes, kInputSamples); + + for (int n = 0; n < num_subframes; n++) { + my_filter_->Filter(&kInput[n * num_subframe_samples], num_subframe_samples, + &output[n * num_subframe_samples]); + } + for (int n = 0; n < kInputSamples; n++) { + EXPECT_NEAR(output[n], kReferenceOutput[n], 1); + } +} + +TEST_F(PoleZeroFilterTest, OneSubframe) { + FilterSubframes(1); +} + +TEST_F(PoleZeroFilterTest, TwoSubframes) { + FilterSubframes(2); +} + +TEST_F(PoleZeroFilterTest, FiveSubframes) { + FilterSubframes(5); +} + +TEST_F(PoleZeroFilterTest, TenSubframes) { + FilterSubframes(10); +} + +TEST_F(PoleZeroFilterTest, TwentyFiveSubframes) { + FilterSubframes(25); +} + +TEST_F(PoleZeroFilterTest, FiftySubframes) { + FilterSubframes(50); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/vad/standalone_vad.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/vad/standalone_vad.cc new file mode 100644 index 0000000000..004cefebb8 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/vad/standalone_vad.cc @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/vad/standalone_vad.h"
+
+#include "audio/utility/audio_frame_operations.h"
+#include "modules/include/module_common_types.h"
+#include "rtc_base/checks.h"
+#include "typedefs.h"  // NOLINT(build/include)
+
+namespace webrtc {
+
+static const int kDefaultStandaloneVadMode = 3;
+
+StandaloneVad::StandaloneVad(VadInst* vad)
+    : vad_(vad), buffer_(), index_(0), mode_(kDefaultStandaloneVadMode) {
+}
+
+StandaloneVad::~StandaloneVad() {
+  WebRtcVad_Free(vad_);
+}
+
+StandaloneVad* StandaloneVad::Create() {
+  VadInst* vad = WebRtcVad_Create();
+  if (!vad)
+    return nullptr;
+
+  int err = WebRtcVad_Init(vad);
+  err |= WebRtcVad_set_mode(vad, kDefaultStandaloneVadMode);
+  if (err != 0) {
+    WebRtcVad_Free(vad);
+    return nullptr;
+  }
+  return new StandaloneVad(vad);
+}
+
+int StandaloneVad::AddAudio(const int16_t* data, size_t length) {
+  if (length != kLength10Ms)
+    return -1;
+
+  if (index_ + length > kLength10Ms * kMaxNum10msFrames)
+    // Reset the buffer if it's full.
+    // TODO(ajm): Instead, consider just processing every 10 ms frame. Then we
+    // can forgo the buffering.
+    index_ = 0;
+
+  memcpy(&buffer_[index_], data, sizeof(int16_t) * length);
+  index_ += length;
+  return 0;
+}
+
+int StandaloneVad::GetActivity(double* p, size_t length_p) {
+  if (index_ == 0)
+    return -1;
+
+  const size_t num_frames = index_ / kLength10Ms;
+  if (num_frames > length_p)
+    return -1;
+  RTC_DCHECK_EQ(0, WebRtcVad_ValidRateAndFrameLength(kSampleRateHz, index_));
+
+  int activity = WebRtcVad_Process(vad_, kSampleRateHz, buffer_, index_);
+  if (activity < 0)
+    return -1;
+  else if (activity == 0)
+    p[0] = 0.01;  // Arbitrary but small and non-zero.
+  else
+    p[0] = 0.5;  // The neutral value when combined with other probabilities.
+  for (size_t n = 1; n < num_frames; n++)
+    p[n] = p[0];
+  // Reset the buffer to start from the beginning.
+  index_ = 0;
+  return activity;
+}
+
+int StandaloneVad::set_mode(int mode) {
+  if (mode < 0 || mode > 3)
+    return -1;
+  if (WebRtcVad_set_mode(vad_, mode) != 0)
+    return -1;
+
+  mode_ = mode;
+  return 0;
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/vad/standalone_vad.h b/third_party/libwebrtc/webrtc/modules/audio_processing/vad/standalone_vad.h
new file mode 100644
index 0000000000..b85de0a75a
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/vad/standalone_vad.h
@@ -0,0 +1,69 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_VAD_STANDALONE_VAD_H_
+#define MODULES_AUDIO_PROCESSING_VAD_STANDALONE_VAD_H_
+
+#include "modules/audio_processing/vad/common.h"
+#include "common_audio/vad/include/webrtc_vad.h"
+#include "typedefs.h"  // NOLINT(build/include)
+
+namespace webrtc {
+
+class AudioFrame;
+
+class StandaloneVad {
+ public:
+  static StandaloneVad* Create();
+  ~StandaloneVad();
+
+  // Outputs
+  //   p: a buffer where probabilities are written to.
+  //   length_p: number of elements of |p|.
+  //
+  // return value:
+  //    -1: if no audio is stored or the VAD returns an error.
+  //     0: on success.
+  // In case of error the content of |p| is unchanged.
+  //
+  // Note that, due to a high false-positive rate (a VAD decision of "active"
+  // while the processed audio is just background noise), the stand-alone VAD
+  // is used as a one-sided indicator. The activity probability is 0.5 if the
+  // frame is classified as active, and 0.01 if the audio is classified as
+  // passive. In this way, when probabilities are combined, the effect of the
+  // stand-alone VAD is neutral if the input is classified as active.
+  int GetActivity(double* p, size_t length_p);
+
+  // Expects 10 ms of 16 kHz audio to be pushed in.
+  int AddAudio(const int16_t* data, size_t length);
+
+  // Sets the aggressiveness of the VAD: 0 is the least aggressive and 3 the
+  // most aggressive mode. Returns -1 if the input is less than 0 or larger
+  // than 3, otherwise 0.
+  int set_mode(int mode);
+  // Returns the aggressiveness of the current VAD.
+  int mode() const { return mode_; }
+
+ private:
+  explicit StandaloneVad(VadInst* vad);
+
+  static const size_t kMaxNum10msFrames = 3;
+
+  // TODO(turajs): Is there a way to use a scoped pointer here?
+  VadInst* vad_;
+  int16_t buffer_[kMaxNum10msFrames * kLength10Ms];
+  size_t index_;
+  int mode_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_VAD_STANDALONE_VAD_H_
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/vad/standalone_vad_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/vad/standalone_vad_unittest.cc
new file mode 100644
index 0000000000..28d1349396
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/vad/standalone_vad_unittest.cc
@@ -0,0 +1,108 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/vad/standalone_vad.h"
+
+#include <string.h>
+
+#include <memory>
+
+#include "modules/include/module_common_types.h"
+#include "test/gtest.h"
+#include "test/testsupport/fileutils.h"
+
+namespace webrtc {
+
+TEST(StandaloneVadTest, Api) {
+  std::unique_ptr<StandaloneVad> vad(StandaloneVad::Create());
+  int16_t data[kLength10Ms] = {0};
+
+  // Valid frame length (for 32 kHz rate), but not what the VAD is expecting.
+  EXPECT_EQ(-1, vad->AddAudio(data, 320));
+
+  const size_t kMaxNumFrames = 3;
+  double p[kMaxNumFrames];
+  for (size_t n = 0; n < kMaxNumFrames; n++)
+    EXPECT_EQ(0, vad->AddAudio(data, kLength10Ms));
+
+  // Pretend |p| is shorter than it should be.
+  EXPECT_EQ(-1, vad->GetActivity(p, kMaxNumFrames - 1));
+
+  EXPECT_EQ(0, vad->GetActivity(p, kMaxNumFrames));
+
+  // Ask for activity when the buffer is empty.
+  EXPECT_EQ(-1, vad->GetActivity(p, kMaxNumFrames));
+
+  // Should reset and result in one buffer.
+  for (size_t n = 0; n < kMaxNumFrames + 1; n++)
+    EXPECT_EQ(0, vad->AddAudio(data, kLength10Ms));
+  EXPECT_EQ(0, vad->GetActivity(p, 1));
+
+  // Wrong modes.
+  EXPECT_EQ(-1, vad->set_mode(-1));
+  EXPECT_EQ(-1, vad->set_mode(4));
+
+  // Valid mode.
+ const int kMode = 2; + EXPECT_EQ(0, vad->set_mode(kMode)); + EXPECT_EQ(kMode, vad->mode()); +} + +#if defined(WEBRTC_IOS) +TEST(StandaloneVadTest, DISABLED_ActivityDetection) { +#else +TEST(StandaloneVadTest, ActivityDetection) { +#endif + std::unique_ptr<StandaloneVad> vad(StandaloneVad::Create()); + const size_t kDataLength = kLength10Ms; + int16_t data[kDataLength] = {0}; + + FILE* pcm_file = + fopen(test::ResourcePath("audio_processing/agc/agc_audio", "pcm").c_str(), + "rb"); + ASSERT_TRUE(pcm_file != NULL); + + FILE* reference_file = fopen( + test::ResourcePath("audio_processing/agc/agc_vad", "dat").c_str(), "rb"); + ASSERT_TRUE(reference_file != NULL); + + // Reference activities are prepared with 0 aggressiveness. + ASSERT_EQ(0, vad->set_mode(0)); + + // Stand-alone VAD can operate on 1, 2 or 3 frames of length 10 ms. The + // reference file is created for 30 ms frame. + const int kNumVadFramesToProcess = 3; + int num_frames = 0; + while (fread(data, sizeof(int16_t), kDataLength, pcm_file) == kDataLength) { + vad->AddAudio(data, kDataLength); + num_frames++; + if (num_frames == kNumVadFramesToProcess) { + num_frames = 0; + int referece_activity; + double p[kNumVadFramesToProcess]; + EXPECT_EQ(1u, fread(&referece_activity, sizeof(referece_activity), 1, + reference_file)); + int activity = vad->GetActivity(p, kNumVadFramesToProcess); + EXPECT_EQ(referece_activity, activity); + if (activity != 0) { + // When active, probabilities are set to 0.5. + for (int n = 0; n < kNumVadFramesToProcess; n++) + EXPECT_EQ(0.5, p[n]); + } else { + // When inactive, probabilities are set to 0.01. + for (int n = 0; n < kNumVadFramesToProcess; n++) + EXPECT_EQ(0.01, p[n]); + } + } + } + fclose(reference_file); + fclose(pcm_file); +} +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/vad/vad_audio_proc.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/vad/vad_audio_proc.cc new file mode 100644 index 0000000000..b1841d0b21 --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/vad/vad_audio_proc.cc @@ -0,0 +1,276 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/vad/vad_audio_proc.h" + +#include <math.h> +#include <stdio.h> + +#include "common_audio/fft4g.h" +#include "modules/audio_processing/vad/pitch_internal.h" +#include "modules/audio_processing/vad/pole_zero_filter.h" +#include "modules/audio_processing/vad/vad_audio_proc_internal.h" +#include "rtc_base/checks.h" +extern "C" { +#include "modules/audio_coding/codecs/isac/main/source/codec.h" +#include "modules/audio_coding/codecs/isac/main/source/lpc_analysis.h" +#include "modules/audio_coding/codecs/isac/main/source/pitch_estimator.h" +#include "modules/audio_coding/codecs/isac/main/source/structs.h" +} +#include "modules/include/module_common_types.h" + +namespace webrtc { + +// The following structures are declared anonymous in iSAC's structs.h. To +// forward declare them, we use this derived class trick. 
+struct VadAudioProc::PitchAnalysisStruct : public ::PitchAnalysisStruct {}; +struct VadAudioProc::PreFiltBankstr : public ::PreFiltBankstr {}; + +static constexpr float kFrequencyResolution = + kSampleRateHz / static_cast<float>(VadAudioProc::kDftSize); +static constexpr int kSilenceRms = 5; + +// TODO(turajs): Make a Create or Init for VadAudioProc. +VadAudioProc::VadAudioProc() + : audio_buffer_(), + num_buffer_samples_(kNumPastSignalSamples), + log_old_gain_(-2), + old_lag_(50), // Arbitrary but valid as pitch-lag (in samples). + pitch_analysis_handle_(new PitchAnalysisStruct), + pre_filter_handle_(new PreFiltBankstr), + high_pass_filter_(PoleZeroFilter::Create(kCoeffNumerator, + kFilterOrder, + kCoeffDenominator, + kFilterOrder)) { + static_assert(kNumPastSignalSamples + kNumSubframeSamples == + sizeof(kLpcAnalWin) / sizeof(kLpcAnalWin[0]), + "lpc analysis window incorrect size"); + static_assert(kLpcOrder + 1 == sizeof(kCorrWeight) / sizeof(kCorrWeight[0]), + "correlation weight incorrect size"); + + // TODO(turajs): Are we doing too much in the constructor? + float data[kDftSize]; + // Make FFT to initialize. + ip_[0] = 0; + WebRtc_rdft(kDftSize, 1, data, ip_, w_fft_); + // TODO(turajs): Need to initialize high-pass filter. + + // Initialize iSAC components. + WebRtcIsac_InitPreFilterbank(pre_filter_handle_.get()); + WebRtcIsac_InitPitchAnalysis(pitch_analysis_handle_.get()); +} + +VadAudioProc::~VadAudioProc() { +} + +void VadAudioProc::ResetBuffer() { + memcpy(audio_buffer_, &audio_buffer_[kNumSamplesToProcess], + sizeof(audio_buffer_[0]) * kNumPastSignalSamples); + num_buffer_samples_ = kNumPastSignalSamples; +} + +int VadAudioProc::ExtractFeatures(const int16_t* frame, + size_t length, + AudioFeatures* features) { + features->num_frames = 0; + if (length != kNumSubframeSamples) { + return -1; + } + + // High-pass filter to remove the DC component and very low frequency content. + // We have experienced that this high-pass filtering improves voice/non-voiced + // classification. + if (high_pass_filter_->Filter(frame, kNumSubframeSamples, + &audio_buffer_[num_buffer_samples_]) != 0) { + return -1; + } + + num_buffer_samples_ += kNumSubframeSamples; + if (num_buffer_samples_ < kBufferLength) { + return 0; + } + RTC_DCHECK_EQ(num_buffer_samples_, kBufferLength); + features->num_frames = kNum10msSubframes; + features->silence = false; + + Rms(features->rms, kMaxNumFrames); + for (size_t i = 0; i < kNum10msSubframes; ++i) { + if (features->rms[i] < kSilenceRms) { + // PitchAnalysis can cause NaNs in the pitch gain if it's fed silence. + // Bail out here instead. + features->silence = true; + ResetBuffer(); + return 0; + } + } + + PitchAnalysis(features->log_pitch_gain, features->pitch_lag_hz, + kMaxNumFrames); + FindFirstSpectralPeaks(features->spectral_peak, kMaxNumFrames); + ResetBuffer(); + return 0; +} + +// Computes |kLpcOrder + 1| correlation coefficients. +void VadAudioProc::SubframeCorrelation(double* corr, + size_t length_corr, + size_t subframe_index) { + RTC_DCHECK_GE(length_corr, kLpcOrder + 1); + double windowed_audio[kNumSubframeSamples + kNumPastSignalSamples]; + size_t buffer_index = subframe_index * kNumSubframeSamples; + + for (size_t n = 0; n < kNumSubframeSamples + kNumPastSignalSamples; n++) + windowed_audio[n] = audio_buffer_[buffer_index++] * kLpcAnalWin[n]; + + WebRtcIsac_AutoCorr(corr, windowed_audio, + kNumSubframeSamples + kNumPastSignalSamples, kLpcOrder); +} + +// Compute |kNum10msSubframes| sets of LPC coefficients, one per 10 ms input. 
+// The analysis window is 15 ms long and it is centered on the first half of +// each 10ms sub-frame. This is equivalent to computing LPC coefficients for the +// first half of each 10 ms subframe. +void VadAudioProc::GetLpcPolynomials(double* lpc, size_t length_lpc) { + RTC_DCHECK_GE(length_lpc, kNum10msSubframes * (kLpcOrder + 1)); + double corr[kLpcOrder + 1]; + double reflec_coeff[kLpcOrder]; + for (size_t i = 0, offset_lpc = 0; i < kNum10msSubframes; + i++, offset_lpc += kLpcOrder + 1) { + SubframeCorrelation(corr, kLpcOrder + 1, i); + corr[0] *= 1.0001; + // This makes Lev-Durb a bit more stable. + for (size_t k = 0; k < kLpcOrder + 1; k++) { + corr[k] *= kCorrWeight[k]; + } + WebRtcIsac_LevDurb(&lpc[offset_lpc], reflec_coeff, corr, kLpcOrder); + } +} + +// Fit a second order curve to these 3 points and find the location of the +// extremum. The points are inverted before curve fitting. +static float QuadraticInterpolation(float prev_val, + float curr_val, + float next_val) { + // Doing the interpolation in |1 / A(z)|^2. + float fractional_index = 0; + next_val = 1.0f / next_val; + prev_val = 1.0f / prev_val; + curr_val = 1.0f / curr_val; + + fractional_index = + -(next_val - prev_val) * 0.5f / (next_val + prev_val - 2.f * curr_val); + RTC_DCHECK_LT(fabs(fractional_index), 1); + return fractional_index; +} + +// 1 / A(z), where A(z) is defined by |lpc| is a model of the spectral envelope +// of the input signal. The local maximum of the spectral envelope corresponds +// with the local minimum of A(z). It saves complexity, as we save one +// inversion. Furthermore, we find the first local maximum of magnitude squared, +// to save on one square root. +void VadAudioProc::FindFirstSpectralPeaks(double* f_peak, + size_t length_f_peak) { + RTC_DCHECK_GE(length_f_peak, kNum10msSubframes); + double lpc[kNum10msSubframes * (kLpcOrder + 1)]; + // For all sub-frames. + GetLpcPolynomials(lpc, kNum10msSubframes * (kLpcOrder + 1)); + + const size_t kNumDftCoefficients = kDftSize / 2 + 1; + float data[kDftSize]; + + for (size_t i = 0; i < kNum10msSubframes; i++) { + // Convert to float with zero pad. + memset(data, 0, sizeof(data)); + for (size_t n = 0; n < kLpcOrder + 1; n++) { + data[n] = static_cast<float>(lpc[i * (kLpcOrder + 1) + n]); + } + // Transform to frequency domain. + WebRtc_rdft(kDftSize, 1, data, ip_, w_fft_); + + size_t index_peak = 0; + float prev_magn_sqr = data[0] * data[0]; + float curr_magn_sqr = data[2] * data[2] + data[3] * data[3]; + float next_magn_sqr; + bool found_peak = false; + for (size_t n = 2; n < kNumDftCoefficients - 1; n++) { + next_magn_sqr = + data[2 * n] * data[2 * n] + data[2 * n + 1] * data[2 * n + 1]; + if (curr_magn_sqr < prev_magn_sqr && curr_magn_sqr < next_magn_sqr) { + found_peak = true; + index_peak = n - 1; + break; + } + prev_magn_sqr = curr_magn_sqr; + curr_magn_sqr = next_magn_sqr; + } + float fractional_index = 0; + if (!found_peak) { + // Checking if |kNumDftCoefficients - 1| is the local minimum. + next_magn_sqr = data[1] * data[1]; + if (curr_magn_sqr < prev_magn_sqr && curr_magn_sqr < next_magn_sqr) { + index_peak = kNumDftCoefficients - 1; + } + } else { + // A peak is found, do a simple quadratic interpolation to get a more + // accurate estimate of the peak location. + fractional_index = + QuadraticInterpolation(prev_magn_sqr, curr_magn_sqr, next_magn_sqr); + } + f_peak[i] = (index_peak + fractional_index) * kFrequencyResolution; + } +} + +// Using iSAC functions to estimate pitch gains & lags. 
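+// The steps, as implemented below: split the 16 kHz buffer into lower and
+// upper subbands (WebRtcIsac_SplitAndFilterFloat), run pitch analysis on the
+// lower band (WebRtcIsac_PitchAnalysis), then map the four 7.5 ms estimates
+// onto three 10 ms subframes with GetSubframesPitchParameters().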
+void VadAudioProc::PitchAnalysis(double* log_pitch_gains,
+                                 double* pitch_lags_hz,
+                                 size_t length) {
+  // TODO(turajs): This constant and the next two can be "imported" from iSAC.
+  RTC_DCHECK_GE(length, kNum10msSubframes);
+  const int kNumPitchSubframes = 4;
+  double gains[kNumPitchSubframes];
+  double lags[kNumPitchSubframes];
+
+  const int kNumSubbandFrameSamples = 240;
+  const int kNumLookaheadSamples = 24;
+
+  float lower[kNumSubbandFrameSamples];
+  float upper[kNumSubbandFrameSamples];
+  double lower_lookahead[kNumSubbandFrameSamples];
+  double upper_lookahead[kNumSubbandFrameSamples];
+  double lower_lookahead_pre_filter[kNumSubbandFrameSamples +
+                                    kNumLookaheadSamples];
+
+  // Split the signal into lower and upper bands.
+  WebRtcIsac_SplitAndFilterFloat(&audio_buffer_[kNumPastSignalSamples], lower,
+                                 upper, lower_lookahead, upper_lookahead,
+                                 pre_filter_handle_.get());
+  WebRtcIsac_PitchAnalysis(lower_lookahead, lower_lookahead_pre_filter,
+                           pitch_analysis_handle_.get(), lags, gains);
+
+  // Lags are computed on the lower-band signal, whose sampling rate is half
+  // that of the input signal.
+  GetSubframesPitchParameters(
+      kSampleRateHz / 2, gains, lags, kNumPitchSubframes, kNum10msSubframes,
+      &log_old_gain_, &old_lag_, log_pitch_gains, pitch_lags_hz);
+}
+
+void VadAudioProc::Rms(double* rms, size_t length_rms) {
+  RTC_DCHECK_GE(length_rms, kNum10msSubframes);
+  size_t offset = kNumPastSignalSamples;
+  for (size_t i = 0; i < kNum10msSubframes; i++) {
+    rms[i] = 0;
+    for (size_t n = 0; n < kNumSubframeSamples; n++, offset++)
+      rms[i] += audio_buffer_[offset] * audio_buffer_[offset];
+    rms[i] = sqrt(rms[i] / kNumSubframeSamples);
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/vad/vad_audio_proc.h b/third_party/libwebrtc/webrtc/modules/audio_processing/vad/vad_audio_proc.h
new file mode 100644
index 0000000000..b1441a08c0
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/vad/vad_audio_proc.h
@@ -0,0 +1,93 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_
+#define MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_
+
+#include <memory>
+
+#include "modules/audio_processing/vad/common.h"
+#include "typedefs.h"  // NOLINT(build/include)
+
+namespace webrtc {
+
+class AudioFrame;
+class PoleZeroFilter;
+
+class VadAudioProc {
+ public:
+  // Forward declare iSAC structs.
+  struct PitchAnalysisStruct;
+  struct PreFiltBankstr;
+
+  VadAudioProc();
+  ~VadAudioProc();
+
+  int ExtractFeatures(const int16_t* audio_frame,
+                      size_t length,
+                      AudioFeatures* audio_features);
+
+  static const size_t kDftSize = 512;
+
+ private:
+  void PitchAnalysis(double* pitch_gains, double* pitch_lags_hz, size_t length);
+  void SubframeCorrelation(double* corr,
+                           size_t length_corr,
+                           size_t subframe_index);
+  void GetLpcPolynomials(double* lpc, size_t length_lpc);
+  void FindFirstSpectralPeaks(double* f_peak, size_t length_f_peak);
+  void Rms(double* rms, size_t length_rms);
+  void ResetBuffer();
+
+  // To compute the spectral peak we perform LPC analysis to get the spectral
+  // envelope.
+  // For every 30 ms we compute 3 spectral peaks, hence 3 LPC analyses.
+  // LPC is computed over 15 ms of windowed audio. For every 10 ms sub-frame
+  // we need 5 ms of past signal to create the input of the LPC analysis.
+  enum : size_t {
+    kNumPastSignalSamples = static_cast<size_t>(kSampleRateHz / 200)
+  };
+
+  // TODO(turajs): Maybe define this at a higher level (maybe as an enum) so
+  // that all the code recognizes it as "no-error."
+  enum : int { kNoError = 0 };
+
+  enum : size_t { kNum10msSubframes = 3 };
+  enum : size_t {
+    kNumSubframeSamples = static_cast<size_t>(kSampleRateHz / 100)
+  };
+  enum : size_t {
+    // Samples in 30 ms at the given sampling rate.
+    kNumSamplesToProcess = kNum10msSubframes * kNumSubframeSamples
+  };
+  enum : size_t {
+    kBufferLength = kNumPastSignalSamples + kNumSamplesToProcess
+  };
+  enum : size_t { kIpLength = kDftSize >> 1 };
+  enum : size_t { kWLength = kDftSize >> 1 };
+  enum : size_t { kLpcOrder = 16 };
+
+  size_t ip_[kIpLength];
+  float w_fft_[kWLength];
+
+  // A buffer of 5 ms (past audio) + 30 ms (one iSAC frame).
+  float audio_buffer_[kBufferLength];
+  size_t num_buffer_samples_;
+
+  double log_old_gain_;
+  double old_lag_;
+
+  std::unique_ptr<PitchAnalysisStruct> pitch_analysis_handle_;
+  std::unique_ptr<PreFiltBankstr> pre_filter_handle_;
+  std::unique_ptr<PoleZeroFilter> high_pass_filter_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/vad/vad_audio_proc_internal.h b/third_party/libwebrtc/webrtc/modules/audio_processing/vad/vad_audio_proc_internal.h
new file mode 100644
index 0000000000..ab1e63651f
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/vad/vad_audio_proc_internal.h
@@ -0,0 +1,94 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_INTERNAL_H_
+#define MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_INTERNAL_H_
+
+namespace webrtc {
+
+// These values should match their MATLAB counterparts for the unit tests to
+// pass.
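+//
+// Observed from the values (the MATLAB scripts remain the source of truth):
+// kCorrWeight[k] equals 0.985^k for k = 0..kLpcOrder (e.g. 0.985^2 =
+// 0.970225), a mild lag window applied to the autocorrelation before
+// Levinson-Durbin, and kLpcAnalWin appears to be a 240-sample (15 ms at
+// 16 kHz) sine window.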
+static const double kCorrWeight[] = {1.000000, + 0.985000, + 0.970225, + 0.955672, + 0.941337, + 0.927217, + 0.913308, + 0.899609, + 0.886115, + 0.872823, + 0.859730, + 0.846834, + 0.834132, + 0.821620, + 0.809296, + 0.797156, + 0.785199}; + +static const double kLpcAnalWin[] = { + 0.00000000, 0.01314436, 0.02628645, 0.03942400, 0.05255473, 0.06567639, + 0.07878670, 0.09188339, 0.10496421, 0.11802689, 0.13106918, 0.14408883, + 0.15708358, 0.17005118, 0.18298941, 0.19589602, 0.20876878, 0.22160547, + 0.23440387, 0.24716177, 0.25987696, 0.27254725, 0.28517045, 0.29774438, + 0.31026687, 0.32273574, 0.33514885, 0.34750406, 0.35979922, 0.37203222, + 0.38420093, 0.39630327, 0.40833713, 0.42030043, 0.43219112, 0.44400713, + 0.45574642, 0.46740697, 0.47898676, 0.49048379, 0.50189608, 0.51322164, + 0.52445853, 0.53560481, 0.54665854, 0.55761782, 0.56848075, 0.57924546, + 0.58991008, 0.60047278, 0.61093173, 0.62128512, 0.63153117, 0.64166810, + 0.65169416, 0.66160761, 0.67140676, 0.68108990, 0.69065536, 0.70010148, + 0.70942664, 0.71862923, 0.72770765, 0.73666033, 0.74548573, 0.75418233, + 0.76274862, 0.77118312, 0.77948437, 0.78765094, 0.79568142, 0.80357442, + 0.81132858, 0.81894256, 0.82641504, 0.83374472, 0.84093036, 0.84797069, + 0.85486451, 0.86161063, 0.86820787, 0.87465511, 0.88095122, 0.88709512, + 0.89308574, 0.89892206, 0.90460306, 0.91012776, 0.91549520, 0.92070447, + 0.92575465, 0.93064488, 0.93537432, 0.93994213, 0.94434755, 0.94858979, + 0.95266814, 0.95658189, 0.96033035, 0.96391289, 0.96732888, 0.97057773, + 0.97365889, 0.97657181, 0.97931600, 0.98189099, 0.98429632, 0.98653158, + 0.98859639, 0.99049038, 0.99221324, 0.99376466, 0.99514438, 0.99635215, + 0.99738778, 0.99825107, 0.99894188, 0.99946010, 0.99980562, 0.99997840, + 0.99997840, 0.99980562, 0.99946010, 0.99894188, 0.99825107, 0.99738778, + 0.99635215, 0.99514438, 0.99376466, 0.99221324, 0.99049038, 0.98859639, + 0.98653158, 0.98429632, 0.98189099, 0.97931600, 0.97657181, 0.97365889, + 0.97057773, 0.96732888, 0.96391289, 0.96033035, 0.95658189, 0.95266814, + 0.94858979, 0.94434755, 0.93994213, 0.93537432, 0.93064488, 0.92575465, + 0.92070447, 0.91549520, 0.91012776, 0.90460306, 0.89892206, 0.89308574, + 0.88709512, 0.88095122, 0.87465511, 0.86820787, 0.86161063, 0.85486451, + 0.84797069, 0.84093036, 0.83374472, 0.82641504, 0.81894256, 0.81132858, + 0.80357442, 0.79568142, 0.78765094, 0.77948437, 0.77118312, 0.76274862, + 0.75418233, 0.74548573, 0.73666033, 0.72770765, 0.71862923, 0.70942664, + 0.70010148, 0.69065536, 0.68108990, 0.67140676, 0.66160761, 0.65169416, + 0.64166810, 0.63153117, 0.62128512, 0.61093173, 0.60047278, 0.58991008, + 0.57924546, 0.56848075, 0.55761782, 0.54665854, 0.53560481, 0.52445853, + 0.51322164, 0.50189608, 0.49048379, 0.47898676, 0.46740697, 0.45574642, + 0.44400713, 0.43219112, 0.42030043, 0.40833713, 0.39630327, 0.38420093, + 0.37203222, 0.35979922, 0.34750406, 0.33514885, 0.32273574, 0.31026687, + 0.29774438, 0.28517045, 0.27254725, 0.25987696, 0.24716177, 0.23440387, + 0.22160547, 0.20876878, 0.19589602, 0.18298941, 0.17005118, 0.15708358, + 0.14408883, 0.13106918, 0.11802689, 0.10496421, 0.09188339, 0.07878670, + 0.06567639, 0.05255473, 0.03942400, 0.02628645, 0.01314436, 0.00000000}; + +static const size_t kFilterOrder = 2; +static const float kCoeffNumerator[kFilterOrder + 1] = {0.974827f, + -1.949650f, + 0.974827f}; +static const float kCoeffDenominator[kFilterOrder + 1] = {1.0f, + -1.971999f, + 0.972457f}; + +static_assert(kFilterOrder + 1 == + sizeof(kCoeffNumerator) / 
sizeof(kCoeffNumerator[0]),
+              "numerator coefficients incorrect size");
+static_assert(kFilterOrder + 1 ==
+                  sizeof(kCoeffDenominator) / sizeof(kCoeffDenominator[0]),
+              "denominator coefficients incorrect size");
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_INTERNAL_H_
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/vad/vad_audio_proc_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/vad/vad_audio_proc_unittest.cc
new file mode 100644
index 0000000000..c520257f8e
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/vad/vad_audio_proc_unittest.cc
@@ -0,0 +1,63 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+// We don't test the values of pitch gain and lags, as they are created by
+// iSAC routines. However, the interpolation of pitch gain and lags is in a
+// separate class and has its own unit test.
+
+#include "modules/audio_processing/vad/vad_audio_proc.h"
+
+#include <math.h>
+#include <stdio.h>
+
+#include <string>
+
+#include "modules/audio_processing/vad/common.h"
+#include "modules/include/module_common_types.h"
+#include "test/gtest.h"
+#include "test/testsupport/fileutils.h"
+
+namespace webrtc {
+
+TEST(AudioProcessingTest, DISABLED_ComputingFirstSpectralPeak) {
+  VadAudioProc audioproc;
+
+  std::string peak_file_name =
+      test::ResourcePath("audio_processing/agc/agc_spectral_peak", "dat");
+  FILE* peak_file = fopen(peak_file_name.c_str(), "rb");
+  ASSERT_TRUE(peak_file != NULL);
+
+  std::string pcm_file_name =
+      test::ResourcePath("audio_processing/agc/agc_audio", "pcm");
+  FILE* pcm_file = fopen(pcm_file_name.c_str(), "rb");
+  ASSERT_TRUE(pcm_file != NULL);
+
+  // Read 10 ms of audio in each iteration.
+  const size_t kDataLength = kLength10Ms;
+  int16_t data[kDataLength] = {0};
+  AudioFeatures features;
+  double sp[kMaxNumFrames];
+  while (fread(data, sizeof(int16_t), kDataLength, pcm_file) == kDataLength) {
+    audioproc.ExtractFeatures(data, kDataLength, &features);
+    if (features.num_frames > 0) {
+      ASSERT_LT(features.num_frames, kMaxNumFrames);
+      // Read reference values.
+      const size_t num_frames = features.num_frames;
+      ASSERT_EQ(num_frames, fread(sp, sizeof(sp[0]), num_frames, peak_file));
+      for (size_t n = 0; n < features.num_frames; n++)
+        EXPECT_NEAR(features.spectral_peak[n], sp[n], 3);
+    }
+  }
+
+  fclose(peak_file);
+  fclose(pcm_file);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/vad/vad_circular_buffer.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/vad/vad_circular_buffer.cc
new file mode 100644
index 0000000000..3c4d5ad0e0
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/vad/vad_circular_buffer.cc
@@ -0,0 +1,137 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */ + +#include "modules/audio_processing/vad/vad_circular_buffer.h" + +#include <stdlib.h> + +namespace webrtc { + +VadCircularBuffer::VadCircularBuffer(int buffer_size) + : buffer_(new double[buffer_size]), + is_full_(false), + index_(0), + buffer_size_(buffer_size), + sum_(0) { +} + +VadCircularBuffer::~VadCircularBuffer() { +} + +void VadCircularBuffer::Reset() { + is_full_ = false; + index_ = 0; + sum_ = 0; +} + +VadCircularBuffer* VadCircularBuffer::Create(int buffer_size) { + if (buffer_size <= 0) + return NULL; + return new VadCircularBuffer(buffer_size); +} + +double VadCircularBuffer::Oldest() const { + if (!is_full_) + return buffer_[0]; + else + return buffer_[index_]; +} + +double VadCircularBuffer::Mean() { + double m; + if (is_full_) { + m = sum_ / buffer_size_; + } else { + if (index_ > 0) + m = sum_ / index_; + else + m = 0; + } + return m; +} + +void VadCircularBuffer::Insert(double value) { + if (is_full_) { + sum_ -= buffer_[index_]; + } + sum_ += value; + buffer_[index_] = value; + index_++; + if (index_ >= buffer_size_) { + is_full_ = true; + index_ = 0; + } +} +int VadCircularBuffer::BufferLevel() { + if (is_full_) + return buffer_size_; + return index_; +} + +int VadCircularBuffer::Get(int index, double* value) const { + int err = ConvertToLinearIndex(&index); + if (err < 0) + return -1; + *value = buffer_[index]; + return 0; +} + +int VadCircularBuffer::Set(int index, double value) { + int err = ConvertToLinearIndex(&index); + if (err < 0) + return -1; + + sum_ -= buffer_[index]; + buffer_[index] = value; + sum_ += value; + return 0; +} + +int VadCircularBuffer::ConvertToLinearIndex(int* index) const { + if (*index < 0 || *index >= buffer_size_) + return -1; + + if (!is_full_ && *index >= index_) + return -1; + + *index = index_ - 1 - *index; + if (*index < 0) + *index += buffer_size_; + return 0; +} + +int VadCircularBuffer::RemoveTransient(int width_threshold, + double val_threshold) { + if (!is_full_ && index_ < width_threshold + 2) + return 0; + + int index_1 = 0; + int index_2 = width_threshold + 1; + double v = 0; + if (Get(index_1, &v) < 0) + return -1; + if (v < val_threshold) { + Set(index_1, 0); + int index; + for (index = index_2; index > index_1; index--) { + if (Get(index, &v) < 0) + return -1; + if (v < val_threshold) + break; + } + for (; index > index_1; index--) { + if (Set(index, 0.0) < 0) + return -1; + } + } + return 0; +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/vad/vad_circular_buffer.h b/third_party/libwebrtc/webrtc/modules/audio_processing/vad/vad_circular_buffer.h new file mode 100644 index 0000000000..46b03d49bf --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/vad/vad_circular_buffer.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_VAD_VAD_CIRCULAR_BUFFER_H_ +#define MODULES_AUDIO_PROCESSING_VAD_VAD_CIRCULAR_BUFFER_H_ + +#include <memory> + +namespace webrtc { + +// A circular buffer tailored to the need of this project. It stores last +// K samples of the input, and keeps track of the mean of the last samples. 
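+// Insert() and Mean() are O(1): a running |sum_| is updated on every
+// insertion (subtracting the overwritten element once the buffer is full),
+// so computing the mean never requires a pass over the buffer.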
+//
+// It is used in class PitchBasedVad to keep track of the posterior
+// probabilities in the past few seconds. The posterior probabilities are used
+// to recursively update the prior probabilities.
+class VadCircularBuffer {
+ public:
+  static VadCircularBuffer* Create(int buffer_size);
+  ~VadCircularBuffer();
+
+  // Whether the buffer has wrapped around.
+  bool is_full() const { return is_full_; }
+  // Get the oldest entry in the buffer.
+  double Oldest() const;
+  // Insert a new value into the buffer.
+  void Insert(double value);
+  // Reset the buffer: forget the past, start fresh.
+  void Reset();
+
+  // The mean value of the elements in the buffer. The return value is zero if
+  // the buffer is empty, i.e. no value has been inserted.
+  double Mean();
+  // Remove transients. If the values exceed |val_threshold| for a period
+  // shorter than or equal to |width_threshold|, then that period is considered
+  // transient and set to zero.
+  int RemoveTransient(int width_threshold, double val_threshold);
+
+ private:
+  explicit VadCircularBuffer(int buffer_size);
+  // Get previous values. |index = 0| corresponds to the most recent
+  // insertion, |index = 1| to the one before it, and so on.
+  int Get(int index, double* value) const;
+  // Set a given position to |value|. |index| is interpreted as above.
+  int Set(int index, double value);
+  // Return the number of valid elements in the buffer.
+  int BufferLevel();
+
+  // Convert an index, interpreted as in Get(), to the corresponding linear
+  // index into |buffer_|.
+  int ConvertToLinearIndex(int* index) const;
+
+  std::unique_ptr<double[]> buffer_;
+  bool is_full_;
+  int index_;
+  int buffer_size_;
+  double sum_;
+};
+
+}  // namespace webrtc
+#endif  // MODULES_AUDIO_PROCESSING_VAD_VAD_CIRCULAR_BUFFER_H_
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/vad/vad_circular_buffer_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/vad/vad_circular_buffer_unittest.cc
new file mode 100644
index 0000000000..efbd70d9d9
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/vad/vad_circular_buffer_unittest.cc
@@ -0,0 +1,134 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */ + +#include "modules/audio_processing/vad/vad_circular_buffer.h" + +#include <stdio.h> + +#include <memory> + +#include "test/gtest.h" + +namespace webrtc { + +static const int kWidthThreshold = 7; +static const double kValThreshold = 1.0; +static const int kLongBuffSize = 100; +static const int kShortBuffSize = 10; + +static void InsertSequentially(int k, VadCircularBuffer* circular_buffer) { + double mean_val; + for (int n = 1; n <= k; n++) { + EXPECT_TRUE(!circular_buffer->is_full()); + circular_buffer->Insert(n); + mean_val = circular_buffer->Mean(); + EXPECT_EQ((n + 1.0) / 2., mean_val); + } +} + +static void Insert(double value, + int num_insertion, + VadCircularBuffer* circular_buffer) { + for (int n = 0; n < num_insertion; n++) + circular_buffer->Insert(value); +} + +static void InsertZeros(int num_zeros, VadCircularBuffer* circular_buffer) { + Insert(0.0, num_zeros, circular_buffer); +} + +TEST(VadCircularBufferTest, GeneralTest) { + std::unique_ptr<VadCircularBuffer> circular_buffer( + VadCircularBuffer::Create(kShortBuffSize)); + double mean_val; + + // Mean should return zero if nothing is inserted. + mean_val = circular_buffer->Mean(); + EXPECT_DOUBLE_EQ(0.0, mean_val); + InsertSequentially(kShortBuffSize, circular_buffer.get()); + + // Should be full. + EXPECT_TRUE(circular_buffer->is_full()); + // Correct update after being full. + for (int n = 1; n < kShortBuffSize; n++) { + circular_buffer->Insert(n); + mean_val = circular_buffer->Mean(); + EXPECT_DOUBLE_EQ((kShortBuffSize + 1.) / 2., mean_val); + EXPECT_TRUE(circular_buffer->is_full()); + } + + // Check reset. This should be like starting fresh. + circular_buffer->Reset(); + mean_val = circular_buffer->Mean(); + EXPECT_DOUBLE_EQ(0, mean_val); + InsertSequentially(kShortBuffSize, circular_buffer.get()); + EXPECT_TRUE(circular_buffer->is_full()); +} + +TEST(VadCircularBufferTest, TransientsRemoval) { + std::unique_ptr<VadCircularBuffer> circular_buffer( + VadCircularBuffer::Create(kLongBuffSize)); + // Let the first transient be in wrap-around. + InsertZeros(kLongBuffSize - kWidthThreshold / 2, circular_buffer.get()); + + double push_val = kValThreshold; + double mean_val; + for (int k = kWidthThreshold; k >= 1; k--) { + Insert(push_val, k, circular_buffer.get()); + circular_buffer->Insert(0); + mean_val = circular_buffer->Mean(); + EXPECT_DOUBLE_EQ(k * push_val / kLongBuffSize, mean_val); + circular_buffer->RemoveTransient(kWidthThreshold, kValThreshold); + mean_val = circular_buffer->Mean(); + EXPECT_DOUBLE_EQ(0, mean_val); + } +} + +TEST(VadCircularBufferTest, TransientDetection) { + std::unique_ptr<VadCircularBuffer> circular_buffer( + VadCircularBuffer::Create(kLongBuffSize)); + // Let the first transient be in wrap-around. + int num_insertion = kLongBuffSize - kWidthThreshold / 2; + InsertZeros(num_insertion, circular_buffer.get()); + + double push_val = 2; + // This is longer than a transient and shouldn't be removed. + int num_non_zero_elements = kWidthThreshold + 1; + Insert(push_val, num_non_zero_elements, circular_buffer.get()); + + double mean_val = circular_buffer->Mean(); + EXPECT_DOUBLE_EQ(num_non_zero_elements * push_val / kLongBuffSize, mean_val); + circular_buffer->Insert(0); + EXPECT_EQ(0, + circular_buffer->RemoveTransient(kWidthThreshold, kValThreshold)); + mean_val = circular_buffer->Mean(); + EXPECT_DOUBLE_EQ(num_non_zero_elements * push_val / kLongBuffSize, mean_val); + + // A transient right after a non-transient, should be removed and mean is + // not changed. 
+ num_insertion = 3; + Insert(push_val, num_insertion, circular_buffer.get()); + circular_buffer->Insert(0); + EXPECT_EQ(0, + circular_buffer->RemoveTransient(kWidthThreshold, kValThreshold)); + mean_val = circular_buffer->Mean(); + EXPECT_DOUBLE_EQ(num_non_zero_elements * push_val / kLongBuffSize, mean_val); + + // Last input is larger than threshold, although the sequence is short but + // it shouldn't be considered transient. + Insert(push_val, num_insertion, circular_buffer.get()); + num_non_zero_elements += num_insertion; + EXPECT_EQ(0, + circular_buffer->RemoveTransient(kWidthThreshold, kValThreshold)); + mean_val = circular_buffer->Mean(); + EXPECT_DOUBLE_EQ(num_non_zero_elements * push_val / kLongBuffSize, mean_val); +} + +} // namespace webrtc diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/vad/voice_activity_detector.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/vad/voice_activity_detector.cc new file mode 100644 index 0000000000..66a704f6be --- /dev/null +++ b/third_party/libwebrtc/webrtc/modules/audio_processing/vad/voice_activity_detector.cc @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/vad/voice_activity_detector.h" + +#include <algorithm> + +#include "rtc_base/checks.h" + +namespace webrtc { +namespace { + +const size_t kNumChannels = 1; + +const double kDefaultVoiceValue = 1.0; +const double kNeutralProbability = 0.5; +const double kLowProbability = 0.01; + +} // namespace + +VoiceActivityDetector::VoiceActivityDetector() + : last_voice_probability_(kDefaultVoiceValue), + standalone_vad_(StandaloneVad::Create()) { +} + +VoiceActivityDetector::~VoiceActivityDetector() = default; + +// Because ISAC has a different chunk length, it updates +// |chunkwise_voice_probabilities_| and |chunkwise_rms_| when there is new data. +// Otherwise it clears them. +void VoiceActivityDetector::ProcessChunk(const int16_t* audio, + size_t length, + int sample_rate_hz) { + RTC_DCHECK_EQ(length, sample_rate_hz / 100); + // Resample to the required rate. + const int16_t* resampled_ptr = audio; + if (sample_rate_hz != kSampleRateHz) { + RTC_CHECK_EQ( + resampler_.ResetIfNeeded(sample_rate_hz, kSampleRateHz, kNumChannels), + 0); + resampler_.Push(audio, length, resampled_, kLength10Ms, length); + resampled_ptr = resampled_; + } + RTC_DCHECK_EQ(length, kLength10Ms); + + // Each chunk needs to be passed into |standalone_vad_|, because internally it + // buffers the audio and processes it all at once when GetActivity() is + // called. + RTC_CHECK_EQ(standalone_vad_->AddAudio(resampled_ptr, length), 0); + + audio_processing_.ExtractFeatures(resampled_ptr, length, &features_); + + chunkwise_voice_probabilities_.resize(features_.num_frames); + chunkwise_rms_.resize(features_.num_frames); + std::copy(features_.rms, features_.rms + chunkwise_rms_.size(), + chunkwise_rms_.begin()); + if (features_.num_frames > 0) { + if (features_.silence) { + // The other features are invalid, so set the voice probabilities to an + // arbitrary low value. 
+      std::fill(chunkwise_voice_probabilities_.begin(),
+                chunkwise_voice_probabilities_.end(), kLowProbability);
+    } else {
+      std::fill(chunkwise_voice_probabilities_.begin(),
+                chunkwise_voice_probabilities_.end(), kNeutralProbability);
+      RTC_CHECK_GE(
+          standalone_vad_->GetActivity(&chunkwise_voice_probabilities_[0],
+                                       chunkwise_voice_probabilities_.size()),
+          0);
+      RTC_CHECK_GE(pitch_based_vad_.VoicingProbability(
+                       features_, &chunkwise_voice_probabilities_[0]),
+                   0);
+    }
+    last_voice_probability_ = chunkwise_voice_probabilities_.back();
+  }
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/vad/voice_activity_detector.h b/third_party/libwebrtc/webrtc/modules/audio_processing/vad/voice_activity_detector.h
new file mode 100644
index 0000000000..0079cb2184
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/vad/voice_activity_detector.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_VAD_VOICE_ACTIVITY_DETECTOR_H_
+#define MODULES_AUDIO_PROCESSING_VAD_VOICE_ACTIVITY_DETECTOR_H_
+
+#include <memory>
+#include <vector>
+
+#include "common_audio/resampler/include/resampler.h"
+#include "modules/audio_processing/vad/common.h"
+#include "modules/audio_processing/vad/pitch_based_vad.h"
+#include "modules/audio_processing/vad/standalone_vad.h"
+#include "modules/audio_processing/vad/vad_audio_proc.h"
+
+namespace webrtc {
+
+// A Voice Activity Detector (VAD) that combines the voice probabilities from
+// the StandaloneVad and the PitchBasedVad to get a more robust estimate.
+class VoiceActivityDetector {
+ public:
+  VoiceActivityDetector();
+  ~VoiceActivityDetector();
+
+  // Processes each audio chunk and estimates the voice probability.
+  void ProcessChunk(const int16_t* audio, size_t length, int sample_rate_hz);
+
+  // Returns a vector of voice probabilities for each chunk. It can be empty
+  // for some chunks, but it catches up afterwards by returning multiple
+  // values at once.
+  const std::vector<double>& chunkwise_voice_probabilities() const {
+    return chunkwise_voice_probabilities_;
+  }
+
+  // Returns a vector of RMS values for each chunk. It has the same length as
+  // chunkwise_voice_probabilities().
+  const std::vector<double>& chunkwise_rms() const { return chunkwise_rms_; }
+
+  // Returns the most recent voice probability. It is always available,
+  // regardless of the internal chunking, but it may lag the input by a few
+  // chunks.
+  float last_voice_probability() const { return last_voice_probability_; }
+
+ private:
+  // TODO(aluebs): Change these to float.
+  std::vector<double> chunkwise_voice_probabilities_;
+  std::vector<double> chunkwise_rms_;
+
+  float last_voice_probability_;
+
+  Resampler resampler_;
+  VadAudioProc audio_processing_;
+
+  std::unique_ptr<StandaloneVad> standalone_vad_;
+  PitchBasedVad pitch_based_vad_;
+
+  int16_t resampled_[kLength10Ms];
+  AudioFeatures features_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_VAD_VOICE_ACTIVITY_DETECTOR_H_
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/vad/voice_activity_detector_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/vad/voice_activity_detector_unittest.cc
new file mode 100644
index 0000000000..62eda2bd3d
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/vad/voice_activity_detector_unittest.cc
@@ -0,0 +1,168 @@
+/*
+ * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/vad/voice_activity_detector.h"
+
+#include <algorithm>
+#include <cstdio>
+#include <cstdlib>
+#include <vector>
+
+#include "test/gtest.h"
+#include "test/testsupport/fileutils.h"
+
+namespace webrtc {
+namespace {
+
+const int kStartTimeSec = 16;
+const float kMeanSpeechProbability = 0.3f;
+const float kMaxNoiseProbability = 0.1f;
+const size_t kNumChunks = 300u;
+const size_t kNumChunksPerIsacBlock = 3;
+
+void GenerateNoise(std::vector<int16_t>* data) {
+  for (size_t i = 0; i < data->size(); ++i) {
+    // std::rand() returns a value in [0, RAND_MAX]; the implicit conversion
+    // to int16_t wraps it into the full int16_t range, which is adequate for
+    // generating noise.
+    (*data)[i] = std::rand();
+  }
+}
+
+}  // namespace
+
+TEST(VoiceActivityDetectorTest, ConstructorSetsDefaultValues) {
+  const float kDefaultVoiceValue = 1.f;
+
+  VoiceActivityDetector vad;
+
+  std::vector<double> p = vad.chunkwise_voice_probabilities();
+  std::vector<double> rms = vad.chunkwise_rms();
+
+  EXPECT_EQ(p.size(), 0u);
+  EXPECT_EQ(rms.size(), 0u);
+
+  EXPECT_FLOAT_EQ(vad.last_voice_probability(), kDefaultVoiceValue);
+}
+
+TEST(VoiceActivityDetectorTest, Speech16kHzHasHighVoiceProbabilities) {
+  const int kSampleRateHz = 16000;
+  const int kLength10Ms = kSampleRateHz / 100;
+
+  VoiceActivityDetector vad;
+
+  std::vector<int16_t> data(kLength10Ms);
+  float mean_probability = 0.f;
+
+  FILE* pcm_file =
+      fopen(test::ResourcePath("audio_processing/transient/audio16kHz", "pcm")
+                .c_str(),
+            "rb");
+  ASSERT_TRUE(pcm_file != nullptr);
+  // The silences in the file are skipped to get a more robust voice
+  // probability for speech.
+  ASSERT_EQ(fseek(pcm_file, kStartTimeSec * kSampleRateHz * sizeof(data[0]),
+                  SEEK_SET),
+            0);
+
+  size_t num_chunks = 0;
+  while (fread(&data[0], sizeof(data[0]), data.size(), pcm_file) ==
+         data.size()) {
+    vad.ProcessChunk(&data[0], data.size(), kSampleRateHz);
+
+    mean_probability += vad.last_voice_probability();
+
+    ++num_chunks;
+  }
+
+  mean_probability /= num_chunks;
+
+  EXPECT_GT(mean_probability, kMeanSpeechProbability);
+}
+
+TEST(VoiceActivityDetectorTest, Speech32kHzHasHighVoiceProbabilities) {
+  const int kSampleRateHz = 32000;
+  const int kLength10Ms = kSampleRateHz / 100;
+
+  VoiceActivityDetector vad;
+
+  std::vector<int16_t> data(kLength10Ms);
+  float mean_probability = 0.f;
+
+  FILE* pcm_file =
+      fopen(test::ResourcePath("audio_processing/transient/audio32kHz", "pcm")
+                .c_str(),
+            "rb");
+  ASSERT_TRUE(pcm_file != nullptr);
+  // The silences in the file are skipped to get a more robust voice
+  // probability for speech.
+  ASSERT_EQ(fseek(pcm_file, kStartTimeSec * kSampleRateHz * sizeof(data[0]),
+                  SEEK_SET),
+            0);
+
+  size_t num_chunks = 0;
+  while (fread(&data[0], sizeof(data[0]), data.size(), pcm_file) ==
+         data.size()) {
+    vad.ProcessChunk(&data[0], data.size(), kSampleRateHz);
+
+    mean_probability += vad.last_voice_probability();
+
+    ++num_chunks;
+  }
+
+  mean_probability /= num_chunks;
+
+  EXPECT_GT(mean_probability, kMeanSpeechProbability);
+}
+
+TEST(VoiceActivityDetectorTest, Noise16kHzHasLowVoiceProbabilities) {
+  VoiceActivityDetector vad;
+
+  std::vector<int16_t> data(kLength10Ms);
+  float max_probability = 0.f;
+
+  std::srand(42);
+
+  for (size_t i = 0; i < kNumChunks; ++i) {
+    GenerateNoise(&data);
+
+    vad.ProcessChunk(&data[0], data.size(), kSampleRateHz);
+
+    // Before |vad| has enough data to process an ISAC block it will return
+    // the default value, 1.f, which would ruin the |max_probability| value.
+    if (i > kNumChunksPerIsacBlock) {
+      max_probability =
+          std::max(max_probability, vad.last_voice_probability());
+    }
+  }
+
+  EXPECT_LT(max_probability, kMaxNoiseProbability);
+}
+
+TEST(VoiceActivityDetectorTest, Noise32kHzHasLowVoiceProbabilities) {
+  VoiceActivityDetector vad;
+
+  std::vector<int16_t> data(2 * kLength10Ms);
+  float max_probability = 0.f;
+
+  std::srand(42);
+
+  for (size_t i = 0; i < kNumChunks; ++i) {
+    GenerateNoise(&data);
+
+    vad.ProcessChunk(&data[0], data.size(), 2 * kSampleRateHz);
+
+    // Before |vad| has enough data to process an ISAC block it will return
+    // the default value, 1.f, which would ruin the |max_probability| value.
+    if (i > kNumChunksPerIsacBlock) {
+      max_probability =
+          std::max(max_probability, vad.last_voice_probability());
+    }
+  }
+
+  EXPECT_LT(max_probability, kMaxNoiseProbability);
+}
+
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/vad/voice_gmm_tables.h b/third_party/libwebrtc/webrtc/modules/audio_processing/vad/voice_gmm_tables.h
new file mode 100644
index 0000000000..29cc7d6cb2
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/vad/voice_gmm_tables.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+// GMM tables for active segments. Generated by MakeGmmTables.m.
+
+#ifndef MODULES_AUDIO_PROCESSING_VAD_VOICE_GMM_TABLES_H_
+#define MODULES_AUDIO_PROCESSING_VAD_VOICE_GMM_TABLES_H_
+
+static const int kVoiceGmmNumMixtures = 12;
+static const int kVoiceGmmDim = 3;
+
+static const double
+    kVoiceGmmCovarInverse[kVoiceGmmNumMixtures][kVoiceGmmDim][kVoiceGmmDim] = {
+        {{1.83673825579513e+00, -8.09791637570095e-04, 4.60106414365986e-03},
+         {-8.09791637570095e-04, 8.89351738394608e-04, -9.80188953277734e-04},
+         {4.60106414365986e-03, -9.80188953277734e-04, 1.38706060206582e-03}},
+        {{6.76228912850703e+01, -1.98893120119660e-02, -3.53548357253551e-03},
+         {-1.98893120119660e-02, 3.96216858500530e-05, -4.08492938394097e-05},
+         {-3.53548357253551e-03, -4.08492938394097e-05, 9.31864352856416e-04}},
+        {{9.98612435944558e+00, -5.27880954316893e-03, -6.30342541619017e-03},
+         {-5.27880954316893e-03, 4.54359480225226e-05, 6.30804591626044e-05},
+         {-6.30342541619017e-03, 6.30804591626044e-05, 5.36466441382942e-04}},
+        {{3.39917474216349e+01, -1.56213579433191e-03, -4.01459014990225e-02},
+         {-1.56213579433191e-03, 6.40415424897724e-05, 6.20076342427833e-05},
+         {-4.01459014990225e-02, 6.20076342427833e-05, 3.51199070103063e-03}},
+        {{1.34545062271428e+01, -7.94513610147144e-03, -5.34401019341728e-02},
+         {-7.94513610147144e-03, 1.16511820098649e-04, 4.66063702069293e-05},
+         {-5.34401019341728e-02, 4.66063702069293e-05, 2.72354323774163e-03}},
+        {{1.08557844314806e+02, -1.54885805673668e-02, -1.88029692674851e-02},
+         {-1.54885805673668e-02, 1.16404042786406e-04, 6.45579292702802e-06},
+         {-1.88029692674851e-02, 6.45579292702802e-06, 4.32330478391416e-04}},
+        {{8.22940066541450e+01, -1.15903110231303e-02, -4.92166764865343e-02},
+         {-1.15903110231303e-02, 7.42510742165261e-05, 3.73007314191290e-06},
+         {-4.92166764865343e-02, 3.73007314191290e-06, 3.64005221593244e-03}},
+        {{2.31133605685660e+00, -7.83261568950254e-04, 7.45744012346313e-04},
+         {-7.83261568950254e-04, 1.29460648214142e-05, -2.22774455093730e-06},
+         {7.45744012346313e-04, -2.22774455093730e-06, 1.05117294093010e-04}},
+        {{3.78767849189611e+02, 1.57759761011568e-03, -2.08551217988774e-02},
+         {1.57759761011568e-03, 4.76066236886865e-05, -2.33977412299324e-05},
+         {-2.08551217988774e-02, -2.33977412299324e-05, 5.24261005371196e-04}},
+        {{6.98580096506135e-01, -5.13850255217378e-04, -4.01124551717056e-04},
+         {-5.13850255217378e-04, 1.40501021984840e-06, -2.09496928716569e-06},
+         {-4.01124551717056e-04, -2.09496928716569e-06, 2.82879357740037e-04}},
+        {{2.62770945162399e+00, -2.31825753241430e-03, -5.30447217466318e-03},
+         {-2.31825753241430e-03, 4.59108572227649e-05, 7.67631886355405e-05},
+         {-5.30447217466318e-03, 7.67631886355405e-05, 2.28521601674098e-03}},
+        {{1.89940391362152e+02, -4.23280856852379e-03, -2.70608873541399e-02},
+         {-4.23280856852379e-03, 6.77547582742563e-05, 2.69154203800467e-05},
+         {-2.70608873541399e-02, 2.69154203800467e-05, 3.88574543373470e-03}}};
+
+static const double kVoiceGmmMean[kVoiceGmmNumMixtures][kVoiceGmmDim] = {
+    {-2.15020241646536e+00, 4.97079062999877e+02, 4.77078119504505e+02},
+    {-8.92097680029190e-01, 5.92064964199921e+02, 1.81045145941059e+02},
+    {-1.29435784144398e+00, 4.98450293410611e+02, 1.71991263804064e+02},
+    {-1.03925228397884e+00, 4.99511274321571e+02, 1.05838336539105e+02},
+    {-1.29229047206129e+00, 4.15026762566707e+02, 1.12861119017125e+02},
+    {-7.88748114599810e-01, 4.48739336688113e+02, 1.89784216956337e+02},
+    {-8.77777402332642e-01, 4.86620285054533e+02, 1.13477708016491e+02},
+    {-2.06465957063057e+00, 6.33385049870607e+02, 2.32758546796149e+02},
+    {-6.98893789231685e-01, 5.93622051503385e+02, 1.92536982473203e+02},
+    {-2.55901217508894e+00, 1.55914919756205e+03, 1.39769980835570e+02},
+    {-1.92070024165837e+00, 4.87983940444185e+02, 1.02745468128289e+02},
+    {-7.29187507662854e-01, 5.22717685022855e+02, 1.16377942283991e+02}};
+
+static const double kVoiceGmmWeights[kVoiceGmmNumMixtures] = {
+    -1.39789694361035e+01,
+    -1.19527720202104e+01,
+    -1.32396317929055e+01,
+    -1.09436815209238e+01,
+    -1.13440027478149e+01,
+    -1.12200721834504e+01,
+    -1.02537324043693e+01,
+    -1.60789861938302e+01,
+    -1.03394494048344e+01,
+    -1.83207938586818e+01,
+    -1.31186044948288e+01,
+    -9.52479998673554e+00};
+#endif  // MODULES_AUDIO_PROCESSING_VAD_VOICE_GMM_TABLES_H_
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/voice_detection_impl.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/voice_detection_impl.cc
new file mode 100644
index 0000000000..5ee0c7b9c2
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/voice_detection_impl.cc
@@ -0,0 +1,155 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/voice_detection_impl.h"
+
+#include "common_audio/vad/include/webrtc_vad.h"
+#include "modules/audio_processing/audio_buffer.h"
+#include "rtc_base/constructormagic.h"
+
+namespace webrtc {
+class VoiceDetectionImpl::Vad {
+ public:
+  Vad() {
+    state_ = WebRtcVad_Create();
+    RTC_CHECK(state_);
+    int error = WebRtcVad_Init(state_);
+    RTC_DCHECK_EQ(0, error);
+  }
+  ~Vad() { WebRtcVad_Free(state_); }
+
+  VadInst* state() { return state_; }
+
+ private:
+  VadInst* state_ = nullptr;
+  RTC_DISALLOW_COPY_AND_ASSIGN(Vad);
+};
+
+VoiceDetectionImpl::VoiceDetectionImpl(rtc::CriticalSection* crit)
+    : crit_(crit) {
+  RTC_DCHECK(crit);
+}
+
+VoiceDetectionImpl::~VoiceDetectionImpl() {}
+
+void VoiceDetectionImpl::Initialize(int sample_rate_hz) {
+  rtc::CritScope cs(crit_);
+  sample_rate_hz_ = sample_rate_hz;
+  std::unique_ptr<Vad> new_vad;
+  if (enabled_) {
+    new_vad.reset(new Vad());
+  }
+  vad_.swap(new_vad);
+  using_external_vad_ = false;
+  frame_size_samples_ =
+      static_cast<size_t>(frame_size_ms_ * sample_rate_hz_) / 1000;
+  set_likelihood(likelihood_);
+}
+
+void VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) {
+  rtc::CritScope cs(crit_);
+  if (!enabled_) {
+    return;
+  }
+  if (using_external_vad_) {
+    using_external_vad_ = false;
+    return;
+  }
+
+  RTC_DCHECK_GE(160, audio->num_frames_per_band());
+  // TODO(ajm): concatenate data in frame buffer here.
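+  // WebRtcVad_Process() returns 1 for active speech, 0 for non-speech, and
+  // -1 on error; the error case should be unreachable here because |vad_|
+  // was successfully initialized when it was created.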
+  int vad_ret = WebRtcVad_Process(vad_->state(), sample_rate_hz_,
+                                  audio->mixed_low_pass_data(),
+                                  frame_size_samples_);
+  if (vad_ret == 0) {
+    stream_has_voice_ = false;
+    audio->set_activity(AudioFrame::kVadPassive);
+  } else if (vad_ret == 1) {
+    stream_has_voice_ = true;
+    audio->set_activity(AudioFrame::kVadActive);
+  } else {
+    RTC_NOTREACHED();
+  }
+}
+
+int VoiceDetectionImpl::Enable(bool enable) {
+  rtc::CritScope cs(crit_);
+  if (enabled_ != enable) {
+    enabled_ = enable;
+    Initialize(sample_rate_hz_);
+  }
+  return AudioProcessing::kNoError;
+}
+
+bool VoiceDetectionImpl::is_enabled() const {
+  rtc::CritScope cs(crit_);
+  return enabled_;
+}
+
+int VoiceDetectionImpl::set_stream_has_voice(bool has_voice) {
+  rtc::CritScope cs(crit_);
+  using_external_vad_ = true;
+  stream_has_voice_ = has_voice;
+  return AudioProcessing::kNoError;
+}
+
+bool VoiceDetectionImpl::stream_has_voice() const {
+  rtc::CritScope cs(crit_);
+  // TODO(ajm): enable this assertion?
+  // RTC_DCHECK(using_external_vad_ || is_component_enabled());
+  return stream_has_voice_;
+}
+
+int VoiceDetectionImpl::set_likelihood(VoiceDetection::Likelihood likelihood) {
+  rtc::CritScope cs(crit_);
+  likelihood_ = likelihood;
+  if (enabled_) {
+    int mode = 2;
+    switch (likelihood) {
+      case VoiceDetection::kVeryLowLikelihood:
+        mode = 3;
+        break;
+      case VoiceDetection::kLowLikelihood:
+        mode = 2;
+        break;
+      case VoiceDetection::kModerateLikelihood:
+        mode = 1;
+        break;
+      case VoiceDetection::kHighLikelihood:
+        mode = 0;
+        break;
+      default:
+        RTC_NOTREACHED();
+        break;
+    }
+    int error = WebRtcVad_set_mode(vad_->state(), mode);
+    RTC_DCHECK_EQ(0, error);
+  }
+  return AudioProcessing::kNoError;
+}
+
+VoiceDetection::Likelihood VoiceDetectionImpl::likelihood() const {
+  rtc::CritScope cs(crit_);
+  return likelihood_;
+}
+
+int VoiceDetectionImpl::set_frame_size_ms(int size) {
+  rtc::CritScope cs(crit_);
+  RTC_DCHECK_EQ(10, size);  // TODO(ajm): remove once other sizes are supported.
+  frame_size_ms_ = size;
+  Initialize(sample_rate_hz_);
+  return AudioProcessing::kNoError;
+}
+
+int VoiceDetectionImpl::frame_size_ms() const {
+  rtc::CritScope cs(crit_);
+  return frame_size_ms_;
+}
+}  // namespace webrtc
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/voice_detection_impl.h b/third_party/libwebrtc/webrtc/modules/audio_processing/voice_detection_impl.h
new file mode 100644
index 0000000000..4b724bdd25
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/voice_detection_impl.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_VOICE_DETECTION_IMPL_H_
+#define MODULES_AUDIO_PROCESSING_VOICE_DETECTION_IMPL_H_
+
+#include <memory>
+
+#include "modules/audio_processing/include/audio_processing.h"
+#include "rtc_base/constructormagic.h"
+#include "rtc_base/criticalsection.h"
+
+namespace webrtc {
+
+class AudioBuffer;
+
+class VoiceDetectionImpl : public VoiceDetection {
+ public:
+  explicit VoiceDetectionImpl(rtc::CriticalSection* crit);
+  ~VoiceDetectionImpl() override;
+
+  // TODO(peah): Fold into ctor, once public API is removed.
+  void Initialize(int sample_rate_hz);
+  void ProcessCaptureAudio(AudioBuffer* audio);
+
+  // VoiceDetection implementation.
+  int Enable(bool enable) override;
+  bool is_enabled() const override;
+  int set_stream_has_voice(bool has_voice) override;
+  bool stream_has_voice() const override;
+  int set_likelihood(Likelihood likelihood) override;
+  Likelihood likelihood() const override;
+  int set_frame_size_ms(int size) override;
+  int frame_size_ms() const override;
+
+ private:
+  class Vad;
+  rtc::CriticalSection* const crit_;
+  bool enabled_ RTC_GUARDED_BY(crit_) = false;
+  bool stream_has_voice_ RTC_GUARDED_BY(crit_) = false;
+  bool using_external_vad_ RTC_GUARDED_BY(crit_) = false;
+  Likelihood likelihood_ RTC_GUARDED_BY(crit_) = kLowLikelihood;
+  int frame_size_ms_ RTC_GUARDED_BY(crit_) = 10;
+  size_t frame_size_samples_ RTC_GUARDED_BY(crit_) = 0;
+  int sample_rate_hz_ RTC_GUARDED_BY(crit_) = 0;
+  std::unique_ptr<Vad> vad_ RTC_GUARDED_BY(crit_);
+  RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(VoiceDetectionImpl);
+};
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_VOICE_DETECTION_IMPL_H_
diff --git a/third_party/libwebrtc/webrtc/modules/audio_processing/voice_detection_unittest.cc b/third_party/libwebrtc/webrtc/modules/audio_processing/voice_detection_unittest.cc
new file mode 100644
index 0000000000..663913b638
--- /dev/null
+++ b/third_party/libwebrtc/webrtc/modules/audio_processing/voice_detection_unittest.cc
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include <vector>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/audio_buffer.h"
+#include "modules/audio_processing/test/audio_buffer_tools.h"
+#include "modules/audio_processing/test/bitexactness_tools.h"
+#include "modules/audio_processing/voice_detection_impl.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace {
+
+const int kNumFramesToProcess = 1000;
+
+// Processes one frame of data through the voice detector.
+void ProcessOneFrame(int sample_rate_hz,
+                     AudioBuffer* audio_buffer,
+                     VoiceDetectionImpl* voice_detection) {
+  if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) {
+    audio_buffer->SplitIntoFrequencyBands();
+  }
+
+  voice_detection->ProcessCaptureAudio(audio_buffer);
+}
+
+// Processes a specified number of frames, verifies the results, and reports
+// any errors.
+void RunBitexactnessTest(int sample_rate_hz,
+                         size_t num_channels,
+                         int frame_size_ms_reference,
+                         bool stream_has_voice_reference,
+                         VoiceDetection::Likelihood likelihood_reference) {
+  rtc::CriticalSection crit_capture;
+  VoiceDetectionImpl voice_detection(&crit_capture);
+  voice_detection.Initialize(sample_rate_hz > 16000 ? 16000 : sample_rate_hz);
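+  // Rates above 16 kHz are capped in the call above because the detector
+  // consumes only the low band of the band-split capture buffer.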
+  voice_detection.Enable(true);
+
+  int samples_per_channel = rtc::CheckedDivExact(sample_rate_hz, 100);
+  const StreamConfig capture_config(sample_rate_hz, num_channels, false);
+  AudioBuffer capture_buffer(
+      capture_config.num_frames(), capture_config.num_channels(),
+      capture_config.num_frames(), capture_config.num_channels(),
+      capture_config.num_frames());
+  test::InputAudioFile capture_file(
+      test::GetApmCaptureTestVectorFileName(sample_rate_hz));
+  std::vector<float> capture_input(samples_per_channel * num_channels);
+  for (int frame_no = 0; frame_no < kNumFramesToProcess; ++frame_no) {
+    ReadFloatSamplesFromStereoFile(samples_per_channel, num_channels,
+                                   &capture_file, capture_input);
+
+    test::CopyVectorToAudioBuffer(capture_config, capture_input,
+                                  &capture_buffer);
+
+    ProcessOneFrame(sample_rate_hz, &capture_buffer, &voice_detection);
+  }
+
+  int frame_size_ms = voice_detection.frame_size_ms();
+  bool stream_has_voice = voice_detection.stream_has_voice();
+  VoiceDetection::Likelihood likelihood = voice_detection.likelihood();
+
+  // Compare the outputs to the references.
+  EXPECT_EQ(frame_size_ms_reference, frame_size_ms);
+  EXPECT_EQ(stream_has_voice_reference, stream_has_voice);
+  EXPECT_EQ(likelihood_reference, likelihood);
+}
+
+const int kFrameSizeMsReference = 10;
+const bool kStreamHasVoiceReference = true;
+const VoiceDetection::Likelihood kLikelihoodReference =
+    VoiceDetection::kLowLikelihood;
+
+}  // namespace
+
+TEST(VoiceDetectionBitExactnessTest, Mono8kHz) {
+  RunBitexactnessTest(8000, 1, kFrameSizeMsReference, kStreamHasVoiceReference,
+                      kLikelihoodReference);
+}
+
+TEST(VoiceDetectionBitExactnessTest, Mono16kHz) {
+  RunBitexactnessTest(16000, 1, kFrameSizeMsReference, kStreamHasVoiceReference,
+                      kLikelihoodReference);
+}
+
+TEST(VoiceDetectionBitExactnessTest, Mono32kHz) {
+  RunBitexactnessTest(32000, 1, kFrameSizeMsReference, kStreamHasVoiceReference,
+                      kLikelihoodReference);
+}
+
+TEST(VoiceDetectionBitExactnessTest, Mono48kHz) {
+  RunBitexactnessTest(48000, 1, kFrameSizeMsReference, kStreamHasVoiceReference,
+                      kLikelihoodReference);
+}
+
+TEST(VoiceDetectionBitExactnessTest, Stereo8kHz) {
+  RunBitexactnessTest(8000, 2, kFrameSizeMsReference, kStreamHasVoiceReference,
+                      kLikelihoodReference);
+}
+
+TEST(VoiceDetectionBitExactnessTest, Stereo16kHz) {
+  RunBitexactnessTest(16000, 2, kFrameSizeMsReference, kStreamHasVoiceReference,
+                      kLikelihoodReference);
+}
+
+TEST(VoiceDetectionBitExactnessTest, Stereo32kHz) {
+  RunBitexactnessTest(32000, 2, kFrameSizeMsReference, kStreamHasVoiceReference,
+                      kLikelihoodReference);
+}
+
+TEST(VoiceDetectionBitExactnessTest, Stereo48kHz) {
+  RunBitexactnessTest(48000, 2, kFrameSizeMsReference, kStreamHasVoiceReference,
+                      kLikelihoodReference);
+}
+
+}  // namespace webrtc
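A minimal usage sketch for the VoiceActivityDetector added above: feed one 10 ms mono chunk per call and read the combined probability back afterwards. The function name, the 16 kHz rate, and the zero-filled placeholder samples are illustrative assumptions, not part of the patch.

#include <cstdint>
#include <vector>

#include "modules/audio_processing/vad/voice_activity_detector.h"

// Drives the detector with one 10 ms chunk and reads back both the chunkwise
// probabilities (which may lag by a few chunks) and the most recent estimate.
void SketchVoiceActivityDetectorUsage() {
  constexpr int kRateHz = 16000;                 // 10 ms == kRateHz / 100 samples.
  std::vector<int16_t> chunk(kRateHz / 100, 0);  // Placeholder samples.

  webrtc::VoiceActivityDetector vad;
  vad.ProcessChunk(chunk.data(), chunk.size(), kRateHz);

  // Same length as vad.chunkwise_rms(); may be empty for early chunks.
  const std::vector<double>& probabilities = vad.chunkwise_voice_probabilities();
  const float last = vad.last_voice_probability();
  static_cast<void>(probabilities);
  static_cast<void>(last);
}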